### Set up libraries

In [2]:
import pandas as pd
import random
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

### Fix random seed value

In [6]:
CFG = {
    'EPOCHS': 50,
    'LEARNING_RATE':5e-4,
    'BATCH_SIZE':128,
    'SEED':37
}

In [7]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(CFG['SEED']) # Seed 고정

### Data Preprocessing

- Created separate train and test datasets.
- Utilized Y_Class for classification, and excluded Y_Quality from the analysis.
- Removed Product Id and TimeStamp from the data, as they were not relevant for the analysis.
- For handling remaining missing values, filling them with zero values yielded the best performance based on experimentation.

In [8]:
train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')

In [9]:
train_x = train_df.drop(columns=['PRODUCT_ID', 'TIMESTAMP', 'Y_Class', 'Y_Quality'])
train_y = train_df['Y_Class']

test_x = test_df.drop(columns=['PRODUCT_ID', 'TIMESTAMP'])

In [10]:
train_x = train_x.fillna(0)
test_x = test_x.fillna(0)

In [None]:
# convert labels into numbers using qualitative to quantitative auto encoder
qual_col = ['LINE', 'PRODUCT_CODE']

for i in qual_col:
    le = LabelEncoder()
    le = le.fit(train_x[i])
    train_x[i] = le.transform(train_x[i])
    
    for label in np.unique(test_x[i]): 
        if label not in le.classes_: 
            le.classes_ = np.append(le.classes_, label)
    test_x[i] = le.transform(test_x[i]) 
print('Done.')

Done.


### Model 

In [12]:
xgb = XGBClassifier()
lgb = LGBMClassifier()
gbm = GradientBoostingClassifier()
cat = CatBoostClassifier()
lreg = LogisticRegression()

In [13]:
from sklearn.ensemble import VotingClassifier

In [14]:
eclf1 = VotingClassifier(estimators=[('xgb', xgb), ('lgb', lgb), ('gbm', gbm), ('cat', cat), ('lreg', lreg)], voting='soft')

In [15]:
eclf1 = eclf1.fit(train_x, train_y)

Learning rate set to 0.077079
0:	learn: 1.0458083	total: 379ms	remaining: 6m 18s
1:	learn: 1.0009665	total: 599ms	remaining: 4m 58s
2:	learn: 0.9631682	total: 814ms	remaining: 4m 30s
3:	learn: 0.9313001	total: 1.03s	remaining: 4m 16s
4:	learn: 0.9031881	total: 1.25s	remaining: 4m 9s
5:	learn: 0.8773837	total: 1.47s	remaining: 4m 3s
6:	learn: 0.8565848	total: 1.68s	remaining: 3m 58s
7:	learn: 0.8370632	total: 1.89s	remaining: 3m 54s
8:	learn: 0.8212866	total: 2.1s	remaining: 3m 51s
9:	learn: 0.8030577	total: 2.32s	remaining: 3m 49s
10:	learn: 0.7906252	total: 2.53s	remaining: 3m 47s
11:	learn: 0.7782895	total: 2.74s	remaining: 3m 45s
12:	learn: 0.7673231	total: 2.96s	remaining: 3m 44s
13:	learn: 0.7555807	total: 3.17s	remaining: 3m 43s
14:	learn: 0.7462247	total: 3.38s	remaining: 3m 41s
15:	learn: 0.7357240	total: 3.59s	remaining: 3m 40s
16:	learn: 0.7274386	total: 3.8s	remaining: 3m 39s
17:	learn: 0.7210475	total: 4.01s	remaining: 3m 39s
18:	learn: 0.7143298	total: 4.23s	remaining: 3m 

158:	learn: 0.3714745	total: 34.2s	remaining: 3m
159:	learn: 0.3701631	total: 34.4s	remaining: 3m
160:	learn: 0.3680516	total: 34.6s	remaining: 3m
161:	learn: 0.3661277	total: 34.8s	remaining: 3m
162:	learn: 0.3637309	total: 35s	remaining: 2m 59s
163:	learn: 0.3623354	total: 35.3s	remaining: 2m 59s
164:	learn: 0.3612056	total: 35.5s	remaining: 2m 59s
165:	learn: 0.3590561	total: 35.7s	remaining: 2m 59s
166:	learn: 0.3573650	total: 35.9s	remaining: 2m 59s
167:	learn: 0.3553636	total: 36.1s	remaining: 2m 58s
168:	learn: 0.3534417	total: 36.3s	remaining: 2m 58s
169:	learn: 0.3511349	total: 36.5s	remaining: 2m 58s
170:	learn: 0.3503191	total: 36.7s	remaining: 2m 58s
171:	learn: 0.3488309	total: 37s	remaining: 2m 57s
172:	learn: 0.3480103	total: 37.2s	remaining: 2m 57s
173:	learn: 0.3472670	total: 37.4s	remaining: 2m 57s
174:	learn: 0.3454662	total: 37.6s	remaining: 2m 57s
175:	learn: 0.3436513	total: 37.8s	remaining: 2m 57s
176:	learn: 0.3423163	total: 38.1s	remaining: 2m 57s
177:	learn: 0

314:	learn: 0.2086066	total: 1m 7s	remaining: 2m 27s
315:	learn: 0.2078099	total: 1m 8s	remaining: 2m 27s
316:	learn: 0.2071720	total: 1m 8s	remaining: 2m 27s
317:	learn: 0.2062136	total: 1m 8s	remaining: 2m 27s
318:	learn: 0.2056206	total: 1m 8s	remaining: 2m 26s
319:	learn: 0.2051625	total: 1m 9s	remaining: 2m 26s
320:	learn: 0.2049690	total: 1m 9s	remaining: 2m 26s
321:	learn: 0.2045355	total: 1m 9s	remaining: 2m 26s
322:	learn: 0.2038859	total: 1m 9s	remaining: 2m 26s
323:	learn: 0.2029200	total: 1m 9s	remaining: 2m 25s
324:	learn: 0.2017516	total: 1m 10s	remaining: 2m 25s
325:	learn: 0.2010192	total: 1m 10s	remaining: 2m 25s
326:	learn: 0.2003171	total: 1m 10s	remaining: 2m 25s
327:	learn: 0.2000007	total: 1m 10s	remaining: 2m 25s
328:	learn: 0.1996109	total: 1m 11s	remaining: 2m 24s
329:	learn: 0.1991343	total: 1m 11s	remaining: 2m 24s
330:	learn: 0.1985668	total: 1m 11s	remaining: 2m 24s
331:	learn: 0.1976694	total: 1m 11s	remaining: 2m 24s
332:	learn: 0.1970287	total: 1m 11s	re

468:	learn: 0.1334294	total: 1m 41s	remaining: 1m 54s
469:	learn: 0.1330099	total: 1m 41s	remaining: 1m 54s
470:	learn: 0.1328299	total: 1m 41s	remaining: 1m 54s
471:	learn: 0.1326203	total: 1m 42s	remaining: 1m 54s
472:	learn: 0.1323114	total: 1m 42s	remaining: 1m 53s
473:	learn: 0.1318611	total: 1m 42s	remaining: 1m 53s
474:	learn: 0.1313890	total: 1m 42s	remaining: 1m 53s
475:	learn: 0.1310875	total: 1m 42s	remaining: 1m 53s
476:	learn: 0.1305650	total: 1m 43s	remaining: 1m 53s
477:	learn: 0.1301380	total: 1m 43s	remaining: 1m 52s
478:	learn: 0.1298113	total: 1m 43s	remaining: 1m 52s
479:	learn: 0.1291123	total: 1m 43s	remaining: 1m 52s
480:	learn: 0.1288658	total: 1m 43s	remaining: 1m 52s
481:	learn: 0.1285770	total: 1m 44s	remaining: 1m 51s
482:	learn: 0.1283956	total: 1m 44s	remaining: 1m 51s
483:	learn: 0.1281980	total: 1m 44s	remaining: 1m 51s
484:	learn: 0.1278208	total: 1m 44s	remaining: 1m 51s
485:	learn: 0.1274801	total: 1m 45s	remaining: 1m 51s
486:	learn: 0.1272128	total:

621:	learn: 0.0936860	total: 2m 14s	remaining: 1m 21s
622:	learn: 0.0935919	total: 2m 14s	remaining: 1m 21s
623:	learn: 0.0932790	total: 2m 14s	remaining: 1m 21s
624:	learn: 0.0930679	total: 2m 15s	remaining: 1m 21s
625:	learn: 0.0929116	total: 2m 15s	remaining: 1m 20s
626:	learn: 0.0926935	total: 2m 15s	remaining: 1m 20s
627:	learn: 0.0924045	total: 2m 15s	remaining: 1m 20s
628:	learn: 0.0922009	total: 2m 15s	remaining: 1m 20s
629:	learn: 0.0920101	total: 2m 16s	remaining: 1m 19s
630:	learn: 0.0918257	total: 2m 16s	remaining: 1m 19s
631:	learn: 0.0915969	total: 2m 16s	remaining: 1m 19s
632:	learn: 0.0913331	total: 2m 16s	remaining: 1m 19s
633:	learn: 0.0911233	total: 2m 17s	remaining: 1m 19s
634:	learn: 0.0910381	total: 2m 17s	remaining: 1m 18s
635:	learn: 0.0908470	total: 2m 17s	remaining: 1m 18s
636:	learn: 0.0906569	total: 2m 17s	remaining: 1m 18s
637:	learn: 0.0905432	total: 2m 17s	remaining: 1m 18s
638:	learn: 0.0903788	total: 2m 18s	remaining: 1m 18s
639:	learn: 0.0902182	total:

776:	learn: 0.0695066	total: 2m 47s	remaining: 48.2s
777:	learn: 0.0693614	total: 2m 48s	remaining: 48s
778:	learn: 0.0692936	total: 2m 48s	remaining: 47.7s
779:	learn: 0.0691607	total: 2m 48s	remaining: 47.5s
780:	learn: 0.0690571	total: 2m 48s	remaining: 47.3s
781:	learn: 0.0689422	total: 2m 48s	remaining: 47.1s
782:	learn: 0.0688629	total: 2m 49s	remaining: 46.9s
783:	learn: 0.0686803	total: 2m 49s	remaining: 46.7s
784:	learn: 0.0685677	total: 2m 49s	remaining: 46.5s
785:	learn: 0.0683964	total: 2m 49s	remaining: 46.2s
786:	learn: 0.0682243	total: 2m 50s	remaining: 46s
787:	learn: 0.0681014	total: 2m 50s	remaining: 45.8s
788:	learn: 0.0679961	total: 2m 50s	remaining: 45.6s
789:	learn: 0.0678222	total: 2m 50s	remaining: 45.4s
790:	learn: 0.0676748	total: 2m 50s	remaining: 45.2s
791:	learn: 0.0674868	total: 2m 51s	remaining: 44.9s
792:	learn: 0.0673779	total: 2m 51s	remaining: 44.7s
793:	learn: 0.0672389	total: 2m 51s	remaining: 44.5s
794:	learn: 0.0671430	total: 2m 51s	remaining: 44.

933:	learn: 0.0531873	total: 3m 21s	remaining: 14.3s
934:	learn: 0.0530418	total: 3m 21s	remaining: 14s
935:	learn: 0.0529790	total: 3m 22s	remaining: 13.8s
936:	learn: 0.0528872	total: 3m 22s	remaining: 13.6s
937:	learn: 0.0528401	total: 3m 22s	remaining: 13.4s
938:	learn: 0.0527753	total: 3m 22s	remaining: 13.2s
939:	learn: 0.0526190	total: 3m 22s	remaining: 13s
940:	learn: 0.0525397	total: 3m 23s	remaining: 12.7s
941:	learn: 0.0524016	total: 3m 23s	remaining: 12.5s
942:	learn: 0.0523706	total: 3m 23s	remaining: 12.3s
943:	learn: 0.0523003	total: 3m 23s	remaining: 12.1s
944:	learn: 0.0522641	total: 3m 24s	remaining: 11.9s
945:	learn: 0.0521598	total: 3m 24s	remaining: 11.7s
946:	learn: 0.0521237	total: 3m 24s	remaining: 11.4s
947:	learn: 0.0520560	total: 3m 24s	remaining: 11.2s
948:	learn: 0.0519916	total: 3m 24s	remaining: 11s
949:	learn: 0.0518910	total: 3m 25s	remaining: 10.8s
950:	learn: 0.0518283	total: 3m 25s	remaining: 10.6s
951:	learn: 0.0517810	total: 3m 25s	remaining: 10.4s

### Test prediction

In [16]:
preds = eclf1.predict(test_x)

In [1]:
import pickle

In [17]:
submit = pd.read_csv('./sample_submission.csv')

In [18]:
submit['Y_Class'] = preds

In [19]:
submit.to_csv('./baseline_submission2.csv', index=False)