<a href="https://colab.research.google.com/github/chaeyh4/SmartFactory/blob/main/GBM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import

In [None]:
import pandas as pd
import random
import os
import numpy as np

from sklearn.preprocessing import LabelEncoder

## Data Load

In [None]:
# Read the training and test tables from CSV files in the working directory.
train_df, test_df = (pd.read_csv(csv_path) for csv_path in ('./train.csv', './test.csv'))

In [None]:
# Display the (rows, columns) dimensions of the training table.
train_df.shape

(598, 2881)

In [None]:
# Display the (rows, columns) dimensions of the test table.
test_df.shape

(310, 2879)

In [None]:
# Training features: every column except the identifier, the timestamp and
# the two Y_* columns.
non_feature_cols = ['PRODUCT_ID', 'TIMESTAMP']
target_cols = ['Y_Class', 'Y_Quality']
train_x = train_df.drop(columns=non_feature_cols + target_cols)
# Training target: the Y_Class column.
train_y = train_df['Y_Class']

# Test features: only the identifier and timestamp columns are dropped.
test_x = test_df.drop(columns=non_feature_cols)

## Data Pre-processing

In [None]:
# Replace every missing value with 0 in both feature tables.
train_x, test_x = (frame.fillna(0) for frame in (train_x, test_x))

In [None]:
# qualitative to quantitative
qual_col = ['LINE', 'PRODUCT_CODE']

for col in qual_col:
    # The label -> integer mapping is learned from the training column only.
    encoder = LabelEncoder().fit(train_x[col])
    train_x[col] = encoder.transform(train_x[col])

    # Labels that occur only in the test column are appended to the encoder's
    # known classes before the test column is transformed.
    for label in np.unique(test_x[col]):
        if label in encoder.classes_:
            continue
        encoder.classes_ = np.append(encoder.classes_, label)
    test_x[col] = encoder.transform(test_x[col])
print('Done.')

Done.


In [None]:
from keras.utils import np_utils

In [None]:
# Convert the class-label vector into a categorical (one-hot style) matrix.
# NOTE(review): train_y_encoded is not referenced by any later cell shown here;
# the classifiers below are fit on train_y directly — confirm whether this is still needed.
train_y_encoded = np_utils.to_categorical(train_y)

In [None]:
# One-hot encode the two categorical columns of the training features,
# LINE first and PRODUCT_CODE second, each with its own column prefix.
train_x_encoded = (
    train_x
    .pipe(pd.get_dummies, columns=['LINE'], prefix='Line')
    .pipe(pd.get_dummies, columns=['PRODUCT_CODE'], prefix='PRODUCT_CODE')
)

In [None]:
# One-hot encode the two categorical columns of the test features the same
# way as the training features.
test_x_encoded = pd.get_dummies(data = test_x, columns = ['LINE'], prefix = 'Line')
test_x_encoded = pd.get_dummies(data = test_x_encoded, columns = ['PRODUCT_CODE'], prefix = 'PRODUCT_CODE')

# get_dummies only creates columns for the categories present in the frame it
# is given, so the test frame can end up with missing, extra or differently
# ordered dummy columns compared with train_x_encoded. Align it to the exact
# training column layout: dummy columns absent from the test data are added
# as all-zero, and columns the model never saw during fit are dropped.
test_x_encoded = test_x_encoded.reindex(columns=train_x_encoded.columns, fill_value=0)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Baseline model: gradient boosting with a fixed random seed and otherwise
# default settings, fit on the one-hot encoded training features.
gb = GradientBoostingClassifier(random_state=0)
gb.fit(X=train_x_encoded, y=train_y)

## 파라미터 조정

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Candidate hyperparameter values; the search tries every combination.
params = dict(
    n_estimators=[100, 500],
    learning_rate=[0.05, 0.1],
)

# 2-fold cross-validated grid search built on the `gb` estimator.
grid_cv = GridSearchCV(estimator=gb, param_grid=params, cv=2, verbose=1)
grid_cv.fit(train_x_encoded, train_y)

# Report the best parameter combination and its score (the printed labels
# read "best hyperparameters" and "best prediction accuracy").
print('최적 하이퍼 파라미터:\n', grid_cv.best_params_)
print('최고 예측 정확도: {0:.4f}'.format(grid_cv.best_score_))

Fitting 2 folds for each of 4 candidates, totalling 8 fits
최적 하이퍼 파라미터:
 {'learning_rate': 0.05, 'n_estimators': 100}
최고 예측 정확도: 0.4197


In [None]:
# Predict test labels with the search's refit best estimator; with the default
# refit setting, GridSearchCV.predict delegates to best_estimator_.
preds = grid_cv.predict(test_x_encoded)

## 예측

In [None]:
# Predict with the estimator that the grid search refit on the best
# parameters. Predicting with the untuned `gb` baseline here would overwrite
# the tuned predictions just before they are written to the submission file.
preds = grid_cv.best_estimator_.predict(test_x_encoded)

In [None]:
# Load the submission template from the working directory.
submit = pd.read_csv('./sample_submission.csv')

In [None]:
# Store the predicted labels in the Y_Class column of the submission frame.
# NOTE(review): assumes the rows of sample_submission.csv are in the same order
# as the rows of test.csv — confirm.
submit['Y_Class'] = preds

In [None]:
# Write the submission to CSV without the DataFrame index column.
# NOTE(review): the file name says "xgboost", but the models in this notebook
# are scikit-learn GradientBoostingClassifier instances.
submit.to_csv('./xgboost_submission_gbmbest.csv', index=False)