In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
import random
import os
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [3]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
seed_everything(37) # Seed 고정

In [47]:
train_df = pd.read_csv('/content/drive/MyDrive/LG/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/LG/test.csv')

# 데이터 전처리

In [48]:
train_x = train_df.drop(columns=['PRODUCT_ID', 'TIMESTAMP', 'Y_Class', 'Y_Quality'])
train_y = train_df['Y_Quality']
test_x = test_df.drop(columns=['PRODUCT_ID', 'TIMESTAMP'])

In [49]:
train_x = train_x.fillna(0)
test_x = test_x.fillna(0)

In [50]:
# qualitative to quantitative
qual_col = ['LINE', 'PRODUCT_CODE']

for i in qual_col:
    le = LabelEncoder()
    le = le.fit(train_x[i])
    train_x[i] = le.transform(train_x[i])
    for label in np.unique(test_x[i]): 
        if label not in le.classes_: 
            le.classes_ = np.append(le.classes_, label)
    test_x[i] = le.transform(test_x[i]) 
print('Done.')

Done.


In [51]:
from sklearn.model_selection import train_test_split
x_train, x_val, y_train, y_val = train_test_split(train_x, train_y, test_size = 0.15, random_state = 7)

In [52]:
train_x.head()

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2866,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875
0,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,39.34,40.89,32.56,34.09,77.77,0.0,0.0,0.0,0.0,0.0
1,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,38.89,42.82,43.92,35.34,72.55,0.0,0.0,0.0,0.0,0.0
2,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,39.19,36.65,42.47,36.53,78.35,0.0,0.0,0.0,0.0,0.0
3,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,37.74,39.17,52.17,30.58,71.78,0.0,0.0,0.0,0.0,0.0
4,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,38.7,41.89,46.93,33.09,76.97,0.0,0.0,0.0,0.0,0.0


# Y_Quality 예측하기

In [53]:
#RandomForestRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
model = RandomForestRegressor()
model.fit(x_train, y_train)
y_pred = model.predict(x_val)
import pandas as pd
df = pd.DataFrame({'y_val':y_val, 'y_pred':y_pred})
cor = df['y_val'].corr(df['y_pred'])
print(cor)
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
print('MAE: %d' %mae)
print('MSE: %d' %mse)
print(r2_score(y_val, y_pred))

0.8787761342708151
MAE: 0
MSE: 0
0.6791328754775149


In [54]:
#GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
model = GradientBoostingRegressor()
model.fit(x_train, y_train)
y_pred = model.predict(x_val)
import pandas as pd
df = pd.DataFrame({'y_val':y_val, 'y_pred':y_pred})
cor = df['y_val'].corr(df['y_pred'])
print(cor)
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
print('MAE: %d' %mae)
print('MSE: %d' %mse)
print(r2_score(y_val, y_pred))

0.8970696181249885
MAE: 0
MSE: 0
0.7509177214114132


In [55]:
#XGBRegressor
from xgboost import XGBRegressor
model = XGBRegressor()
model.fit(x_train, y_train)
y_pred = model.predict(x_val)
import pandas as pd
df = pd.DataFrame({'y_val':y_val, 'y_pred':y_pred})
cor = df['y_val'].corr(df['y_pred'])
print(cor)
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
print('MAE: %d' %mae)
print('MSE: %d' %mse)
print(r2_score(y_val, y_pred))

0.8980905467629758
MAE: 0
MSE: 0
0.7572947989471335


In [56]:
#LGBMRegressor
from lightgbm import LGBMRegressor
model = LGBMRegressor()
model.fit(x_train, y_train)
y_pred = model.predict(x_val)
import pandas as pd
df = pd.DataFrame({'y_val':y_val, 'y_pred':y_pred})
cor = df['y_val'].corr(df['y_pred'])
print(cor)
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
print('MAE: %d' %mae)
print('MSE: %d' %mse)
print(r2_score(y_val, y_pred))

0.7324348315743191
MAE: 0
MSE: 0
0.48503663129244634


In [57]:
#CatBoostRegressor
!pip install catboost
from catboost import CatBoostRegressor
model = CatBoostRegressor()
model.fit(x_train, y_train)
y_pred = model.predict(x_val)
import pandas as pd
df = pd.DataFrame({'y_val':y_val, 'y_pred':y_pred})
cor = df['y_val'].corr(df['y_pred'])
print(cor)
mae = mean_absolute_error(y_val, y_pred)
mse = mean_squared_error(y_val, y_pred)
print('MAE: %d' %mae)
print('MSE: %d' %mse)
print(r2_score(y_val, y_pred))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Learning rate set to 0.036788
0:	learn: 0.0070421	total: 333ms	remaining: 5m 32s
1:	learn: 0.0069817	total: 551ms	remaining: 4m 34s
2:	learn: 0.0069411	total: 774ms	remaining: 4m 17s
3:	learn: 0.0068909	total: 1.01s	remaining: 4m 12s
4:	learn: 0.0068426	total: 1.22s	remaining: 4m 3s
5:	learn: 0.0067885	total: 1.45s	remaining: 4m
6:	learn: 0.0067398	total: 1.65s	remaining: 3m 54s
7:	learn: 0.0066808	total: 1.86s	remaining: 3m 50s
8:	learn: 0.0066415	total: 1.95s	remaining: 3m 34s
9:	learn: 0.0065984	total: 2.17s	remaining: 3m 35s
10:	learn: 0.0065545	total: 2.4s	remaining: 3m 35s
11:	learn: 0.0065130	total: 2.63s	remaining: 3m 36s
12:	learn: 0.0064742	total: 2.87s	remaining: 3m 37s
13:	learn: 0.0064285	total: 3.06s	remaining: 3m 35s
14:	learn: 0.0064005	total: 3.28s	remaining: 3m 35s
15:	learn: 0.0063823	total: 3.5s	remaining: 3m 35s
16:	learn: 0.0063336	total: 3.73s	remaining: 3m 36s
17:	

In [18]:
#GradientBoosting 채택!
#GradientBoostingRegressor
from sklearn.ensemble import GradientBoostingRegressor
model = GradientBoostingRegressor()
model.fit(train_x, train_y)
pred = model.predict(test_x)
print(pred)

[0.52974009 0.53031333 0.53158163 0.52471806 0.53060322 0.53074504
 0.53535852 0.52501891 0.52421328 0.53230917 0.52999298 0.53333099
 0.53530626 0.52608715 0.51719223 0.52674282 0.53043671 0.52774023
 0.53012286 0.53113062 0.52400486 0.53100392 0.52753723 0.5265506
 0.52720677 0.52604913 0.5294094  0.52701444 0.52565609 0.53135362
 0.52776735 0.53098553 0.52548693 0.52961064 0.53094499 0.51833567
 0.53183512 0.51944285 0.54436131 0.52537044 0.52975156 0.52418641
 0.52615098 0.52828107 0.53007728 0.52899844 0.53082163 0.52220086
 0.53058598 0.52131801 0.53087467 0.52129182 0.52450169 0.52616537
 0.52719791 0.52480867 0.52984625 0.5301491  0.52555095 0.52795477
 0.52667182 0.52443104 0.52315738 0.52673773 0.52219606 0.51999959
 0.52463097 0.53050981 0.52724684 0.52570454 0.52605731 0.51926588
 0.53276554 0.52909123 0.53040707 0.5296169  0.53049662 0.52714634
 0.54169121 0.53002752 0.53152461 0.53135842 0.53026724 0.52978277
 0.5315935  0.53090483 0.51892132 0.5317413  0.51644172 0.52528

In [58]:
#2 XGBRegressor로 똑같이 돌려보기
from xgboost import XGBRegressor
model = XGBRegressor()
model.fit(train_x, train_y)
pred = model.predict(test_x)
print(pred)

[0.5300826  0.53335905 0.5330728  0.52693814 0.53114325 0.53265584
 0.53518385 0.52503693 0.5244367  0.5349249  0.53421706 0.5362497
 0.5360222  0.5326302  0.52129096 0.52449244 0.5310538  0.5270069
 0.5303354  0.53068256 0.52672184 0.53119445 0.52586365 0.5243328
 0.52622    0.527236   0.52909917 0.52453387 0.5257574  0.5310306
 0.5247016  0.53065974 0.52484274 0.5290763  0.53080237 0.52069277
 0.533051   0.51703835 0.5452564  0.52727336 0.5307245  0.5234286
 0.525488   0.5304378  0.5303881  0.5295539  0.5318954  0.51832527
 0.52968895 0.5203579  0.5312664  0.51906997 0.5241336  0.5251485
 0.5260349  0.5244768  0.5298272  0.5294678  0.52434754 0.5261637
 0.5254705  0.5244179  0.5297283  0.5335276  0.5232314  0.5222772
 0.52329636 0.5313574  0.52856946 0.52804166 0.52718306 0.5195991
 0.53296673 0.5323598  0.52871484 0.53326046 0.529795   0.5296723
 0.5441595  0.5322118  0.53500044 0.5317471  0.53284955 0.5333875
 0.53340125 0.53085417 0.5185641  0.53249437 0.51671875 0.5273256
 0.5335

In [59]:
test_x['Y_Quality']=pred
test_x.head()

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875,Y_Quality
0,5,2,2.0,94.0,0.0,45.0,10.0,0.0,51.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.530083
1,4,2,2.0,93.0,0.0,45.0,11.0,0.0,45.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.533359
2,4,2,2.0,95.0,0.0,45.0,11.0,0.0,45.0,10.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.533073
3,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.526938
4,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.531143


In [60]:
train_x = train_x.join(train_df['Y_Quality'])
train_x.head()

Unnamed: 0,LINE,PRODUCT_CODE,X_1,X_2,X_3,X_4,X_5,X_6,X_7,X_8,...,X_2867,X_2868,X_2869,X_2870,X_2871,X_2872,X_2873,X_2874,X_2875,Y_Quality
0,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,40.89,32.56,34.09,77.77,0.0,0.0,0.0,0.0,0.0,0.533433
1,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,42.82,43.92,35.34,72.55,0.0,0.0,0.0,0.0,0.0,0.541819
2,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,36.65,42.47,36.53,78.35,0.0,0.0,0.0,0.0,0.0,0.531267
3,3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,39.17,52.17,30.58,71.78,0.0,0.0,0.0,0.0,0.0,0.537325
4,2,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,41.89,46.93,33.09,76.97,0.0,0.0,0.0,0.0,0.0,0.53159


# 분류 모델 구축하기

In [62]:
train_y = train_df['Y_Class']
train_y.head()

0    1
1    2
2    1
3    2
4    1
Name: Y_Class, dtype: int64

In [63]:
from sklearn.model_selection import train_test_split
x_train, x_val, y_train, y_val = train_test_split(train_x, train_y, test_size = 0.15, random_state = 7)

In [64]:
#RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
model = RandomForestClassifier()
model.fit(X=x_train, y=y_train)
y_pred = model.predict(x_val) # 예측치
acc = accuracy_score(y_val, y_pred)
print(acc)
print(classification_report(y_val, y_pred))

0.7555555555555555
              precision    recall  f1-score   support

           0       0.83      0.33      0.48        15
           1       0.72      1.00      0.84        55
           2       1.00      0.40      0.57        20

    accuracy                           0.76        90
   macro avg       0.85      0.58      0.63        90
weighted avg       0.80      0.76      0.72        90



In [65]:
#GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier
model = GradientBoostingClassifier()
model.fit(X=x_train, y=y_train)
y_pred = model.predict(x_val) # 예측치
acc = accuracy_score(y_val, y_pred)
print(acc)
print(classification_report(y_val, y_pred))

1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        55
           2       1.00      1.00      1.00        20

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



In [66]:
#XGBClassifier
from xgboost import XGBClassifier
model = XGBClassifier()
model.fit(X=x_train, y=y_train)
y_pred = model.predict(x_val) # 예측치
acc = accuracy_score(y_val, y_pred)
print(acc)
print(classification_report(y_val, y_pred))

1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        55
           2       1.00      1.00      1.00        20

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



In [67]:
#LGBMClassifier
from lightgbm import LGBMClassifier
model = LGBMClassifier()
model.fit(X=x_train, y=y_train)
y_pred = model.predict(x_val) # 예측치
acc = accuracy_score(y_val, y_pred)
print(acc)
print(classification_report(y_val, y_pred))

1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        15
           1       1.00      1.00      1.00        55
           2       1.00      1.00      1.00        20

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



In [68]:
#GaussianNB
from sklearn.naive_bayes import GaussianNB
model = GaussianNB()
model.fit(X=x_train, y=y_train)
y_pred = model.predict(x_val) # 예측치
acc = accuracy_score(y_val, y_pred)
print(acc)
print(classification_report(y_val, y_pred))

0.7333333333333333
              precision    recall  f1-score   support

           0       0.62      0.67      0.65        15
           1       0.76      0.93      0.84        55
           2       0.71      0.25      0.37        20

    accuracy                           0.73        90
   macro avg       0.70      0.61      0.62        90
weighted avg       0.73      0.73      0.70        90



In [37]:
#SVC
from sklearn.svm import SVC # model 생성
model = SVC()
model.fit(x_train, y_train)
y_pred = model.predict(x_val)
acc = accuracy_score(y_val, y_pred)
print(acc)
print(classification_report(y_val, y_pred))

0.6555555555555556
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        15
           1       0.65      0.98      0.78        55
           2       0.71      0.25      0.37        20

    accuracy                           0.66        90
   macro avg       0.45      0.41      0.38        90
weighted avg       0.56      0.66      0.56        90



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [69]:
#머지 먼가 이상한데,,그래도 gradientBoostingClassifier로 제출해보자
#GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier
model = GradientBoostingClassifier()
model.fit(train_x, train_y)
gbc_pred = model.predict(test_x)
print(gbc_pred)

[1 1 1 1 1 1 2 0 0 2 1 2 2 1 0 0 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 1 0 1
 0 2 1 1 0 1 1 1 1 1 0 1 0 1 0 0 1 1 0 1 1 0 1 1 0 1 1 0 0 0 1 1 1 1 0 1 1
 1 1 1 1 2 1 2 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 2 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1
 1 2 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 2 2 1 2 2 2 2 0 1 1 1 1 2 2 0 1
 1 0 0 0 1 1 0 0 0 1 0 1 1 1]


In [70]:
gbc_pred == xgb_pred

array([ True,  True,  True, False,  True,  True,  True,  True,  True,
       False,  True, False,  True,  True,  True, False,  True,  True,
        True,  True, False,  True,  True, False,  True,  True,  True,
       False,  True,  True, False,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [71]:
submit = pd.read_csv('/content/drive/MyDrive/LG/sample_submission.csv')
submit['Y_Class'] = gbc_pred
submit.to_csv('./0213_2.csv', index=False)

In [40]:
#0.67652로 성능 제일 높였다!!!!!!!!!!!!!!!!!!!1
#두번째는 0.5790268432

In [43]:
#그럼 XGBClassifier는?
#XGBClassifier
from xgboost import XGBClassifier
model = XGBClassifier()
model.fit(train_x, train_y)
xgb_pred = model.predict(test_x)
print(xgb_pred)

[1 1 1 0 1 1 2 0 0 1 1 1 2 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
 0 2 1 1 0 1 1 1 1 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 1
 1 1 1 1 2 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 1 1 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 2 1 1 1 1 1 1 1 1 1 1
 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 2 2 1 2 1 1 1 0 1 1 1 1 1 1 0 1
 1 1 1 0 1 1 0 1 1 1 0 1 1 1]


In [44]:
gbc_pred == xgb_pred

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,

In [45]:
#LGBMClassifier
from lightgbm import LGBMClassifier
model = LGBMClassifier()
model.fit(train_x, train_y)
lgbm_pred = model.predict(test_x)
print(lgbm_pred)

[1 1 1 0 1 1 2 0 0 1 1 1 2 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
 0 2 1 1 0 1 1 1 1 1 0 1 0 1 0 0 1 1 0 1 1 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 1
 1 1 1 1 2 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 0 1 1 1 1 2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 2 1 1 1 1 1 1 1 1 1 1
 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 2 2 1 2 1 1 1 0 1 1 1 1 1 1 0 1
 1 1 1 0 1 1 0 1 1 1 0 1 1 1]


In [46]:
lgbm_pred == gbc_pred

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,