In [42]:
import pandas as pd
import matplotlib as plt
import numpy as np
import os
from sklearn.metrics import log_loss
from catboost import Pool, CatBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
import lightgbm as lgbm
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
greeks = pd.read_csv('greeks.csv')

In [43]:
train['EJ'] = train['EJ'].replace({'A': 0, 'B': 1})
test['EJ'] = test['EJ'].replace({'A': 0, 'B': 1})

In [45]:
X,y = train.drop(['Class','Id'],axis = 1), train["Class"]

In [47]:
test_df = test.drop('Id',axis=1)

In [48]:
model_lgbm = lgbm.LGBMClassifier(max_depth = 5, n_estimators = 140, random_state = 73, class_weight = 'balanced',num_leaves = 31,learning_rate=0.1)

model_lgbm.fit(
X,y,eval_metric='binary_logloss',
)

pred_lgb = pd.DataFrame(model_lgbm.predict_proba(test_df))
pred_lgb

Unnamed: 0,0,1
0,0.996302,0.003698
1,0.996302,0.003698
2,0.996302,0.003698
3,0.996302,0.003698
4,0.996302,0.003698


In [50]:
import xgboost as xgb

model_xgb = xgb.XGBClassifier(
    max_depth = 3,
    n_estimators = 120,
    random_state = 73,
    class_weight = 'balanced'
)

model_xgb.fit(
    X, y,
    eval_metric = 'logloss'
)

pred_xgb = pd.DataFrame(model_xgb.predict_proba(test_df))
pred_xgb



Parameters: { "class_weight" } are not used.



Unnamed: 0,0,1
0,0.867215,0.132785
1,0.867215,0.132785
2,0.867215,0.132785
3,0.867215,0.132785
4,0.867215,0.132785


In [51]:
model_cat = CatBoostClassifier(
    max_depth = 3,
    n_estimators = 120,
    random_state = 73,
    auto_class_weights='Balanced'
)

model_cat.fit(
    X, y,
    verbose = False
)

pred_cat = pd.DataFrame(model_cat.predict_proba(test_df))
pred_cat

Unnamed: 0,0,1
0,0.746314,0.253686
1,0.746314,0.253686
2,0.746314,0.253686
3,0.746314,0.253686
4,0.746314,0.253686


In [52]:
base_models = [model_xgb, model_cat]

ensemble_models = model_lgbm

In [53]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=73, test_size=0.2)

predictions = []
for model in base_models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    predictions.append(y_pred)

ensemble_X = np.column_stack(predictions)

ensemble_models.fit(ensemble_X, y_test)

ensemble_pred = ensemble_models.predict_proba(ensemble_X)

ensemble_pred

Parameters: { "class_weight" } are not used.

Learning rate set to 0.053232
0:	learn: 0.6692337	total: 3.15ms	remaining: 375ms
1:	learn: 0.6408616	total: 5.25ms	remaining: 310ms
2:	learn: 0.6240545	total: 7.38ms	remaining: 288ms
3:	learn: 0.5987886	total: 9.27ms	remaining: 269ms
4:	learn: 0.5777435	total: 11.2ms	remaining: 258ms
5:	learn: 0.5488209	total: 13.1ms	remaining: 249ms
6:	learn: 0.5332000	total: 14.9ms	remaining: 241ms
7:	learn: 0.5118189	total: 16.7ms	remaining: 234ms
8:	learn: 0.4957523	total: 18.4ms	remaining: 228ms
9:	learn: 0.4777963	total: 21.2ms	remaining: 233ms
10:	learn: 0.4592584	total: 23.8ms	remaining: 236ms
11:	learn: 0.4487586	total: 26.3ms	remaining: 237ms
12:	learn: 0.4266131	total: 28.7ms	remaining: 236ms
13:	learn: 0.4185960	total: 31.8ms	remaining: 241ms
14:	learn: 0.4066676	total: 35.6ms	remaining: 249ms
15:	learn: 0.3982743	total: 38.7ms	remaining: 252ms
16:	learn: 0.3857386	total: 40.6ms	remaining: 246ms
17:	learn: 0.3773769	total: 42.5ms	remaining: 241m

array([[0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.11302492, 0.88697508],
       [0.7913289 , 0.2086711 ],
       [0.11302492, 0.88697508],
       [0.7913289 , 0.2086711 ],
       [0.11302492, 0.88697508],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.11302492, 0.88697508],
       [0.7913289 , 0.2086711 ],
       [0.11302492, 0.88697508],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.7913289 , 0.2086711 ],
       [0.

In [54]:
predictions = []
for model in base_models:
    model.fit(X, y)
    y_pred = model.predict(test_df)
    predictions.append(y_pred)

ensemble_X = np.column_stack(predictions)

ensemble_pred = pd.DataFrame(ensemble_models.predict_proba(ensemble_X))

ensemble_pred

Parameters: { "class_weight" } are not used.

Learning rate set to 0.058584
0:	learn: 0.6707233	total: 2.36ms	remaining: 281ms
1:	learn: 0.6412267	total: 4.73ms	remaining: 279ms
2:	learn: 0.6236255	total: 6.7ms	remaining: 261ms
3:	learn: 0.5942125	total: 8.7ms	remaining: 252ms
4:	learn: 0.5669312	total: 10.7ms	remaining: 246ms
5:	learn: 0.5378838	total: 12.7ms	remaining: 241ms
6:	learn: 0.5221937	total: 14.8ms	remaining: 238ms
7:	learn: 0.5048631	total: 17.4ms	remaining: 244ms
8:	learn: 0.4906272	total: 20ms	remaining: 247ms
9:	learn: 0.4775834	total: 22.1ms	remaining: 243ms
10:	learn: 0.4605206	total: 24.8ms	remaining: 246ms
11:	learn: 0.4461241	total: 27ms	remaining: 243ms
12:	learn: 0.4379751	total: 29.1ms	remaining: 239ms
13:	learn: 0.4291559	total: 31.9ms	remaining: 242ms
14:	learn: 0.4189305	total: 34.3ms	remaining: 240ms
15:	learn: 0.4060206	total: 36.7ms	remaining: 239ms
16:	learn: 0.3919436	total: 39.4ms	remaining: 239ms
17:	learn: 0.3872427	total: 42.7ms	remaining: 242ms
18:	

Unnamed: 0,0,1
0,0.791329,0.208671
1,0.791329,0.208671
2,0.791329,0.208671
3,0.791329,0.208671
4,0.791329,0.208671


In [57]:
sample_submission = pd.read_csv('sample_submission.csv')

In [59]:
sample_submission['Id'] = test.reset_index()['Id']
sample_submission['class_0'] = ensemble_pred[0]
sample_submission['class_1']  = ensemble_pred[1]

sample_submission.set_index('Id').to_csv('submission.csv')