In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
cd /content/drive/My Drive/psat_summer

/content/drive/My Drive/psat_summer


In [6]:
import pandas as pd
import numpy as np
import os

import lightgbm as lgb
from lightgbm import LGBMClassifier

from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, classification_report
from sklearn.model_selection import KFold

from imblearn.over_sampling import SMOTE

import pickle
import joblib

In [4]:
# Load the training table; the path is relative to the current working directory.
# NOTE(review): presumably PCA-reduced features, judging by the file name — confirm.
train = pd.read_csv("train_pca.csv")

In [5]:
# Separate the label column ('target') from the remaining feature columns.
x = train.drop(columns = 'target')
y = train['target']

In [10]:
# 5-fold cross-validation of a LightGBM binary classifier.
# SMOTE oversampling is fitted on the training part of each fold only; the
# validation part is used unchanged for early stopping and for scoring.

# Hyper-parameters do not change between folds, so they are defined once.
params = {'learning_rate': 0.01,
          'n_estimators': 1000,
          'max_depth': -1,
          'boosting': 'gbdt',
          'objective': 'binary',
          'metric': 'auc',
          'num_leaves': 31,
          'colsample_bytree': 0.9,
          'subsample': 1.0,
          'random_state': 2021}

# Per-fold results; aggregated into CM / f1_scores after the loop.
fold_cms = []
fold_f1s = []

folds = KFold(n_splits = 5, shuffle = True, random_state = 2021)

for train_idx, valid_idx in folds.split(x):
  train_x, train_y = x.iloc[train_idx], y.iloc[train_idx]
  valid_x, valid_y = x.iloc[valid_idx], y.iloc[valid_idx]

  # fit_resample is the supported API; the old fit_sample alias was removed
  # from imbalanced-learn, so calling it fails on current releases.
  smote = SMOTE(random_state = 2021)
  smote_x, smote_y = smote.fit_resample(train_x, train_y)

  # Depending on the imbalanced-learn version the resampled features may come
  # back as an ndarray; force a DataFrame with the original column names so
  # the training and validation frames match.
  smote_x = pd.DataFrame(smote_x)
  smote_x.columns = valid_x.columns

  model = lgb.LGBMClassifier(**params)
  # Early stopping monitors AUC on the validation fold.
  # NOTE(review): newer LightGBM releases replace the early_stopping_rounds /
  # verbose fit arguments with callbacks — confirm against the installed version.
  model.fit(smote_x, smote_y, eval_set=[(valid_x, valid_y)], early_stopping_rounds= 100, verbose=100)

  y_pred = model.predict(valid_x)

  fold_cms.append(confusion_matrix(valid_y, y_pred))
  fold_f1s.append(f1_score(valid_y, y_pred))
  print(classification_report(valid_y, y_pred, target_names=['class 0', 'class 1']))

# Aggregate across folds: element-wise sum of confusion matrices, mean F1.
CM = sum(fold_cms)
f1_scores = np.mean(fold_f1s)
print("F1 score : %f" % f1_scores)



Training until validation scores don't improve for 100 rounds.
[100]	valid_0's auc: 0.840374
Early stopping, best iteration is:
[91]	valid_0's auc: 0.84088
              precision    recall  f1-score   support

     class 0       0.96      0.82      0.88      5041
     class 1       0.30      0.70      0.42       559

    accuracy                           0.81      5600
   macro avg       0.63      0.76      0.65      5600
weighted avg       0.90      0.81      0.84      5600





Training until validation scores don't improve for 100 rounds.
[100]	valid_0's auc: 0.845935
Early stopping, best iteration is:
[79]	valid_0's auc: 0.846327
              precision    recall  f1-score   support

     class 0       0.96      0.80      0.88      5030
     class 1       0.30      0.74      0.42       570

    accuracy                           0.80      5600
   macro avg       0.63      0.77      0.65      5600
weighted avg       0.90      0.80      0.83      5600





Training until validation scores don't improve for 100 rounds.
[100]	valid_0's auc: 0.838648
Early stopping, best iteration is:
[83]	valid_0's auc: 0.839087
              precision    recall  f1-score   support

     class 0       0.96      0.79      0.87      5028
     class 1       0.28      0.73      0.41       572

    accuracy                           0.78      5600
   macro avg       0.62      0.76      0.64      5600
weighted avg       0.89      0.78      0.82      5600





KeyboardInterrupt: ignored