In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb

In [2]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

In [3]:
def print_metrics(y_test,predictions,model):
    print('\033[1m' + model + '\033[0m' + '\n\n')
    print('\033[1m' + 'Classification Report - ' + '\033[0m')
    print(classification_report(y_test,predictions))
    print('\033[1m' + 'ROC AUC Score - ' + '\033[0m')
    print(roc_auc_score(y_test,predictions))
    print('\033[1m' + 'Accuracy Score - ' + '\033[0m')
    print(accuracy_score(y_test,predictions))

def dtree(X,Y):
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
    dt = DecisionTreeClassifier()
    dt.fit(X_train,y_train)
    predictions_dt = dt.predict(X_test)
    print_metrics(y_test,predictions_dt,'Decision Tree')
    
def xboost(X,Y):
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
    boost = XGBClassifier(eval_metric='auc')
    boost.fit(X_train,y_train)
    predictions_boost = boost.predict(X_test)
    print_metrics(y_test,predictions_boost,'XGBoost')
    
def rforrest(X,Y,n):
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
    rfc = RandomForestClassifier(n_estimators=n,n_jobs=-1)
    rfc.fit(X_train,y_train)
    predictions_rfc = rfc.predict(X_test)
    print_metrics(y_test,predictions_rfc,'Random Forrest')
    


In [8]:
def lgbm_runner(X,Y):
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=18)
    train_lgb = lgb.Dataset(data=X_train, label=y_train)
    test_lgb = lgb.Dataset(data=X_test, label=y_test)
    params = {'task': 'train', 'boosting_type': 'gbdt', 'objective': 'binary', 'metric': 'auc', 
          'learning_rate': 0.01, 'num_leaves': 48, 'verbose': 0 ,
          'colsample_bytree':.8, 'subsample':.9, 'max_depth':7, 'reg_alpha':.1, 'reg_lambda':.1, 
          'min_split_gain':.01, 'min_child_weight':1}
    model = lgb.train(params, train_lgb,500)
    predictions_lgb = model.predict(X_test)
    print('Light GBM')
    print(roc_auc_score(y_test,predictions_lgb))

In [5]:
X = pd.read_csv('../input/final_data.csv')
Y = pd.read_csv('../input/final_target.csv', header=None)

In [6]:
Y = Y[1]

In [7]:
xboost(X,Y)

[1mXGBoost[0m


[1mClassification Report - [0m
              precision    recall  f1-score   support

           0       0.92      1.00      0.96     84841
           1       0.53      0.01      0.02      7413

   micro avg       0.92      0.92      0.92     92254
   macro avg       0.73      0.50      0.49     92254
weighted avg       0.89      0.92      0.88     92254

[1mROC AUC Score - [0m
0.5043619393179392
[1mAccuracy Score - [0m
0.9197433173629328


In [9]:
lgbm_runner(X,Y)

Light GBM
0.7729935820792314
