In [5]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import lightgbm as lgb

# Importing the dataset
dataset = pd.read_csv('train.csv')

FEATURE_SIZE=13
X = dataset.iloc[:,0:FEATURE_SIZE].values
y = dataset.iloc[:, FEATURE_SIZE].values

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
x_train, x_val, y_train, y_val = train_test_split(X, y, test_size = 0.2, random_state = 0)
x_val, x_test, y_val, y_test = train_test_split(x_val, y_val, test_size = 0.2, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
x_val=sc.transform(x_val)

# Training
d_train = lgb.Dataset(x_train, label=y_train)
d_val = lgb.Dataset(x_val, label=y_val)
d_test=lgb.Dataset(x_test,label=y_test)

params = {
            'objective': 'binary',
            'boosting_type': 'gbdt',
#             'boosting_type': 'rf',
            'nthread': 4,
            'learning_rate': 0.02,  # 02,
            'num_leaves': 15,
            'feature_fraction': 0.9,
            'bagging_fraction': 0.7,
            'bagging_freq': 1,
            'early_stopping_round':5,
            'max_depth':2,
            'reg_alpha': 0.041545473,
            'reg_lambda': 0.0735294,
            'min_split_gain': 0.0222415,
            'min_child_weight': 60,
            'seed': 0,
            'verbose': -1,
            'metric': 'auc',
}


clf = lgb.train(params, d_train, 20,d_val)

#Prediction
y_pred=clf.predict(x_test)
#convert into binary values
for i in range(len(y_pred)):
    if y_pred[i]>=.5:       # setting threshold to .5
       y_pred[i]=1
    else:
       y_pred[i]=0
    
#Accuracy
from sklearn.metrics import accuracy_score
accuracy=accuracy_score(y_pred,y_test)
print(accuracy)

# AUC
from sklearn.metrics import roc_auc_score
print(roc_auc_score(y_test, y_pred))


[1]	valid_0's auc: 0.973164
Training until validation scores don't improve for 5 rounds.
[2]	valid_0's auc: 0.974783
[3]	valid_0's auc: 0.975053
[4]	valid_0's auc: 0.975366
[5]	valid_0's auc: 0.975327
[6]	valid_0's auc: 0.975413
[7]	valid_0's auc: 0.983587
[8]	valid_0's auc: 0.983719
[9]	valid_0's auc: 0.985384
[10]	valid_0's auc: 0.985509
[11]	valid_0's auc: 0.991205
[12]	valid_0's auc: 0.992166
[13]	valid_0's auc: 0.992434
[14]	valid_0's auc: 0.992457
[15]	valid_0's auc: 0.99259
[16]	valid_0's auc: 0.992698
[17]	valid_0's auc: 0.992826
[18]	valid_0's auc: 0.992875
[19]	valid_0's auc: 0.992985
[20]	valid_0's auc: 0.992997
Did not meet early stopping. Best iteration is:
[20]	valid_0's auc: 0.992997
0.9527618095476131
0.9532197154303907




In [6]:
testdata = pd.read_csv('test.csv')
x_testing = testdata.iloc[:,0:FEATURE_SIZE].values
x_testing = sc.transform(x_testing)
y_pred=clf.predict(x_testing)

In [7]:
import csv
with open("kaggle.csv","w",newline="") as csvfile:
    writer=csv.writer(csvfile)
    writer.writerow(["Id","Prediction"])
    test_id=1
    for prediction in y_pred:
        writer.writerow([test_id,prediction])
        test_id+=1