In [1]:
#importing data
import pandas as pd
import numpy as np
np.random.seed(1)
df_train = pd.read_csv('Train.csv')
df_test = pd.read_csv('Test.csv')

In [2]:
#creating test and train sets
X=df_train.drop('IsUnderRisk', axis = 1)
y=df_train['IsUnderRisk']

In [3]:
#importing packages
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import log_loss, make_scorer, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

In [7]:
#train_test split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, test_size = .4, random_state = 42)

#preprocessing
scaler=StandardScaler()
scaler.fit_transform(X_train)
scaler.transform(X_test)

#Initializing Classifiers
xgb=XGBClassifier()
rf=RandomForestClassifier()

#param_grid for each classifier
params_class = {xgb : {'eta' : np.linspace(0.1, 1, 10), 
             'max_depth' : range(3, 11)},  
                rf : {'n_estimators' : range(10, 150, 10),
                     'criterion' : ['gini', 'entropy'],
                     'max_depth' : range(2, 11),
                     }
             }
scorer=make_scorer(log_loss)
classifiers = [xgb, rf]

In [6]:
for c in classifiers:
    gcv=GridSearchCV(c, param_grid=params_class[c], scoring=scorer, cv=10, refit = True)
    gcv.fit(X_train, y_train)
    predictions = gcv.predict(X_test)
    print(gcv.score(X_test, y_test))
    print("Best Parameters are {}".format(gcv.best_params_))
    print('\n\n')



5.2974123033404865
Best Parameters are {'eta': 0.1, 'max_depth': 10}



4.873569675355496
Best Parameters are {'criterion': 'gini', 'max_depth': 2, 'n_estimators': 10}







In [8]:
rf=RandomForestClassifier(max_depth = 2, n_estimators=10)
rf.fit(X_train, y_train)
pred=rf.predict(X_test)
accuracy_score(pred, y_test)
submission = rf.predict_proba(df_test)

In [15]:
pd.DataFrame(submission).to_excel('sub1.xlsx', index = False)

In [13]:
pred

array([1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1,
       1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1,
       1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 1, 1, 0, 1, 1, 0, 0, 1], dtype=int64)