# 1. import libraries

In [1]:
import warnings
import numpy as np
import pandas as pd
from sklearn import tree
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV

In [3]:
warnings.filterwarnings("ignore")
%matplotlib inline
plt.style.use('seaborn-white')
plt.rcParams['figure.figsize'] = 15,5

---
# 2. import dataset

In [4]:
# Read the modelling table from the working directory; the trailing expression
# makes the cell display its (rows, columns) shape.
csv_path = 'temp.csv'
df = pd.read_csv(csv_path)
df.shape

(522, 10)

In [5]:
# Separate the 'Y' column (used as the target) from every other column (used as features).
x = df.drop(columns='Y')
y = df.loc[:, 'Y']

---
# 3. RF Model - 1st iteration

In [6]:
# Coarse first-pass search space: each hyperparameter is swept over 10 / 100 / 1000.
# NOTE(review): the loaded frame reports 522 rows, so min_samples_leaf=1000 can never
# be satisfied by a split -- confirm that this candidate value is intended.
parameters = dict(
    max_depth=[10, 100, 1000],
    min_samples_leaf=[10, 100, 1000],
    n_estimators=[10, 100, 1000],
)

In [7]:
# Exhaustive search over `parameters` with 3-fold cross-validation, scoring each
# candidate by ROC AUC and using every available core (n_jobs=-1).
model = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    scoring="roc_auc",
    cv=3,
    n_jobs=-1,
)
# The trailing semicolon keeps the fitted estimator's repr out of the cell output.
grid_search.fit(x, y);

In [8]:
# Show the best mean cross-validated ROC AUC, then the hyperparameters that produced it.
print(grid_search.best_score_, grid_search.best_params_, sep="\n")

0.5436329091551785
{'max_depth': 10, 'min_samples_leaf': 100, 'n_estimators': 1000}


---
# 4. RF Model - 2nd iteration

In [9]:
# Narrower second-pass search space with the forest size pinned at 1000 trees;
# depth and leaf-size candidates bracket the first iteration's reported best values.
# NOTE(review): with 522 rows in the loaded frame, min_samples_leaf=500 cannot be met
# on both sides of any split -- confirm that this candidate value is intended.
parameters = dict(
    max_depth=[5, 10, 50],
    min_samples_leaf=[50, 100, 500],
    n_estimators=[1000],
)

In [10]:
# Repeat the 3-fold, ROC-AUC-scored exhaustive search, now over the current
# (second-iteration) contents of `parameters`.
model = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    scoring="roc_auc",
    cv=3,
    n_jobs=-1,
)
# The trailing semicolon keeps the fitted estimator's repr out of the cell output.
grid_search.fit(x, y);

In [11]:
# Show the best mean cross-validated ROC AUC, then the hyperparameters that produced it.
print(grid_search.best_score_, grid_search.best_params_, sep="\n")

0.5436329091551785
{'max_depth': 5, 'min_samples_leaf': 100, 'n_estimators': 1000}


---
# 5. ROC AUC

In [12]:
# Score a forest with hard-coded hyperparameters (matching the second grid search's
# reported best values) using 3-fold cross-validated ROC AUC.
# NOTE(review): `accuracies` holds per-fold ROC AUC values, not accuracies; the name
# is kept because the following cell reads it.
# NOTE(review): this reuses the data and CV protocol that selected the hyperparameters,
# so the estimate is likely optimistic -- consider a held-out set or nested CV.
model = RandomForestClassifier(
    n_estimators=1000,
    max_depth=5,
    min_samples_leaf=100,
    random_state=42,
)
accuracies = cross_val_score(model, x, y, scoring="roc_auc", cv=3, n_jobs=-1)

In [13]:
# Summarise the fold scores as mean and a +/- band of two standard deviations.
auroc_mean = accuracies.mean()
auroc_band = accuracies.std() * 2
print("AUROC: %0.3f (+/- %0.3f)"  % (auroc_mean, auroc_band))

AUROC: 0.544 (+/- 0.099)
