In [98]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn import metrics

In [99]:
# Synthetic binary-classification problem: 1000 samples, 15 features.
# The generator is seeded so a Restart & Run All rebuilds the same dataset
# and the scores further down stay comparable between runs.
data = make_classification(
    n_samples=1000, n_features=15, n_classes=2, random_state=42
)

In [100]:
# `data` is the (feature matrix, label vector) pair produced above;
# unpack both parts in one step.
features, target = data[0], data[1]

In [101]:
# Hold out 30% of the samples for evaluation. The shuffle is seeded so the
# same rows land in train/test on every run; without it each re-run scores
# the models on a different test set.
X_train, X_test, Y_train, Y_test = train_test_split(
    features, target, train_size=0.70, random_state=42
)

# Base Model - Random Forest

In [102]:
# Baseline: a random forest with library-default hyperparameters.
# Seeded so bootstrap sampling and feature selection repeat across runs.
base_model = RandomForestClassifier(random_state=42)

In [103]:
# Train the baseline forest on the training split.
base_model.fit(X=X_train, y=Y_train)

RandomForestClassifier()

In [104]:
# Baseline predictions for the held-out samples, printed for inspection.
Y_pred = base_model.predict(X=X_test)
print(Y_pred)

[0 1 1 1 0 0 1 0 1 0 0 0 1 0 1 0 1 1 1 0 1 0 1 0 0 0 0 0 1 0 1 0 1 0 1 1 0
 1 0 1 1 1 0 0 0 1 1 0 1 0 1 1 0 0 1 0 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 1
 1 0 1 0 0 1 0 0 1 1 1 0 0 1 1 1 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1
 1 1 0 0 0 1 0 1 0 1 1 0 0 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 1 1 1 1 0
 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 1 1 1 0 1 1 0 0 0 1 0 1 1 1 0 1 0 1 0 1 0 1
 1 0 1 1 1 0 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 0 1 1
 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 1 0 1 0 0 1 1 0 1 0 0 1 0 1
 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 0 0 0
 0 1 1 1]


In [105]:
# Print the true labels of the held-out split so they can be compared by eye
# with the predictions printed in the previous cell.
print(Y_test)

[0 1 1 1 0 0 1 0 1 0 0 0 1 0 1 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0 1 0 1 0 1 1 0
 1 0 0 1 1 0 0 0 1 1 0 1 0 1 1 0 0 1 0 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 1 1
 1 0 1 1 0 1 0 0 1 1 1 0 0 1 1 0 1 1 0 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1
 1 1 0 0 0 1 0 1 0 1 1 0 0 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 0 0 1 1 1 1 1 0
 0 0 1 0 0 0 0 0 1 0 0 1 1 1 0 1 1 1 1 1 1 0 0 0 1 0 1 1 1 0 1 0 1 0 1 0 1
 1 0 1 1 1 0 1 0 0 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 1 0 0 1 0 0 0 0 1 0 0 1 1
 1 0 1 1 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 1 0 1 1 1 1 1 0 1 0 0 1 0 0
 0 0 0 1 0 1 1 0 0 0 0 0 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0 0 1 1 1 0 1 0 0 0
 1 1 1 1]


In [106]:
# Fraction of held-out samples the baseline model labelled correctly.
metrics.accuracy_score(y_true=Y_test, y_pred=Y_pred)

0.96

# Manual Hyperparameter tuning

In [107]:
# Hand-picked configuration: 20 shallow trees (depth <= 5), each split
# choosing among 12 of the 15 features. Seeded so its score can be compared
# with the other models on equal footing from run to run.
model2 = RandomForestClassifier(
    n_estimators=20, max_depth=5, max_features=12, random_state=42
)

In [108]:
# Train the manually tuned forest on the training split.
model2.fit(X=X_train, y=Y_train)

RandomForestClassifier(max_depth=5, max_features=12, n_estimators=20)

In [109]:
# Overwrite Y_pred with the manually tuned model's held-out predictions.
Y_pred = model2.predict(X=X_test)

In [110]:
# Held-out accuracy of the manually tuned model.
metrics.accuracy_score(y_true=Y_test, y_pred=Y_pred)

0.95

# Grid Search tuning

In [111]:
# Search space for the grid search:
#   n_estimators: 15, 20, ..., 50  (8 values)
#   max_depth:     5, 10, 15, 20   (4 values)
params = dict(
    n_estimators=list(range(15, 51, 5)),
    max_depth=list(range(5, 21, 5)),
)

In [112]:
# Exhaustive search over `params` with 3-fold cross-validation.
# A fresh, seeded forest is passed instead of the already-fitted `base_model`:
# the hyperparameters are the same (library defaults), but the fixed seed makes
# the CV scores, and therefore `best_params_`, repeatable between runs.
grid_model = GridSearchCV(
    RandomForestClassifier(random_state=42), param_grid=params, cv=3
)

In [113]:
# Run the search: every combination in `params` is cross-validated on the
# training split only, so the test split stays unseen.
grid_model.fit(X=X_train, y=Y_train)

GridSearchCV(cv=3, estimator=RandomForestClassifier(),
             param_grid={'max_depth': [5, 10, 15, 20],
                         'n_estimators': [15, 20, 25, 30, 35, 40, 45, 50]})

In [114]:
# Show the hyperparameter combination from `params` that the fitted grid
# search selected as best.
grid_model.best_params_

{'max_depth': 20, 'n_estimators': 20}

# Random Forest with best parameters

In [115]:
# Build the final model from whatever the grid search selected, rather than
# hard-coding values copied from an earlier run's output (those go stale as
# soon as the search is re-run). Seeded for a repeatable score.
model3 = RandomForestClassifier(**grid_model.best_params_, random_state=42)

In [116]:
# Train the grid-search-tuned forest on the training split.
model3.fit(X=X_train, y=Y_train)

RandomForestClassifier(max_depth=20, n_estimators=20)

In [117]:
# Overwrite Y_pred with the tuned model's held-out predictions.
Y_pred = model3.predict(X=X_test)

In [118]:
# Held-out accuracy of the grid-search-tuned model.
metrics.accuracy_score(y_true=Y_test, y_pred=Y_pred)

0.9466666666666667