# Tuning the RF model

## Importing the dependencies

In [None]:
import numpy as np
import pandas as pd

## Importing the datasets

### Training data

In [None]:
# Load the training CSV into a DataFrame; the 'Survived' column is read from it
# later as the prediction target.
df_train = pd.read_csv('../data/train.csv')

### Test data

In [None]:
# Load the test CSV into a DataFrame; its feature columns are encoded later
# into X_test_actual.
df_test = pd.read_csv('../data/test.csv')

## Data splits

In [None]:
# Target vector: the 'Survived' column of the training data.
y = df_train['Survived']

# Columns used as model inputs. get_dummies one-hot encodes any
# string/categorical column among them (presumably 'Sex' -- confirm against
# the CSV) and passes numeric columns through unchanged.
features = ['Pclass', 'Sex', 'SibSp', 'Parch']
X = pd.get_dummies(df_train[features])

# Encode the test frame the same way, then align it to the training columns.
# get_dummies derives its output columns from the values present in each frame,
# so encoding the two frames independently can yield different (or differently
# ordered) columns. Reindexing guarantees the same layout the model is fitted
# on; a dummy column absent from the test data is filled with 0.
X_test_actual = pd.get_dummies(df_test[features]).reindex(columns=X.columns, fill_value=0)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows of X / y for evaluation; the fixed random_state
# makes the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=1,
)

## Hyperparameters

In [None]:
# Candidate values for each RandomForestClassifier hyperparameter.
# NOTE(review): the Cartesian product of these lists is roughly 66 million
# combinations, so an exhaustive search over all of them is unlikely to
# finish. Consider shrinking the lists or sampling the space instead.

# Number of trees: 50 values spread evenly over [10, 100], truncated to int.
n_estimators = [int(x) for x in np.linspace(start=10, stop=100, num=50, endpoint=True)]
criterion = ['gini', 'entropy', 'log_loss']
# None considers all features at each split; the integers are absolute counts.
max_features = [None, 'sqrt', 'log2'] + list(range(1, 6))
# None places no limit on tree depth.
max_depth = [None] + list(range(1, 6))
# Starts at 2: scikit-learn rejects an integer min_samples_split below 2, and
# a search configured with error_score='raise' aborts on the first such
# candidate.
min_samples_split = list(range(2, 11))
min_samples_leaf = list(range(1, 11))
bootstrap = [True, False]
# NOTE(review): random_state is a seed rather than a model hyperparameter;
# searching over it selects a lucky seed and multiplies the grid by 51.
# Kept as-is to preserve the existing search space -- consider fixing one seed.
random_state = [None] + list(range(1, 51))

In [None]:
# Map each estimator parameter name to its list of candidate values; this is
# the search space handed to the grid search. Printed for a quick visual check.
param_grid = dict(
    n_estimators=n_estimators,
    criterion=criterion,
    max_features=max_features,
    max_depth=max_depth,
    min_samples_split=min_samples_split,
    min_samples_leaf=min_samples_leaf,
    bootstrap=bootstrap,
    random_state=random_state,
)
print(param_grid)

## Train model

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# Base estimator with default settings; it is passed to the grid search, which
# sets the parameters of each candidate from param_grid.
rf = RandomForestClassifier()

In [None]:
# Exhaustive search over every combination in param_grid, scored with 3-fold
# cross-validation on 2 parallel workers. error_score='raise' makes any
# failing fit abort the search instead of being recorded as a failed score.
rf_grid = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=3,
    n_jobs=2,
    verbose=2,
    error_score='raise',
)

In [None]:
# Run the search on the training split only; X_test / y_test stay held out
# for the accuracy check at the end.
rf_grid.fit(X_train, y_train)

In [None]:
# Show the parameter combination that achieved the best cross-validated score.
rf_grid.best_params_

In [None]:
# Report the fitted search's score on the training split and on the held-out
# split, in that order, formatted to three decimals.
for split_name, split_X, split_y in (
    ('Train', X_train, y_train),
    ('Test', X_test, y_test),
):
    print(f'{split_name} accuracy - : {rf_grid.score(split_X, split_y):.3f}')