In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeClassifier

import matplotlib.pyplot as plt

## Data import

In [None]:
# Titanic passenger CSV hosted with the Stanford CS109 course archive.
url = (
    'https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv'
)
df = pd.read_csv(filepath_or_buffer=url)

# Preview the first five rows to check the columns that were loaded.
df.head(5)

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.925
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.05


### Binary encoding of the `Sex` column

In [None]:
# Encode the categorical 'Sex' column as integers (female -> 0, male -> 1).
#
# An explicit element-wise mapping followed by astype(int) is used instead of
# Series.replace: replace() relies on implicit object -> int downcasting, which
# recent pandas releases deprecate, so the resulting dtype is not guaranteed.
# Values that are not keys of the mapping pass through unchanged, which keeps
# the cell safe to re-run once the column already holds 0/1; any other
# unexpected label (or a missing value) makes astype(int) raise instead of
# silently leaking into the feature matrix.
sex_codes = {'female': 0, 'male': 1}
df['Sex'] = df['Sex'].map(lambda label: sex_codes.get(label, label)).astype(int)

## Searches
### Defining the hyperparameter search space

In [None]:
# Hyperparameter space passed to both the grid search and the random search.
# 50 depths x 15 leaf sizes x 6 split sizes = 4500 combinations.
search_params = dict(
    max_depth=list(range(1, 51)),
    min_samples_leaf=list(range(1, 16)),
    min_samples_split=(2, 5, 7, 10, 15, 30),
)

### Train/test split

In [None]:
# Columns used as model inputs; 'Name' is left out.
features = [
    'Pclass',
    'Sex',
    'Age',
    'Siblings/Spouses Aboard',
    'Parents/Children Aboard',
    'Fare',
]

# Feature matrix and the 'Survived' target column.
X, y = df[features], df['Survived']

# Hold out a test set; the fixed seed keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

### Grid Search

In [None]:
# Exhaustive search: every combination in search_params is evaluated with
# 2-fold cross-validation on the training split.
#
# The estimator is seeded because a decision tree breaks ties between equally
# good splits at random; without a fixed random_state the selected best
# parameters can differ from one run of the notebook to the next.
grid_search = GridSearchCV(
  estimator=DecisionTreeClassifier(random_state=42),
  param_grid=search_params,
  cv=2,
  n_jobs=-1,  # run the fits in parallel on all available cores
  verbose=2
)
grid_search.fit(X_train, y_train)

Fitting 2 folds for each of 4500 candidates, totalling 9000 fits


The best parameters found by the grid search are:

In [None]:
# Parameter combination that the fitted grid search selected as best.
grid_search.best_params_

{'max_depth': 7, 'min_samples_leaf': 2, 'min_samples_split': 2}

### Random Search

In [None]:
# Randomized search: evaluate 200 parameter combinations sampled from
# search_params instead of all of them.
#
# Both the sampler and the estimator are seeded. Without random_state on the
# search, a different set of 200 candidates is drawn on every run; without it
# on the tree, tie-breaking between equally good splits is random. Either one
# makes the reported best parameters irreproducible.
#
# NOTE(review): cv is left unset here, so scikit-learn's default number of
# folds applies, whereas the grid search uses cv=2. The two searches are
# therefore not scored under identical conditions; confirm whether that is
# intended.
random_search = RandomizedSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_distributions=search_params,
    n_iter=200,
    n_jobs=-1,  # run the fits in parallel, as the grid search does
    random_state=42,
)
random_search.fit(X_train, y_train)

The best parameters found by the random search are:

In [None]:
# Parameter combination that the fitted random search selected as best.
random_search.best_params_

{'min_samples_split': 5, 'min_samples_leaf': 6, 'max_depth': 4}