In [1]:
import sklearn
import numpy as np
import pandas as pd


from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [2]:
titanic_df = pd.read_csv('./datasets/titanic_processed.csv')

In [3]:
titanic_df.head()

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked_C,Embarked_Q,Embarked_S
0,1,2,0,23.0,0,0,13.7917,1,0,0
1,0,1,0,25.0,1,2,151.55,0,0,1
2,0,3,1,22.0,0,0,7.7958,0,0,1
3,0,3,0,6.0,4,2,31.275,0,0,1
4,1,3,0,38.0,1,5,31.3875,0,0,1


In [4]:
X = titanic_df.drop(['Survived'],axis=1)
Y = titanic_df['Survived']

x_train,x_test,y_train,y_test = train_test_split(X,Y, test_size = 0.2) 

In [5]:
def summarize(y_test,y_pred):
    acc = accuracy_score(y_test,y_pred,normalize=True)      # op :fraction
    num_acc = accuracy_score(y_test,y_pred,normalize=False) # op :number
    
    precis = precision_score(y_test,y_pred)
    recall = recall_score(y_test,y_pred)
    
    return {'Test_data_count': len(y_test),
            'Accuracy_Count':num_acc,
            'Accuracy' : acc,
            'Precision': precis,
            'Recall'   : recall}

In [6]:
from sklearn.model_selection import GridSearchCV

In [8]:
parameters = {'max_depth':[2,4,5,7,9,10]} #parameter to select from list

# cv = cross validation  - 
grid_search = GridSearchCV(DecisionTreeClassifier(),parameters,cv=3,return_train_score=True)
grid_search.fit(x_train,y_train)

grid_search.best_params_

{'max_depth': 4}

In [9]:
for i in range(6):
    
    print('Parameters: ',grid_search.cv_results_['params'][i])
    
    print('Mean Test Score: ',grid_search.cv_results_['mean_test_score'][i])
    
    print('Rank',grid_search.cv_results_['rank_test_score'][i])

Parameters:  {'max_depth': 2}
Mean Test Score:  0.7926186291739895
Rank 2
Parameters:  {'max_depth': 4}
Mean Test Score:  0.7943760984182777
Rank 1
Parameters:  {'max_depth': 5}
Mean Test Score:  0.7803163444639719
Rank 4
Parameters:  {'max_depth': 7}
Mean Test Score:  0.7908611599297012
Rank 3
Parameters:  {'max_depth': 9}
Mean Test Score:  0.7750439367311072
Rank 5
Parameters:  {'max_depth': 10}
Mean Test Score:  0.7662565905096661
Rank 6


In [10]:
decision_tree_model = DecisionTreeClassifier(\
                      max_depth=grid_search.best_params_['max_depth']).fit(
                        x_train,y_train)

In [11]:
y_pred = decision_tree_model.predict(x_test)

In [12]:
summarize(y_test,y_pred)

{'Test_data_count': 143,
 'Accuracy_Count': 113,
 'Accuracy': 0.7902097902097902,
 'Precision': 0.9,
 'Recall': 0.5806451612903226}