In [1]:
import pandas as pd
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn import tree

In [2]:
rawdata = pd.read_csv('train.csv')

# PREPROCESS DATA
# Fill missing ages with the column mean. This operates on the 'Age' Series
# directly instead of assigning a one-column DataFrame into a column slot,
# which depended on pandas aligning the frame to the column.
# NOTE(review): the mean is computed before the train/test split, so rows that
# end up in the test set contribute to the imputed value -- confirm that this
# leakage is acceptable for the comparison being made.
rawdata['Age'] = rawdata['Age'].fillna(rawdata['Age'].mean())

# Drop the columns that are not used as features.
rawdata = rawdata.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis=1)

# One-hot encode the columns that are treated as categorical.
rawdata = pd.get_dummies(
    rawdata, columns=['Pclass', 'Sex', 'SibSp', 'Parch', 'Embarked'])

# Train Test Split: 40% of the rows are held out, seeded for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    rawdata.drop('Survived', axis=1),  # features: every column but the target
    rawdata['Survived'],               # targets
    test_size=0.4,
    random_state=0,
)



In [3]:
# Model specifications consumed by the grid search: a list with one dict per
# estimator, holding the estimator instance ('model'), the hyper-parameter
# grid to search ('params') and the label used when printing ('name').
MODELS = [
    {
        'model': svm.SVC(),
        'params': {
            'C': [0.001, 0.01, 0.1, 1],
            'kernel': ['linear'],
        },
        'name': 'SVC',
    },
    {
        'model': linear_model.LogisticRegression(),
        'params': {
            'C': [0.001, 0.01, 0.1, 1],
        },
        'name': 'Logistic',
    },
    {
        # Seeded: scikit-learn permutes the features at every split, so
        # equally good splits can be chosen differently from run to run
        # unless random_state is fixed.
        'model': tree.DecisionTreeClassifier(random_state=0),
        'params': {
            'criterion': ['gini', 'entropy'],
            'max_depth': [None, 3, 10, 25],
        },
        'name': 'DecisionTree',
    },
]

In [4]:
def GridSearchModelling(X_train, y_train, X_test, y_test,
                        models=None, cv=10, n_jobs=1):
    """Grid-search each model specification and print its test score.

    For every specification a ``GridSearchCV`` is fitted on the training
    data, then two lines are printed: the specification's name with the
    value returned by the fitted search's ``score`` on the test data, and
    the best parameter combination found.

    Parameters
    ----------
    X_train, y_train
        Features and targets the searches are fitted on.
    X_test, y_test
        Features and targets passed to ``score`` after fitting.
    models : list of dict, optional
        Specifications with the keys ``'model'`` (estimator), ``'params'``
        (parameter grid) and ``'name'`` (printed label). Defaults to the
        module-level ``MODELS`` list.
    cv : optional
        Forwarded to ``GridSearchCV`` as ``cv``; defaults to 10.
    n_jobs : int, optional
        Forwarded to ``GridSearchCV`` as ``n_jobs``; defaults to 1.

    Returns
    -------
    None
        Results are only printed.
    """
    if models is None:
        # Resolved at call time so the default follows the current MODELS.
        models = MODELS

    for spec in models:
        search = GridSearchCV(
            estimator=spec['model'],
            param_grid=spec['params'],
            cv=cv,
            n_jobs=n_jobs,
            verbose=1,
        )
        search.fit(X_train, y_train)
        print(spec['name'], ' Score:', search.score(X_test, y_test))
        print(search.best_params_)

In [5]:
# Run the grid search for every entry in MODELS on the training split and
# print each model's score on the held-out test split.
GridSearchModelling(X_train, y_train, X_test, y_test)

Fitting 10 folds for each of 4 candidates, totalling 40 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:   57.2s finished


SVC  Score: 0.7787114845938375
{'C': 0.1, 'kernel': 'linear'}
Fitting 10 folds for each of 4 candidates, totalling 40 fits
Logistic  Score: 0.7955182072829131
{'C': 1}
Fitting 10 folds for each of 8 candidates, totalling 80 fits


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    0.1s finished


DecisionTree  Score: 0.8095238095238095
{'criterion': 'gini', 'max_depth': 3}


[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    0.3s finished
