In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the Titanic training split from the working directory.
df = pd.read_csv(filepath_or_buffer='train.csv')

In [3]:
# Impute missing ages with the column mean.
# The result is assigned back instead of calling fillna(inplace=True) on the
# `df['Age']` selection: the in-place form operates on an intermediate object,
# which pandas warns about and which leaves `df` unchanged once copy-on-write
# is enabled.
df['Age'] = df['Age'].fillna(df['Age'].mean())

In [4]:
# Bucket ages into 20 equal-width intervals spanning the observed Age range.
df['age_bin'] = pd.cut(x=df['Age'], bins=20)

In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [6]:
# Replace every column of `df` with the integer codes produced by a
# LabelEncoder fitted on that column.
# NOTE(review): this covers all columns, not only the categorical ones —
# confirm that coding numeric columns such as Fare this way is intended.
df = df.apply(lambda column: LabelEncoder().fit_transform(column))

In [7]:
# Columns fed to the classifiers, in model-input order.
Features = [
    'Pclass',
    'Sex',
    'age_bin',
    'SibSp',
    'Parch',
    'Fare',
]

In [8]:
# Split the encoded frame into the target vector and the predictor matrix.
y = df.loc[:, 'Survived']
X = df.loc[:, Features]

In [9]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=3,
)

In [10]:
# Learn the scaling statistics from the training rows only, then apply that
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [17]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier as knn
from sklearn import svm
from sklearn.model_selection import GridSearchCV

In [24]:
# Candidate estimators and the hyper-parameter grid searched for each one.
# Keys are display names; every entry holds the estimator under 'model' and
# its search grid under 'params'.
model_params = {
    'svm': dict(
        model=svm.SVC(gamma='auto'),
        params={
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    ),
    'knn': dict(
        model=knn(),
        params={'n_neighbors': [1, 10, 20]},
    ),
    'logistic_regression': dict(
        model=LogisticRegression(),
        params={'C': [1, 10, 20]},
    ),
}

In [25]:
# Run a 5-fold grid search for every candidate and record its best
# cross-validated score together with the parameter set that achieved it.
scores = []

for model_name, mp in model_params.items():
    clf = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(X_train, y_train)
    scores.append(
        dict(
            model=model_name,
            best_score=clf.best_score_,
            best_params=clf.best_params_,
        )
    )

In [26]:
# Tabulate the search results, one row per candidate model.
score = pd.DataFrame(
    data=scores,
    columns=['model', 'best_score', 'best_params'],
)

In [27]:
# Show the model-comparison table as this cell's output.
score

Unnamed: 0,model,best_score,best_params
0,svm,0.83009,"{'C': 1, 'kernel': 'rbf'}"
1,knn,0.818861,{'n_neighbors': 20}
2,logistic_regression,0.802009,{'C': 10}


In [28]:
# Final classifier: an RBF-kernel SVC with C=1 and gamma='auto'.
final_model = svm.SVC(
    C=1,
    kernel='rbf',
    gamma='auto',
)

In [29]:
# Train the final classifier on the scaled training split.
final_model.fit(X=X_train, y=y_train)

SVC(C=1, gamma='auto')

In [30]:
# Evaluate the trained classifier on the held-out split.
final_model.score(X=X_test, y=y_test)

0.7988826815642458

In [31]:
# Score the test file using the preprocessing learned from the TRAINING data.
#
# The model and `scaler` were fitted on integer codes derived from train.csv
# (train Age mean, train age-bin edges, per-column label codes).  Refitting
# those steps on test.csv would give the same raw value a different code, so
# the train-side encoding is rebuilt here from train.csv and the test values
# are mapped onto it.
Features = ['Pclass','Sex','age_bin','SibSp','Parch','Fare']


def _codes_like_train(train_values, new_values):
    """Map ``new_values`` onto the label codes derived from ``train_values``.

    A label encoder numbers the sorted unique training values 0..k-1, so a
    value that occurs in training receives its position in that sorted array.
    A value that never occurred in training receives the code of the next
    larger training value (clipped to the last code) instead of raising.

    Parameters:
        train_values: 1-D array-like of training values (missing entries are
            ignored when building the code table).
        new_values: 1-D array-like of values to encode; must not contain
            missing entries.

    Returns:
        numpy integer array of codes, same length as ``new_values``.
    """
    train_arr = np.asarray(train_values)
    classes = np.unique(train_arr[~pd.isna(train_arr)])
    positions = np.searchsorted(classes, np.asarray(new_values))
    return np.clip(positions, 0, len(classes) - 1)


train_raw = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

# Age: impute with the train mean and reuse the bin edges computed on train.
train_age_mean = train_raw['Age'].mean()
train_age = train_raw['Age'].fillna(train_age_mean)
train_age_bin, age_edges = pd.cut(train_age, bins=20, retbins=True)
# Clip so that ages outside the training range land in the outermost bins
# rather than becoming missing.
test_age = (
    df_test['Age']
    .fillna(train_age_mean)
    .clip(lower=train_age.min(), upper=train_age.max())
)
test_age_bin = pd.cut(test_age, bins=age_edges)
# Training coded only the bins that actually occurred, in interval order.
df_test['age_bin'] = _codes_like_train(
    train_age_bin.cat.codes, test_age_bin.cat.codes
)

# Sex: categorical, so an unseen category should fail loudly.
df_test['Sex'] = LabelEncoder().fit(train_raw['Sex']).transform(df_test['Sex'])

# Numeric columns: same rank-style codes as training; missing test values are
# filled with the train median before encoding.
for column in ['Pclass', 'SibSp', 'Parch', 'Fare']:
    filled = df_test[column].fillna(train_raw[column].median())
    df_test[column] = _codes_like_train(train_raw[column], filled)

Xtest = df_test[Features]
Xtest = scaler.transform(Xtest)
results = final_model.predict(Xtest)

In [34]:
# Build the submission file: PassengerId from the raw test file alongside the
# predicted Survived label, written without the index column.
df2 = pd.read_csv('test.csv').assign(Survived=results)
df2.loc[:, ['PassengerId', 'Survived']].to_csv(
    'titanic_result_best_model.csv',
    index=False,
)