# Importing Libraries

In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Sklearn
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Part a

In [3]:
df_titanic = pd.read_csv('/content/drive/MyDrive/Data Mining/HW2/titanic.csv')
df_titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [4]:
df_titanic['Age'].fillna(df_titanic['Age'].median(), inplace=True)
df_titanic['Embarked'].fillna(df_titanic['Embarked'].mode().iloc[0], inplace=True)
df_titanic.drop('Cabin', axis=1, inplace=True)

In [5]:
df_titanic['FamilySize'] = df_titanic['SibSp'] + df_titanic['Parch'] + 1
df_titanic.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Embarked,FamilySize
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,S,2
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C,2
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,S,1
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,S,2
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,S,1


In [6]:
df_titanic.loc[df_titanic['Age']<=10 , 'Sex'] = 'child'

In [7]:

df_titanic.drop(['Parch', 'SibSp', 'Ticket', 'Name', 'PassengerId'], axis=1, inplace=True)

In [8]:
df_titanic = pd.get_dummies(df_titanic, columns = ['Embarked', 'Sex'])
df_titanic.head()

Unnamed: 0,Survived,Pclass,Age,Fare,FamilySize,Embarked_C,Embarked_Q,Embarked_S,Sex_child,Sex_female,Sex_male
0,0,3,22.0,7.25,2,0,0,1,0,0,1
1,1,1,38.0,71.2833,2,1,0,0,0,1,0
2,1,3,26.0,7.925,1,0,0,1,0,1,0
3,1,1,35.0,53.1,2,0,0,1,0,1,0
4,0,3,35.0,8.05,1,0,0,1,0,0,1


In [9]:
X = df_titanic.loc[:, df_titanic.columns!='Survived']
y = df_titanic.loc[:, 'Survived']

In [10]:
X.shape, y.shape

((891, 10), (891,))

# Part b

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.3, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((623, 10), (268, 10), (623,), (268,))

# Part c

In [11]:
mlp_clf = MLPClassifier()
mlp_clf.fit(X_train, y_train)
y_pred = mlp_clf.predict(X_test)
print(f'Test Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'Train Accuracy: {mlp_clf.score(X_train, y_train)}')

Test Accuracy: 0.8022388059701493
Train Accuracy: 0.8346709470304976


# Part d

In [12]:
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
mlp_clf = MLPClassifier()
mlp_clf.fit(X_train, y_train)
y_pred = mlp_clf.predict(X_test)
print(f'Test Accuracy: {accuracy_score(y_test, y_pred)}')
print(f'Train Accuracy: {mlp_clf.score(X_train, y_train)}')

Test Accuracy: 0.8208955223880597
Train Accuracy: 0.8459069020866774




2 % increased test accuracy and almost 1 % in train accuracy

# Part e

In [1]:
main_arr = np.arange(100, 1000)

arr1 = [(a, 1) for a in main_arr]
arr2 = [(a, b) for a in main_arr for b in main_arr]
arr3 = [(a, b, c) for a in main_arr for b in main_arr for c in main_arr]

res = np.concatenate((arr1, arr2, arr3), axis=0)


NameError: ignored

In [None]:
hyper_paramater_dict = {
    'solver': ['adam', 'sgd'],
    'learning_rate_init': [1e-2 , 1e-3, 1e-4, 1e-5],
    'hidden_layer_sizes': ,
    'activation': ['identity','tanh', 'relu'],
}

In [None]:
grid = GridSearchCV(mlp_clf, param_grid=hyper_paramater_dict, verbose=1)
grid.fit(X_train, y_train)

In [None]:
print('Train Accuracy : %.3f'%grid.best_estimator_.score(X_train, y_train))
print('Test Accuracy : %.3f'%grid.best_estimator_.score(X_test, y_test))
print('Best Accuracy Through Grid Search : %.3f'%grid.best_score_)
print('Best Parameters : ',grid.best_params_)

# Part f