In [0]:
from google.colab import drive
drive.mount('/content/drive')

In [0]:
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
import numpy as np

In [0]:
def processData(data):
  copy = data.drop(['Sex'], axis=1)
  copy.fillna(0, inplace=True) #Replacing all missing data with 0
  copy['Cabin'][copy['Cabin'] != 0] = 1
  xEncoder = preprocessing.OneHotEncoder(handle_unknown='ignore')
  x1 = data[['Sex']]
  xEncoder.fit(x1)
  x1 = xEncoder.transform(x1).toarray()
  df = pd.DataFrame(data=x1, columns=['Male', 'Female'])
  output = pd.concat([copy, df], axis=1)
  output=(output-output.min())/(output.max()-output.min()) #MinMax normalization
  #print('After normalization')
  #print(output.head())
  return output

In [0]:
df = pd.read_csv('/content/drive/My Drive/Dataset/Titanic/train.csv', header=0)
train, test = train_test_split(df, test_size=0.2)
train.reset_index(inplace=True, drop=True)
test.reset_index(inplace=True, drop=True)
trainX = train.drop(['Survived', 'PassengerId', 'Name', 'Ticket', 'Embarked'], axis=1)
testX = test.drop(['Survived', 'PassengerId', 'Name', 'Ticket', 'Embarked'], axis=1)
trainY = train[['Survived']]
testY = test[['Survived']]
trainX = processData(trainX)
testX = processData(testX)

In [33]:
singleNN = MLPClassifier(solver='sgd',learning_rate='adaptive', random_state=42)
singleNN.fit(trainX, trainY.values.ravel())
singleNN.score(testX, testY)



0.7653631284916201

In [43]:
hyperPara = {'solver':['adam', 'sgd'],'hidden_layer_sizes':[100, 150, 200], 'learning_rate':['adaptive', 'constant'], 'learning_rate_init':np.linspace(0.01, 0.1, 6)}
gsNN = GridSearchCV(MLPClassifier(solver='sgd', max_iter=1000, random_state=42), hyperPara)
gsNN.fit(trainX, trainY.values.ravel())



GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=MLPClassifier(activation='relu', alpha=0.0001,
                                     batch_size='auto', beta_1=0.9,
                                     beta_2=0.999, early_stopping=False,
                                     epsilon=1e-08, hidden_layer_sizes=(100,),
                                     learning_rate='constant',
                                     learning_rate_init=0.001, max_iter=1000,
                                     momentum=0.9, n_iter_no_change=10,
                                     nesterovs_momentum=True, power_t=0.5,
                                     rand...
                                     solver='sgd', tol=0.0001,
                                     validation_fraction=0.1, verbose=False,
                                     warm_start=False),
             iid='warn', n_jobs=None,
             param_grid={'hidden_layer_sizes': [100, 150, 200],
                      

In [44]:
print(gsNN.best_params_)
gsNN.score(testX, testY)

{'hidden_layer_sizes': 100, 'learning_rate': 'adaptive', 'learning_rate_init': 0.064, 'solver': 'sgd'}


0.8212290502793296