## Pre-processing Data

In [1]:
import pandas as pd
import csv
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score

def age_model_producer(dataset):
    """Fit a neural-network regressor that predicts ``Age`` from the other columns.

    Only rows of ``dataset`` without any missing value are used for training.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame containing an ``"Age"`` column; all remaining columns are used
        as features.

    Returns
    -------
    MLPRegressor
        The fitted regressor.
    """
    complete_rows = dataset.dropna()
    features = complete_rows.drop(columns=["Age"])
    target = complete_rows["Age"]

    regressor = MLPRegressor(
        solver='sgd',
        random_state=42,
        activation='logistic',
        learning_rate_init=0.05,
        hidden_layer_sizes=(15, 10),
        max_iter=1500,
    )
    # fit() returns the estimator itself, so the fitted model is handed back directly.
    return regressor.fit(features, target)
    
def handle_missing_numerical_mean(dataset, column_name):
    """Return ``dataset[column_name]`` with missing values replaced by the column mean.

    The mean is computed over the non-missing entries only. ``dataset`` itself
    is not modified; a new Series is returned.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame holding the column to impute.
    column_name : str
        Name of the numerical column to fill.

    Returns
    -------
    pandas.Series
        The column with every missing entry set to the mean of the observed ones.
    """
    column = dataset[column_name]
    observed_values = np.array(column.dropna())
    return column.fillna(observed_values.mean())

def handle_missing_age(dataset, method = "mean"):
    """Return the ``Age`` column of ``dataset`` with missing values imputed.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame containing an ``"Age"`` column. For the model-based method every
        other column is used as a feature for predicting the missing ages.
    method : str, default "mean"
        ``"mean"`` fills missing ages with the mean of the observed ages.
        Any other value fills them with predictions from the regressor built
        by ``age_model_producer``.

    Returns
    -------
    pandas.Series
        The imputed ``Age`` column, aligned with ``dataset``'s index.
        ``dataset`` itself is left unmodified in both modes.
    """
    if method == "mean":
        return handle_missing_numerical_mean(dataset, "Age")

    model = age_model_producer(dataset)

    # Work on a copy so the caller's frame is not mutated as a side effect.
    age = dataset["Age"].copy()
    missing = age.isna()

    # A boolean mask (instead of looping over range(n) with label lookups)
    # works for any index, and predicts all missing rows in a single call.
    if missing.any():
        features = dataset.loc[missing].drop(columns=["Age"])
        age.loc[missing] = model.predict(features)

    return age

# Load the raw training data; the path is relative to the working directory.
dataset = pd.read_csv("train.csv")

# Drop the identifier / free-text columns that are not used as features.
X = dataset.drop(['PassengerId', 'Name', 'Ticket', 'Cabin'], axis = 1)

# One-hot encode the categorical columns. Pclass is first mapped to string
# labels so that its dummy columns get readable names.
dummies_sex = pd.get_dummies(X["Sex"])
X["Pclass"] = X["Pclass"].replace(to_replace = [1, 2, 3], value = ["1_pclass", "2_pclass", "3_pclass" ])
dummies_pclass = pd.get_dummies(X["Pclass"])
dummies_embarked = pd.get_dummies(X["Embarked"])

# Scale Fare / SibSp / Parch to [0, 1]. `nl` stays fitted on these three
# columns so the same scaling can later be applied to new data.
nl = MinMaxScaler()
X[[ "Fare", "SibSp", "Parch"]] = nl.fit_transform(X[[ "Fare" , "SibSp", "Parch"]])

# Assemble the feature matrix: numeric columns plus all dummy columns.
x = X[["Age", "Fare", "SibSp", "Parch"]]
x = pd.concat([x, dummies_sex, dummies_pclass, dummies_embarked], axis = 1)

# Impute missing ages with the model-based method; the default
# method = "mean" is the simpler alternative.
x["Age"] = handle_missing_age(x, method = "predict")

# Age gets its own scaler: refitting `nl` here would discard the parameters
# it learned for Fare / SibSp / Parch above.
age_scaler = MinMaxScaler()
x[["Age"]] = age_scaler.fit_transform(x[["Age"]])

# NOTE(review): a two-column one-hot target makes scikit-learn treat this as a
# multilabel problem; a 1-D X["Survived"] may be what is intended. Confirm
# before changing, since the cells below consume `y` as it is.
y = pd.get_dummies(X["Survived"])

## Building the neural network model

In [2]:
from sklearn.neural_network import MLPClassifier

# Hold out 10% of the rows for testing. The fixed random_state makes the split,
# and therefore the scores reported below, reproducible across kernel restarts.
data_train, data_test, class_train, class_test = train_test_split(x, y, test_size=0.1, random_state=42)

print(data_train)

# Classifier with two hidden layers (15 and 10 units), trained with SGD.
model = MLPClassifier(solver = 'sgd', random_state = 42, activation = 'logistic', learning_rate_init = 0.05, hidden_layer_sizes = (15, 10), max_iter = 1500)

          Age      Fare  SibSp     Parch  female  male  1_pclass  2_pclass  \
116  0.880623  0.015127  0.000  0.000000       0     1         0         0   
519  0.396833  0.015412  0.000  0.000000       0     1         0         0   
656  0.341296  0.015412  0.000  0.000000       0     1         0         0   
698  0.610455  0.216430  0.125  0.166667       0     1         1         0   
561  0.497361  0.015412  0.000  0.000000       0     1         0         0   
..        ...       ...    ...       ...     ...   ...       ...       ...   
697  0.240552  0.015094  0.000  0.000000       1     0         0         0   
229  0.192609  0.049708  0.375  0.166667       1     0         0         0   
367  0.192668  0.014110  0.000  0.000000       1     0         0         0   
26   0.306187  0.014102  0.000  0.000000       0     1         0         0   
278  0.082684  0.056848  0.500  0.166667       0     1         0         0   

     3_pclass  C  Q  S  
116         1  0  1  0  
519         1

## Training and Testing model

In [3]:
# Train on the training split, then predict the held-out rows.
# fit() returns the estimator, so the two steps can be chained.
predict = model.fit(data_train, class_train).predict(data_test)

# Score the hold-out predictions against the true labels.
holdout_accuracy = accuracy_score(class_test, predict)
holdout_mse = mean_squared_error(class_test, predict)

print("Accuracy : ", holdout_accuracy)
print("Mean Square Error : ", holdout_mse)

Accuracy :  0.8
Mean Square Error :  0.2


## Getting average accuracy and error using cross-validation

In [4]:
from sklearn.model_selection import cross_validate

# 10-fold cross-validation over the full feature matrix, collecting both metrics.
CV = cross_validate(
    model,
    x,
    y,
    cv=10,
    scoring=['accuracy', 'neg_mean_squared_error'],
)

fold_accuracy = CV['test_accuracy']
# The scorer reports negated MSE, so flip the sign back to a positive error.
fold_mse = -CV['test_neg_mean_squared_error']

print('Average Accuracy = ', sum(fold_accuracy) / len(fold_accuracy))
print('Average MSE = ', sum(fold_mse) / len(fold_mse))

Average Accuracy =  0.7980649188514357
Average MSE =  0.20193508114856423
