In [None]:
import os 
import tarfile 
import pandas as pd

# load the dataset 
def load_titanic_data(titanic_path=os.path.join("datasets", "titanic")):
    """Read ``train.csv`` from ``titanic_path`` and return it as a DataFrame.

    Parameters
    ----------
    titanic_path : str
        Directory containing ``train.csv``. Defaults to ``datasets/titanic``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of ``train.csv``.
    """
    return pd.read_csv(os.path.join(titanic_path, "train.csv"))

In [None]:

# Reuse the loader defined in the first cell, pointed at ../input/titanic
# (it appends "train.csv" to the directory it is given).
titanic_train = load_titanic_data('../input/titanic')

In [None]:
# using the head function to get the first five rows of the data
titanic_train.head()


In [None]:
# using the info function to see how large our dataset is
# and to check for noise (e.g. missing values) in our dataset
titanic_train.info()

In [None]:
# Frequency of each Cabin value; dropna=False keeps the missing entries in the
# tally so the amount of missing data is visible instead of silently excluded.
titanic_train["Cabin"].value_counts(dropna=False)

In [None]:
# Summary statistics of the training frame (DataFrame.describe).
titanic_train.describe()

In [None]:
# Frequency of each Embarked value; dropna=False also counts the missing entries.
titanic_train["Embarked"].value_counts(dropna=False)

In [None]:
# Number of passengers for each value of the Sex column.
titanic_train["Sex"].value_counts()

In [None]:
# Number of passengers in each passenger class (Pclass).
titanic_train["Pclass"].value_counts()

In [None]:
# attributes of interest: sex, cabin, embarked, age, ...
# plotting the data as histograms: value ranges on the x axis, number of instances on the y axis
import matplotlib.pyplot as plt
titanic_train.hist(bins=50, figsize=(20,15))
plt.show()

In [None]:
# logistic regression task = binary classifier
# what is the probability that a passenger survived the shipwreck based on their properties [sex, age, pclass, cabin, ...]
# it seems our train data has some missing values in the Age, Cabin and Embarked attributes
# we are going to have to transform some of the text attributes to numbers for our machine learning algorithms
# using scikit-learn's LabelEncoder class
from sklearn.linear_model import LogisticRegression



In [None]:
# (rows, columns) of the training frame.
titanic_train.shape

In [None]:
# using pandas' scatter_matrix function to check for correlation between attributes
from pandas.plotting import scatter_matrix

# NOTE(review): scatter_matrix only draws numeric columns. "Sex" holds the strings
# 'male'/'female' (and "Cabin" is presumably text as well), so those attributes are
# likely absent from the resulting grid -- confirm against the rendered figure.
attributes = ["Sex", "Age", "Cabin",
             "Survived", "Pclass"]
scatter_matrix(titanic_train[attributes], figsize=(12,8))

In [None]:
# looking at the most promising attributes that can help us predict
# Age , Sex
# A histogram ignores `x=`, so the original call only showed the overall Age
# distribution. Pivot Age into one column per Survived value (0 / 1) and overlay
# the two histograms so the age profiles of both outcomes can be compared.
titanic_train.pivot(columns="Survived", values="Age").plot(kind="hist", alpha=0.5)

In [None]:
# visualisations for Pclass against survived
# A histogram ignores `x=`, so the original call only counted the 0/1 values of
# Survived. Plot the mean of Survived (i.e. the survival rate) for each Pclass.
titanic_train.groupby("Pclass")["Survived"].mean().plot(kind="bar", alpha=0.5)

In [None]:
# from the above output we need to transform some of the data that's of string type to numbers
# most machine learning algorithms work with data that's of numeric type.
# attributes to transform: Sex, Pclass, Survived
# there are some attributes that we are going to drop since they don't help us, e.g. Name, Ticket, Fare, Parch, SibSp
# dropping the unimportant attributes using the drop function or dropna
# converting some of the data to number format using scikit-learn's LabelEncoder class
# after which we will put the data in a pipeline and later feed it to machine learning algorithms

import numpy as np
np.random.seed(10)
import seaborn as sns
%matplotlib inline 
from sklearn import metrics
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

# thought of using a perceptron neural network
# multilayer perceptron

# First convert the text-valued Sex column to a number.
# Lookup table that binarizes Sex.
dict_sex = dict(male=0, female=1)

# Readable names for the two values of Survived (used later as axis labels).
dict_live = {0: 'perished', 1: 'survived'}

# New column Bsex holds the numeric encoding of Sex; a value missing from
# dict_sex raises KeyError.
titanic_train['Bsex'] = [dict_sex[sex] for sex in titanic_train['Sex']]

# Model inputs are a two-column matrix (Pclass, Bsex); the target is Survived.
features = titanic_train.loc[:, ['Pclass', 'Bsex']].to_numpy()
labels = titanic_train.loc[:, 'Survived'].to_numpy()




In [None]:
# Artificial Neural network
# we will define sigmoid and Relu activation functions

# sigmoid 
# Define the sigmoid activator; we ask if we want the sigmoid or its derivative
def sigmoid_act(x, der=False):
    """Logistic sigmoid 1 / (1 + exp(-x)), or its derivative.

    Parameters
    ----------
    x : float or numpy.ndarray
        Input value(s); evaluated element-wise.
    der : bool, default False
        When equal to ``True`` return the derivative s(x) * (1 - s(x)) instead
        of s(x). Any value that does not compare equal to ``True`` selects the
        plain sigmoid.

    Returns
    -------
    float or numpy.ndarray
        Same shape as ``x``.
    """
    import numpy as np

    s = 1/(1+ np.exp(- x))
    if der == True:
        return s*(1-s)
    return s


# Rectifier Linear Unit (ReLU)
def ReLU_act(x, der=False):
    """Rectified Linear Unit max(x, 0), or its derivative.

    Parameters
    ----------
    x : float or array-like
        Input value(s); evaluated element-wise.
    der : bool, default False
        When equal to ``True`` return the derivative (Heaviside step) instead
        of the activation.

    Returns
    -------
    numpy.ndarray or numpy scalar
        Same shape as ``x``.

    Notes
    -----
    The derivative at exactly 0 is defined as 0. This matters because the
    training loop passes the *output* of the ReLU to this function: an inactive
    unit has output 0 and must get a zero gradient, whereas a value of 1 at 0
    would treat every unit as active.
    """
    import numpy as np

    if der == True:
        # Heaviside step with H(0) = 0: gradient flows only through active units.
        return np.heaviside(x, 0)
    return np.maximum(x, 0)

In [None]:
# (rows, columns) of the training frame, now including the added Bsex column.
titanic_train.shape


In [None]:
# splitting into train sets and test sets using sklearn 

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation. random_state pins the shuffle so the
# split is the same no matter how often, or in which order, the cells are run
# (10 matches the value given to np.random.seed earlier in the notebook).
X_train, X_test, Y_train, Y_test = train_test_split(
    features, labels, test_size=0.30, random_state=10
)

print('Training records:',Y_train.size)
print('Test records:',Y_test.size)

In [None]:
# Our multilayer perceptron 
# eta is the learning rate
# p & q are our number of perceptrons 
def Multilayer_perceptron(X_train, Y_train, p=4, q=4, eta=0.0015, batch_size=60):
    """Train a perceptron with two ReLU hidden layers and a sigmoid output.

    The network is trained by a single pass of per-sample gradient descent on
    the squared error (y - target)**2, then the average loss per block of
    ``batch_size`` consecutive samples is plotted.

    Parameters
    ----------
    X_train : numpy.ndarray, shape (n_samples, n_features)
        Training inputs.
    Y_train : numpy.ndarray, shape (n_samples,)
        Training targets.
    p, q : int
        Number of units in the first and second hidden layer.
    eta : float
        Learning rate.
    batch_size : int, default 60
        Number of consecutive samples averaged into one point of the loss plot.

    Returns
    -------
    tuple
        ``(w1, b1, w2, b2, wOut, bOut, mu)`` -- weights and biases of the three
        layers, and ``mu``, the list of per-sample squared errors.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    n_samples, n_features = X_train.shape

    # 0: Random initialisation of the three layers (same draw order as before).
    # NOTE(review): 2*rand - 0.5 draws weights from [-0.5, 1.5); a symmetric
    # range may have been intended -- confirm before changing.
    w1 = 2*np.random.rand(p, n_features) - 0.5  # Hidden layer 1
    b1 = np.random.rand(p)

    w2 = 2*np.random.rand(q, p) - 0.5  # Hidden layer 2
    b2 = np.random.rand(q)

    wOut = 2*np.random.rand(q) - 0.5  # Output layer
    bOut = np.random.rand(1)

    mu = []

    # Loop over every passenger (the last row is included).
    for I in range(n_samples):
        # 1: Input the data
        x = X_train[I]
        target = Y_train[I]

        # 2.1: Feed forward
        z1 = ReLU_act(np.dot(w1, x) + b1)  # output of layer 1
        z2 = ReLU_act(np.dot(w2, z1) + b2)  # output of layer 2
        y = sigmoid_act(np.dot(wOut, z2) + bOut)  # network output, shape (1,)

        # 2.2: Output-layer error. y is already sigmoid(a), so the derivative
        # of the sigmoid at a is y*(1-y); re-applying the sigmoid to y is wrong.
        delta_Out = 2 * (y - target) * y * (1 - y)

        # 2.3: Backpropagate. The ReLU derivative is 1 only for active units,
        # and a unit is active exactly when its output is positive.
        delta_2 = delta_Out * wOut * (z2 > 0)  # second layer error
        delta_1 = np.dot(delta_2, w2) * (z1 > 0)  # first layer error

        # 3: Gradient descent (the deltas above use the pre-update weights)
        wOut = wOut - eta*delta_Out*z2  # Output layer
        bOut = bOut - eta*delta_Out

        w2 = w2 - eta*np.outer(delta_2, z1)  # Hidden layer 2
        b2 = b2 - eta*delta_2

        w1 = w1 - eta*np.outer(delta_1, x)  # Hidden layer 1
        b1 = b1 - eta*delta_1

        # 4: Record the squared error of this sample
        mu.append((y - target)**2)

    # Average the recorded *errors* (not the predictions) over consecutive
    # blocks of batch_size samples; a trailing partial block is ignored.
    losses = np.asarray(mu, dtype=float).ravel()
    n_batches = losses.size // batch_size
    if n_batches > 0:
        batch_loss = losses[:n_batches*batch_size].reshape(n_batches, batch_size).mean(axis=1)
    else:
        # Fewer samples than one block: fall back to the per-sample errors.
        batch_loss = losses

    # Each point is one block of samples from the single training pass.
    plt.figure(figsize=(10,6))
    plt.scatter(np.arange(1, len(batch_loss)+1), batch_loss, alpha=1, s=10, label='error')
    plt.title('Averege Loss by epoch', fontsize=20)
    plt.xlabel('Epoch', fontsize=16)
    plt.ylabel('Loss', fontsize=16)
    plt.show()

    return w1, b1, w2, b2, wOut, bOut, mu


In [None]:
# Train the network with 8 units in the first hidden layer and 4 in the second.
w1, b1, w2, b2, wOut, bOut, mu = Multilayer_perceptron(X_train, Y_train, p=8, q=4, eta=0.0015)

In [None]:
# computing predictions 
# using weights and biases to compute predictions 

def MLP_pred(X_test, w1, b1, w2, b2, wOut, bOut, mu=None):
    """Predict binary labels with the trained perceptron.

    Parameters
    ----------
    X_test : array-like, shape (n_samples, n_features)
        Inputs to classify.
    w1, b1, w2, b2, wOut, bOut : numpy.ndarray
        Weights and biases as returned by ``Multilayer_perceptron``.
    mu : optional
        Unused; accepted only so existing calls that pass the training losses
        keep working.

    Returns
    -------
    numpy.ndarray, shape (n_samples,)
        Float array of 0.0 / 1.0: 1.0 where the network output is >= 0.5.
    """
    import numpy as np

    X = np.asarray(X_test)

    # Feed forward for all rows at once (one row per passenger).
    z1 = ReLU_act(np.dot(X, w1.T) + b1)  # output of layer 1
    z2 = ReLU_act(np.dot(z1, w2.T) + b2)  # output of layer 2
    y = sigmoid_act(np.dot(z2, wOut) + bOut)  # network output

    # Threshold at 0.5 with a Heaviside step: y < 0.5 -> 0, otherwise 1.
    return np.heaviside(y - 0.5, 1)


In [None]:
# Predicted labels for the held-out rows of the training data.
preds = MLP_pred(X_test, w1, b1, w2, b2, wOut, bOut, mu)

In [None]:
# visualizations 
# plotting the confusion matrix 

# Row/column names for the two classes, in label order 0, 1.
class_names = [dict_live[k] for k in (0, 1)]

# Rows are the true classes, columns the predicted classes.
cm = confusion_matrix(Y_test, preds)
df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)

plt.figure(figsize=(7, 7))
sns.heatmap(df_cm, annot=True, cmap=plt.cm.Blues, fmt='g')
plt.xlabel("Predicted Class", fontsize=18)
plt.ylabel("True Class", fontsize=18)
plt.show()

In [None]:
# testing our MLP on the test dataset
# loading the test dataset

titanic_test = pd.read_csv('../input/titanic/test.csv')

titanic_test.head()

In [None]:
# Binarize Sex with the same lookup table used for the training data
# (a value missing from dict_sex raises KeyError), then keep the two model inputs.
titanic_test['Bsex'] = [dict_sex[sex] for sex in titanic_test['Sex']]

X = titanic_test.loc[:, ['Pclass', 'Bsex']].to_numpy()

# Predictions for the test rows; shown as the cell output.
test_preds = MLP_pred(X, w1, b1, w2, b2, wOut, bOut, mu)
test_preds

In [None]:
# exporting the predictions as csv

submission = pd.DataFrame({
        "PassengerId": titanic_test["PassengerId"],
        # MLP_pred returns floats (0.0 / 1.0); write integer class labels so the
        # file contains 0/1 rather than 0.0/1.0.
        # NOTE(review): the competition presumably expects integer labels -- confirm.
        "Survived": np.asarray(test_preds).astype(int)
    })

submission.head(5)

# Export it in a 'Comma Separated Values' (CSV) file
import os
os.chdir(r'../working')
submission.to_csv(r'submission.csv', index=False)
# Creating a link to download the .csv file we created
from IPython.display import FileLink
FileLink(r'submission.csv')

In [None]:
# Read the exported file back to check what was written.
submitted_data = pd.read_csv('../working/submission.csv')
submitted_data.head()

In [None]:
# Number of test passengers predicted for each value of Survived.
submitted_data["Survived"].value_counts()


In [None]:
# histogram of the predicted Survived values read back from the submission file

submitted_data.plot(kind="hist", y="Survived",  
            alpha=0.5)

In [None]:
# From the above output, our perceptron predicted that out of the 418 passengers (80 + 338) in the Titanic test sample,
# only 80 survived while the remaining 338 perished