In [35]:

import pandas as pd
import numpy as np

def predict(theta,data):
    num = data.shape[0]
    col = data.shape[1]
    predicts = np.zeros(num)
    for i in range(num):
        temp = 0
        for j in range(col):
            temp += theta[j] * data[i,j]
        predicts[i] = 1/(1+np.exp(-temp))
        if(predicts[i] >= .5):
            predicts[i] = 1
        else:
            predicts[i] = 0
    return predicts

def gradient_descent(theta, data, answer, alpha = 0.1, n_iter = 50):
    num = data.shape[0]
    col = data.shape[1]
    for iteration in range(n_iter):
        # perform gradient descent
        grads = np.zeros(col)
        for i in range(num):
            for j in range(col):
                temp = 0 
                for k in range(col):
                    temp += ( theta[k] * data[i,k] ) 
                #grads[j] += (answer[i] - (1/(1+np.exp(-temp))))  * data[i,j] / num
                grads[j] += ((1/(1+np.exp(temp))) - answer[i])  * data[i,j] / num  
        theta = theta - alpha * grads
    return theta

def impute_age(cols):
    Age = cols[0]
    Pclass = cols[1]
    
    if pd.isnull(Age):

        if Pclass == 1:
            return 37

        elif Pclass == 2:
            return 29

        else:
            return 24

    else:
        return Age

train = pd.read_csv('titanic_train.csv')
train['Age'] = train[['Age','Pclass']].apply(impute_age,axis=1)
train.drop('Cabin',axis=1,inplace=True)
sex = pd.get_dummies(train['Sex'],drop_first=True)
embark = pd.get_dummies(train['Embarked'],drop_first=True)
train.drop(['Sex','Embarked','Name','Ticket'],axis=1,inplace=True)
train = pd.concat([train,sex,embark],axis=1)
train['intercept'] = 1
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(train.drop('Survived',axis=1),train['Survived'], test_size=0.20,random_state=101)
theta = np.zeros(X_train.shape[1])
X_train = X_train.to_numpy() 
X_test = X_test.to_numpy()
Y_train = Y_train.to_numpy() 
Y_test = Y_test.to_numpy()
model = gradient_descent(theta,X_train,Y_train)
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
print('----------------------------------------')
print('Training sets stats')
print(classification_report(Y_train,predict(model,X_train),zero_division=1))
print("Accuracy:", accuracy_score(Y_train, predict(model,X_train)))

print('----------------------------------------')
print('Test sets stats')
print(classification_report(Y_test,predict(model,X_test),zero_division=1))
print("Accuracy:", accuracy_score(Y_test, predict(model,X_test)))

----------------------------------------
Training sets stats
              precision    recall  f1-score   support

           0       0.63      1.00      0.77       450
           1       1.00      0.00      0.00       262

    accuracy                           0.63       712
   macro avg       0.82      0.50      0.39       712
weighted avg       0.77      0.63      0.49       712

Accuracy: 0.6320224719101124
----------------------------------------
Test sets stats
              precision    recall  f1-score   support

           0       0.55      1.00      0.71        99
           1       1.00      0.00      0.00        80

    accuracy                           0.55       179
   macro avg       0.78      0.50      0.36       179
weighted avg       0.75      0.55      0.39       179

Accuracy: 0.553072625698324


  predicts[i] = 1/(1+np.exp(-temp))
  predicts[i] = 1/(1+np.exp(-temp))
  predicts[i] = 1/(1+np.exp(-temp))
  predicts[i] = 1/(1+np.exp(-temp))
