In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
df= pd.read_csv("gender_submission.csv")
train_df=pd.read_csv("train.csv")
test_df=pd.read_csv("test.csv")
alpha = 0.01 #learning rate

In [5]:
#We need to change the NaN value to 0
train_df=train_df.fillna(0)
test_df=test_df.fillna(0)

In [6]:
train_df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,0,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,0,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,0,S


In [7]:
test_df.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,0,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,0,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,0,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,0,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,0,S


In [9]:
#Certain Parameters requires the creation of dummy values
train_df_sex = pd.get_dummies(train_df['Sex'])
train_df_new = pd.concat([train_df, train_df_sex], axis=1)
train_df_new = train_df_new.drop('Sex', axis=1)

train_df_emb = pd.get_dummies(train_df_new['Embarked'])
train_df_new = pd.concat([train_df_new, train_df_emb], axis=1)
train_df_new = train_df_new.drop('Embarked', axis=1)

train_df_new = train_df_new.drop('Ticket', axis=1)
train_df_new = train_df_new.drop('Name', axis=1)
train_df_new = train_df_new.drop('Cabin', axis=1)
train_df_new = train_df_new.drop('PassengerId',axis=1)

In [10]:
#Normalizing the data set
fl = (train_df_new-train_df_new.mean())/(train_df_new.max()-train_df_new.min()) 
features = fl.drop('Survived', axis=1)
targets = train_df_new.iloc[:,0:1]

In [11]:
features=np.array(features)
targets=np.array(targets)
wts = np.zeros([11,1]) #weights

In [12]:
def sigmoid(features, weights):
    z = np.dot(features, weights)
    pred=1/(1+np.exp(-z))
    return pred

In [13]:
def propagate(features, targets, weights):
    N = len(targets)
    pred = sigmoid(features, weights)
    loss = targets*np.log(pred)+(1-targets)*np.log(1-pred)
    error=(-1/N)*loss.sum()
    return error

In [21]:
def update(features, targets, weights, alpha):
    N = len(features)
    pred = sigmoid(features, weights)
    grad = np.dot(features.T,  pred - targets)
    grad /= N
    grad *= alpha
    weights -= grad
    
    return weights

In [22]:
def train(features,targets, weights, lr, iters):
    cost_dict = []

    for i in range(iters):
        weights = update(features,targets, weights, lr)
        cost = propagate(features,targets, weights)
        cost_dict.append(cost)    
    return cost_dict,weights

In [23]:
error,wts = train(features,targets, wts, alpha, iters = 10000)

In [26]:
pred=sigmoid(features, wts)

In [27]:
print("train accuracy: {} %".format(100 - np.mean(np.abs(targets -pred)) * 100))

train accuracy: 67.77341055946778 %


In [31]:
test_df_sex = pd.get_dummies(test_df['Sex'])
test_df_new = pd.concat([test_df, test_df_sex], axis=1)
test_df_new = test_df_new.drop('Sex', axis=1)

test_df_emb = pd.get_dummies(test_df_new['Embarked'])
test_df_new = pd.concat([test_df_new, test_df_emb], axis=1)
test_df_new = test_df_new.drop('Embarked', axis=1)

test_df_new = test_df_new.drop('Ticket', axis=1)
test_df_new = test_df_new.drop('Name', axis=1)
test_df_new =test_df_new.drop('Cabin', axis=1)
test_df_new = test_df_new.drop('PassengerId',axis=1)
test_feat=test_df_new

emb_zeros = np.zeros([418,1])
test_feat['0']=emb_zeros
test_feat=test_feat[['Pclass','Age','SibSp','Parch','Fare','female','male','0','C','Q','S']]


test_targets=df.drop('PassengerId',axis=1)

In [32]:
fin = (test_feat-test_feat.mean())/(test_feat.max()-test_feat.min())

In [34]:
test_feat=np.array(test_feat)
test_targets=np.array(test_targets)

In [35]:
pred_test=sigmoid(test_feat, wts)

In [36]:
print("test accuracy: {} %".format(100 - np.mean(np.abs(pred_test - test_targets)) * 100))

test accuracy: 67.93857622601342 %
