In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``.

    ``z`` is negated and passed to ``np.exp``, so scalars and arrays are
    handled element-wise; the result has the shape ``np.exp`` produces.
    """
    exp_neg_z = np.exp(-z)
    denominator = 1 + exp_neg_z
    return 1 / denominator

def relu(z):
    """Rectified linear unit: element-wise ``max(0, z)``.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s).

    Returns
    -------
    Same shape as ``z``, with negative entries replaced by 0.
    """
    # np.maximum compares element-wise. np.max(0, z) would instead interpret
    # ``z`` as the ``axis`` argument of a reduction over the scalar 0.
    return np.maximum(0, z)

def getThetaFromGD(Xf, yf, n_iter=400, alpha=1, reg=0):
    """Fit logistic-regression weights with batch gradient descent.

    Parameters
    ----------
    Xf : array-like of shape (m, n)
        Design matrix. Column 0 is treated as the intercept column: its
        weight is excluded from the regularisation term.
    yf : array-like of shape (m,)
        Targets, paired with the rows of ``Xf`` by position.
    n_iter : int
        Number of gradient steps.
    alpha : float
        Learning rate.
    reg : float
        L2 regularisation strength applied to ``theta[1:]``.

    Returns
    -------
    numpy.ndarray of shape (n,)
        The weights after ``n_iter`` steps, starting from a random draw
        of ``np.random.rand(n)``.
    """
    # Work on plain float arrays so the arithmetic below is purely positional
    # and the in-place updates act on ndarrays.
    X = np.asarray(Xf, dtype=float)
    y = np.asarray(yf, dtype=float)
    m, n = X.shape
    X_t = X.T
    theta = np.random.rand(n)
    for _ in range(n_iter):
        h = sigmoid(X @ theta)
        d_theta = X_t @ (h - y)
        d_theta[1:] += reg * theta[1:]
        # Average over the rows actually passed in (the local m), not over a
        # notebook-level variable.
        d_theta /= m
        theta -= alpha * d_theta
    return theta

def getCostFonShanon(Xf, yf, reg=0, theta=None):
    """Regularised cross-entropy cost of a logistic model, per example.

    Computes ``(-y.log(h) - (1 - y).log(1 - h) + reg * sum(theta[1:]**2) / 2) / m``
    with ``h = sigmoid(Xf @ theta)`` and ``m`` the number of rows of ``Xf``.

    Parameters
    ----------
    Xf : array-like of shape (m, n)
        Design matrix; column 0 is treated as the intercept column.
    yf : array-like of shape (m,)
        Targets, paired with the rows of ``Xf`` by position.
    reg : float
        L2 regularisation strength applied to ``theta[1:]``.
    theta : array-like of shape (n,), optional
        Model weights. When omitted, the module-level ``theta`` is used so
        that the earlier two-argument call form keeps working.

    Returns
    -------
    float
        The cost described above.

    Raises
    ------
    ValueError
        If ``theta`` is not given and no module-level ``theta`` exists.
    """
    if theta is None:
        # Legacy call form: fall back to the notebook-level weights.
        theta = globals().get("theta")
        if theta is None:
            raise ValueError("theta must be provided")
    X = np.asarray(Xf, dtype=float)
    y = np.asarray(yf, dtype=float)
    w = np.asarray(theta, dtype=float)
    m = X.shape[0]
    z = X @ w
    # With h = 1 / (1 + exp(-z)):  -log(h) = logaddexp(0, -z)  and
    # -log(1 - h) = logaddexp(0, z).  This form stays finite when h
    # saturates to 0 or 1.
    J = y @ np.logaddexp(0, -z) + (1 - y) @ np.logaddexp(0, z)
    J += reg * (w[1:] ** 2).sum() / 2
    return J / m

In [4]:
# data polishing
df = pd.read_csv('data/train.csv')
print(df.shape)
# Report the number of missing values per column.
for c, n_missing in df.isna().sum().items():
    print(c, n_missing)
# "Cabin" is missing for most rows, so drop the column instead of the rows.
df = df.drop(columns=["Cabin"])
# Drop the remaining incomplete rows, then renumber 0..m-1: later cells slice
# by row count, which only matches the index labels when there are no gaps.
df = df.dropna().reset_index(drop=True)
# alternative, larger feature set:
#features= ['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked']
features = ['Pclass', 'Sex', 'Age', 'Embarked']
print(df.shape)
# creating input and output dataframes
yf = df.loc[:, 'Survived']
# Explicit copy: Xf is modified later and must not alias df.
Xf = df.loc[:, features].copy()

(891, 12)
PassengerId 0
Survived 0
Pclass 0
Name 0
Sex 0
Age 177
SibSp 0
Parch 0
Ticket 0
Fare 0
Cabin 687
Embarked 2
(712, 11)


In [5]:
# mapping strings to numbers and normalizing
# assign() builds a new frame with freshly typed numeric columns, instead of
# writing integers into the existing string columns through .loc.
# NOTE: this cell expects the raw string columns; re-run the previous cell
# before re-running this one.
Xf = Xf.assign(
    Sex=Xf['Sex'].map({'male': 0, 'female': 1}),
    Embarked=Xf['Embarked'].map({'C': 0, 'S': 1, 'Q': 2}),
)
Xf_mean, Xf_std = Xf.mean(), Xf.std()
Xf = (Xf - Xf_mean) / Xf_std
# constant column for the intercept term
Xf.insert(0, "cst", 1.)
# sample sizes
m, n = Xf.shape
m_test = m - (m // 4 * 3)
# Positional slicing: the last m_test rows form the test set regardless of
# the index labels (label-based .loc would only agree on a gap-free index).
yf_test = yf.iloc[m // 4 * 3:]
Xf_test = Xf.iloc[m // 4 * 3:, :]

In [None]:
## gradient descent
n_iter = 400
alpha = 1
reg = 0.0
# getThetaFromGD draws its starting point from np.random; seed it so the
# learning curves are reproducible.
np.random.seed(0)
m_train_arr = np.arange(100, m // 4 * 3, 30)
J_train_arr = []
J_test_arr = []
# Training rows are taken, by position, from the rows that are not in the
# test set, so the two sets never overlap whatever the index labels are.
is_train_candidate = ~Xf.index.isin(Xf_test.index)
Xf_pool = Xf.loc[is_train_candidate]
yf_pool = yf.loc[is_train_candidate]
n_test = len(yf_test)
for m_train in m_train_arr:
    print(m_train)
    yf_train = yf_pool.iloc[:m_train]
    Xf_train = Xf_pool.iloc[:m_train, :]
    # Normalise by the number of rows actually selected.
    n_train = len(yf_train)
    theta = getThetaFromGD(Xf_train, yf_train, n_iter, alpha, reg)
    # cost function on the training examples
    h_train = sigmoid(Xf_train @ theta)
    J_train = - yf_train @ np.log(h_train) - (1 - yf_train) @ (np.log(1 - h_train))
    J_train += reg * (theta[1:] ** 2).sum() / 2
    J_train /= n_train
    J_train_arr.append(J_train)
    # cost function on the test examples (no regularisation term)
    h_test = sigmoid(Xf_test @ theta)
    J_test = - (yf_test @ np.log(h_test) + (1 - yf_test) @ (np.log(1 - h_test))) / n_test
    J_test_arr.append(J_test)
    

In [None]:
%matplotlib qt5
fig, fax = plt.subplots()
fax.plot(m_train_arr, J_train_arr, label="train")
fax.plot(m_train_arr, J_test_arr, label="test")
plt.legend()
plt.show()