In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn.functional as F

In [7]:
def preprocess_data(data_path):
    data = pd.read_csv(data_path)
    features = data.columns.to_list()
    features.remove('PassengerId')
    features.remove('Name')
    features.remove('Ticket')
    features.remove('Embarked')
    features.remove('Parch')
    features.remove('Cabin')
    features.remove('Pclass')
    features.remove('Survived')
    x_data = data[features]
    y_data = data['Survived']
    x_data['Sex'] = x_data['Sex'].apply(lambda x:0 if x=='male' else 1)
    x_data['Age'].fillna(x_data['Age'].mean(),inplace=True)
    x_data['Fare'].fillna(x_data['Fare'].median(),inplace=True)
    x_data = torch.tensor(x_data.to_numpy(),dtype=torch.float32) ## Computation on top of this array will be executed on GPU
    y_data = torch.tensor(y_data.to_numpy(),dtype=torch.float32)
    return x_data.T,y_data

In [8]:
x_data,y_data = preprocess_data('tested.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_data['Sex'] = x_data['Sex'].apply(lambda x:0 if x=='male' else 1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_data['Age'].fillna(x_data['Age'].mean(),inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_data['Fare'].fillna(x_data['Fare'].median(),inplace=True)


In [24]:
x_data.shape,y_data.shape

(torch.Size([4, 418]), torch.Size([418]))

In [66]:
def get_params(n,l1):
    W1 = torch.randn((n,l1),dtype=torch.float32,requires_grad = True)
    b1 = torch.randn((l1,),dtype=torch.float32,requires_grad = True)
    W2 = torch.randn((l1,),dtype=torch.float32,requires_grad = True)
    b2 = torch.randn((1,),dtype=torch.float32,requires_grad=True)
    return W1,b1,W2,b2

In [67]:
def forward(X,Y,W1,b1,W2,b2):
    Z1 = W1.T @ x_data 
    A1 = F.relu(Z1)
    Z2 = W2.T @ A1 
    A2 = F.sigmoid(Z2)
    loss = F.binary_cross_entropy(A2,y_data)
    return loss

In [75]:
def backward(loss,W1,b1,W2,b2):
    W1.grad = None
    W2.grad = None
    loss.backward()
    W1.data -= (0.1) * (W1.grad)
    W2.data -= (0.1) * (W2.grad)
    return W1,W2

In [76]:
def train(X,Y):
    W1,b1,W2,b2 = get_params(4,4)
    for iteration in range(100):
        loss = forward(X,Y,W1,b1,W2,b2)
        W1,W2 = backward(loss,W1,b1,W2,b2)
        print(f"Loss after {iteration}: {loss}")

In [77]:
train(x_data,y_data)

Loss after 0: 17.83586311340332
Loss after 1: 36.58770751953125
Loss after 2: 5.150224685668945
Loss after 3: 2.788870334625244
Loss after 4: 6.128706455230713
Loss after 5: 3.840625762939453
Loss after 6: 1.189003586769104
Loss after 7: 1.7929822206497192
Loss after 8: 0.792607843875885
Loss after 9: 0.7422385215759277
Loss after 10: 0.7739688754081726
Loss after 11: 0.664193868637085
Loss after 12: 0.6497581005096436
Loss after 13: 0.6462661623954773
Loss after 14: 0.6438620686531067
Loss after 15: 0.6423622965812683
Loss after 16: 0.6415045857429504
Loss after 17: 0.6408703327178955
Loss after 18: 0.6403250098228455
Loss after 19: 0.6398763656616211
Loss after 20: 0.6393342018127441
Loss after 21: 0.6389802694320679
Loss after 22: 0.638615071773529
Loss after 23: 0.6383011341094971
Loss after 24: 0.6380271911621094
Loss after 25: 0.6376669406890869
Loss after 26: 0.6372765302658081
Loss after 27: 0.6367805004119873
Loss after 28: 0.6361074447631836
Loss after 29: 0.6354281306266785
