In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# from google.colab import files
# files.upload()
# files

In [3]:
# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [4]:
df = pd.read_csv("./train.csv")
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [5]:
df = df[["Pclass", "Sex", "Age", "Fare", "Survived"]]
df.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,male,22.0,7.25,0
1,1,female,38.0,71.2833,1
2,3,female,26.0,7.925,1
3,1,female,35.0,53.1,1
4,3,male,35.0,8.05,0


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Pclass    891 non-null    int64  
 1   Sex       891 non-null    object 
 2   Age       714 non-null    float64
 3   Fare      891 non-null    float64
 4   Survived  891 non-null    int64  
dtypes: float64(2), int64(2), object(1)
memory usage: 34.9+ KB


In [7]:
mean_age = df["Age"].mean()
df.fillna(mean_age, axis=1, inplace=True)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Pclass    891 non-null    int64  
 1   Sex       891 non-null    object 
 2   Age       891 non-null    float64
 3   Fare      891 non-null    float64
 4   Survived  891 non-null    int64  
dtypes: float64(2), int64(2), object(1)
memory usage: 34.9+ KB


In [8]:
df["Sex"] = df["Sex"].map({"male":0, "female":1})
df.head()

Unnamed: 0,Pclass,Sex,Age,Fare,Survived
0,3,0,22.0,7.25,0
1,1,1,38.0,71.2833,1
2,3,1,26.0,7.925,1
3,1,1,35.0,53.1,1
4,3,0,35.0,8.05,0


In [9]:
x = df[["Pclass", "Sex", "Age", "Fare"]].values
y = df["Survived"].values

In [10]:
sc = StandardScaler()
x = sc.fit_transform(x)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [12]:
print(type(X_train))

<class 'numpy.ndarray'>


In [13]:
X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train = torch.tensor(y_train, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).to(device)

In [14]:
y_train = y_train.view(-1, 1)
y_test = y_test.view(-1, 1)

In [15]:
_ , input_features = X_train.shape
_, output_features = y_test.shape

print(input_features, output_features)

4 1


In [16]:
class Model(nn.Module):

    def __init__(self, in_dim, out_dim):
        super(Model, self).__init__()
        self.linear = nn.Linear(in_dim, out_dim)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

In [17]:
model = Model(input_features, output_features)
model.to(device)

Model(
  (linear): Linear(in_features=4, out_features=1, bias=True)
)

In [18]:
learning_rate = 0.01
iters = 10000

criterion = nn.BCELoss() #Binary cross entropy because its a binary classifier
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

In [19]:
for epoch in range(iters):
    y_pred = model(X_train).to(device)
    loss = criterion(y_pred, y_train)

    loss.backward()

    optimizer.step()
    optimizer.zero_grad()

    if epoch % 10 == 0:
        print("Epoch: {}, Loss:{}".format(epoch, loss))

Epoch: 0, Loss:0.5751470327377319
Epoch: 10, Loss:0.5699382424354553
Epoch: 20, Loss:0.5649990439414978
Epoch: 30, Loss:0.5603129267692566
Epoch: 40, Loss:0.5558644533157349
Epoch: 50, Loss:0.5516390800476074
Epoch: 60, Loss:0.5476235151290894
Epoch: 70, Loss:0.5438052415847778
Epoch: 80, Loss:0.5401725172996521
Epoch: 90, Loss:0.5367145538330078
Epoch: 100, Loss:0.5334210991859436
Epoch: 110, Loss:0.5302828550338745
Epoch: 120, Loss:0.5272907614707947
Epoch: 130, Loss:0.5244365930557251
Epoch: 140, Loss:0.5217128396034241
Epoch: 150, Loss:0.5191120505332947
Epoch: 160, Loss:0.5166275501251221
Epoch: 170, Loss:0.5142530202865601
Epoch: 180, Loss:0.5119824409484863
Epoch: 190, Loss:0.5098102688789368
Epoch: 200, Loss:0.5077314376831055
Epoch: 210, Loss:0.5057408213615417
Epoch: 220, Loss:0.503834068775177
Epoch: 230, Loss:0.5020066499710083
Epoch: 240, Loss:0.5002546310424805
Epoch: 250, Loss:0.49857404828071594
Epoch: 260, Loss:0.4969615340232849
Epoch: 270, Loss:0.49541357159614563
Ep

In [23]:
zeros = 0
ones = 0
for i in model(X_test).round():
    if i == 0:
        zeros += 1
    else:
        ones += 1

print("Dead: {}, Alive: {}".format(zeros, ones))

Dead: 119, Alive: 60
