### 0. Import all of the packages

In [9]:
import torch
from torch import nn as nn
from torch import tensor
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


### 1. Importing Data

In [10]:
data = pd.read_csv('Titanic-Dataset.csv')

### 2. Dropping Data that do not have weight / contribute to their survival

In [11]:
#We are dropping columns that do not have an age (or that is also NaN)
data.dropna(subset=['Age'], inplace=True)

#how do we change the sex to be numbers? 
#males will be zero
#females will be 1
data.loc[data["Sex"] == "male", "Sex"] = 0
data.loc[data["Sex"] == "female", "Sex"] = 1

#drop name, embarked, cabin, ticket, fare
features = data.drop(['Embarked', 'Cabin', 'Ticket', 'Name', 'Fare', 'PassengerId', 'Survived'], axis=1)

labels = data['Survived']

print(features)

print(labels)

     Pclass Sex   Age  SibSp  Parch
0         3   0  22.0      1      0
1         1   1  38.0      1      0
2         3   1  26.0      0      0
3         1   1  35.0      1      0
4         3   0  35.0      0      0
..      ...  ..   ...    ...    ...
885       3   1  39.0      0      5
886       2   0  27.0      0      0
887       1   1  19.0      0      0
889       1   0  26.0      0      0
890       3   0  32.0      0      0

[714 rows x 5 columns]
0      0
1      1
2      1
3      1
4      0
      ..
885    0
886    0
887    1
889    1
890    0
Name: Survived, Length: 714, dtype: int64


### 3. Standardizing and Scaling our Data

In [12]:
scaler = StandardScaler() #creating instance of a scale
scaled_features = scaler.fit_transform(features) #making our data be standardize so each feature has the same weight, the machine will find which one on their own 

### 3. Manipulate the Data

In [13]:
from torch.utils.data import DataLoader, TensorDataset

training_features, testing_features, training_labels, testing_labels = train_test_split(scaled_features, labels, test_size=0.2, random_state=42)



training_features = torch.tensor(training_features, dtype=torch.float32) 
testing_features = torch.tensor(testing_features, dtype=torch.float32)

training_labels = torch.tensor(training_labels.values, dtype=torch.float32)
testing_labels = torch.tensor(testing_labels.values, dtype=torch.float32)

training_dataset = TensorDataset(training_features, training_labels)
testing_dataset = TensorDataset(testing_features, testing_labels)

training_loader = DataLoader(training_dataset, batch_size=32, shuffle=True)
testing_loader = DataLoader(testing_dataset, batch_size=32, shuffle=False)


### 4. Creating Model

In [14]:
class BinaryModel(nn.Module):
    def __init__(self):
        super(BinaryModel, self).__init__() #calling binarymodel inside to utilize nn.module attributes inside our intialization
        self.linear1 = nn.Linear(5,15) #we have 5 features, but we want 15 neurons
        self.linear2 = nn.Linear(15, 10)
        self.linear3 = nn.Linear(10, 5)
        self.linear4 = nn.Linear(5, 1)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.linear3(x)
        x = self.linear4(x)
        x = self.activation(x)
        return x
    
model = BinaryModel()


##use relu and drop to enhance model optimize 

### 5. Testing Our Model

In [15]:
from torch import optim
criterion = nn.BCELoss()

optimizer = optim.Adam(model.parameters(), lr=0.01)

model.train()

num_of_epochs = 200
for epoch in range(num_of_epochs):
  for features, labels in training_loader:
    optimizer.zero_grad()
    output = model(features)
    loss = criterion(output, labels.view(-1,1))
    loss.backward()
    optimizer.step()
  if epoch%10 == 0:
    print(f'Epoch {epoch+1}/{num_of_epochs}, Loss: {loss.item()}')

Epoch 1/200, Loss: 0.304869681596756
Epoch 11/200, Loss: 0.40848469734191895
Epoch 21/200, Loss: 0.32631853222846985
Epoch 31/200, Loss: 0.407760351896286
Epoch 41/200, Loss: 0.5543063282966614
Epoch 51/200, Loss: 0.44769856333732605
Epoch 61/200, Loss: 0.3819611072540283
Epoch 71/200, Loss: 0.20524530112743378
Epoch 81/200, Loss: 0.44059765338897705
Epoch 91/200, Loss: 0.42041969299316406
Epoch 101/200, Loss: 0.36332982778549194
Epoch 111/200, Loss: 0.4181223213672638
Epoch 121/200, Loss: 0.3568379878997803
Epoch 131/200, Loss: 0.40223637223243713
Epoch 141/200, Loss: 0.4422937333583832
Epoch 151/200, Loss: 0.3670613467693329
Epoch 161/200, Loss: 0.47777220606803894
Epoch 171/200, Loss: 0.40914005041122437
Epoch 181/200, Loss: 0.36370551586151123
Epoch 191/200, Loss: 0.45452025532722473


In [16]:
from torchmetrics import Accuracy

accuracy = Accuracy(task='binary')
model.eval()
with torch.no_grad():
  for features, labels in testing_loader:
    output = model(features)
    prediction = output.round()
    accuracy.update(prediction, labels.view(-1,1))
model.train()

print(f"Accuracy: {accuracy.compute().item()}")


#how to check each weight for each feature, like which one contributes the most to their survival?
print(list(model. parameters())) 

Accuracy: 0.748251736164093
[Parameter containing:
tensor([[ 6.1213e-01, -6.5545e-01,  4.6576e-01,  2.9365e-01,  1.3392e-01],
        [ 3.4480e-01, -5.8022e-01,  2.0472e-01, -2.5705e-04,  1.5655e-01],
        [-7.5896e-01,  9.4351e-01, -6.0306e-01, -3.5202e-01, -1.0990e-01],
        [-6.4125e-01,  5.2991e-01, -3.5948e-01, -1.9669e-01,  1.4099e-01],
        [ 2.5566e-03,  1.7320e-03,  2.8370e-02,  1.1704e-02,  1.9667e-02],
        [ 3.1754e-02, -2.3145e-02,  2.8974e-02,  3.1359e-02,  1.9040e-03],
        [-4.1112e-02,  4.4850e-02, -2.8536e-02, -2.6841e-02, -1.0189e-02],
        [ 7.4780e-01, -9.1012e-01,  1.5922e-01, -1.0619e-03, -5.6891e-02],
        [-3.5006e-01,  6.1307e-01, -1.7893e-01,  3.6687e-02,  3.5244e-01],
        [-1.5763e-02,  2.0745e-02, -1.4287e-03,  1.7530e-02, -2.7208e-02],
        [ 3.6185e-01, -5.8620e-01,  2.0445e-01,  4.3428e-01, -7.1609e-02],
        [-2.8920e-03,  5.2711e-03, -7.1089e-03, -1.1370e-02,  7.5859e-03],
        [ 6.1928e-01, -4.0504e-01,  5.4761e-01,  