# Titanic - Machine Learning from Disaster

In [280]:
import pandas as pd
import numpy as np

# Load data from files
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

In [281]:
train.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [282]:
test.head(3)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q


## Clean Data

In [283]:
# Function for cleaning data

def clean_data(data: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
    
    # Drop unnecessary columns
    to_drop = ["PassengerId","Name","SibSp","Parch","Ticket","Fare","Cabin","Embarked"]
    data.drop(columns=to_drop, inplace=True)

    # Rephrase male and female into discrete values between 0 and 1
    data = data.replace("male", 0)
    data = data.replace("female", 1)

    # Calculate avg age to replace null values with
    avg_age = round(sum(data["Age"].dropna()) / len(data["Age"].dropna()))
    data["Age"] = data["Age"].fillna(avg_age)

    # Calculate max values for normalization of columns
    max_age = max(data["Age"])
    max_pclass = max(data["Pclass"])

    # Normalize values
    data["Pclass"] /= max_pclass
    data["Age"] /= max_age

    return data

In [284]:
# Clean both train and test data
ctrain = clean_data(train)
ctest = clean_data(test)

# Get labels
train_labels = ctrain.columns

In [285]:
import torch

# Put data onto tensors
train_tensor = torch.tensor(ctrain.to_numpy())
test_tensor = torch.tensor(ctest.to_numpy())

In [286]:
from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_tensor, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_tensor, batch_size=32, shuffle=True)

In [287]:
# Load neural network
from torch import nn

# Select device (aka cpu/gpu)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [299]:
# Defining the neural network class

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential
        (
            nn.Linear(3, 16),
            nn.ReLU(),
            nn.Linear(16, 16),
            nn.ReLU(),
            nn.Linear(16, 2)
        )
    
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [302]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
)


In [307]:
X = torch.rand(1, 3, device=device)
logits = model(X)
#pred_probab = nn.Softmax(dim=1)(logits)
#y_pred = pred_probab.argmax(1)
#print(f"Predicted class: {y_pred}")

TypeError: torch.FloatTensor is not a Module subclass