# Titanic - Machine Learning from Disaster

### Load data

In [137]:
import pandas as pd
import numpy as np

# Load data from files
test_X = pd.read_csv('test.csv')
train = pd.read_csv('train.csv')

# Split data into X:parameters and y:output
train_y = train["Survived"]
train_X = train.drop(columns="Survived")

### Clean data

In [138]:
# Function for cleaning data

def clean_data(data: pd.core.frame.DataFrame) -> pd.core.frame.DataFrame:
    
    # Drop unnecessary columns
    to_drop = ["PassengerId","Name","SibSp","Parch","Ticket","Fare","Cabin","Embarked"]
    data.drop(columns=to_drop, inplace=True)

    # Rephrase male and female into discrete values between 0 and 1
    data = data.replace("male", 0)
    data = data.replace("female", 1)

    # Calculate avg age to replace null values with
    avg_age = round(sum(data["Age"].dropna()) / len(data["Age"].dropna()))
    data["Age"] = data["Age"].fillna(avg_age)

    # Calculate max values for normalization of columns
    max_age = max(data["Age"])
    max_pclass = max(data["Pclass"])

    # Normalize values
    data["Pclass"] /= max_pclass
    data["Age"] /= max_age

    return data

# Clean both train and test data
ctrain = clean_data(train_X)
ctest = clean_data(test_X)

# Get labels
train_labels = ctrain.columns

In [139]:
import torch

# Set seed for torch
torch.manual_seed(42)

# Put data onto tensors
train_X_tensor = torch.tensor(ctrain.to_numpy())
train_y_tensor = torch.tensor(train_y.to_numpy())
test_X_tensor = torch.tensor(ctest.to_numpy())

# Split data into training- and validation sets
train_split = int(0.8 * len(train_X_tensor))
X_train, y_train = train_X_tensor[:train_split], train_y_tensor[:train_split]
X_valid, y_valid = train_X_tensor[train_split:], train_y_tensor[train_split:]

In [140]:
#from torch.utils.data import DataLoader

#train_dataloader = DataLoader(train_tensor, batch_size=32, shuffle=True)
#test_dataloader = DataLoader(test_tensor, batch_size=32, shuffle=True)

### Create model

In [141]:
# Load neural network
from torch import nn

# Select device (aka cpu/gpu)
device = "cuda" if torch.cuda.is_available() else "cpu"
#print(f"Using {device} device")

# Defining the neural network class
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        #self.flatten = nn.Flatten()

        self.layer_1 = nn.Linear(in_features=3, out_features=16)
        self.layer_2 = nn.Linear(in_features=16, out_features=16)
        self.layer_3 = nn.Linear(in_features=16, out_features=1)

        """
        self.linear_relu_stack = nn.Sequential
        (
            nn.Linear(3, 16),
            nn.ReLU(),
            nn.Linear(16, 16),
            nn.ReLU(),
            nn.Linear(16, 1)
        )
        """
    
    def forward(self, x):
        layer1 = self.layer_1(x)
        layer2 = self.layer_2(layer1)
        layer3 = self.layer_3(layer2)
        output = 0 if layer3 < 0.5 else 1
        return output

model = NeuralNetwork().to(device)

### Train model

In [142]:
# Set loss function
loss_fn = nn.BCEWithLogitsLoss()

# Optimizer
optim = torch.optim.SGD(params=model.parameters(), lr=0.1)

# Define accuracy function
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item()
    acc = (correct / len(y_true)) * 100
    return acc

# Amount of training epochs to run
epochs = 10

In [148]:
y_preds = model(X_valid.float())

#accuracy_fn(y_valid, y_preds)


RuntimeError: Boolean value of Tensor with more than one value is ambiguous