In [124]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import csv
import numpy as np
import torch_directml
from tqdm import tqdm

# Device handle obtained from torch-directml.
# NOTE(review): `dml` is not referenced anywhere else in the visible notebook —
# confirm whether the model/tensors were meant to be moved onto it.
dml = torch_directml.device()

In [125]:
# datasets

class TitanicSurvivalDataset(Dataset):
    """Map-style dataset pairing each feature row with its target value.

    Args:
        data_tensor: Tensor whose first dimension indexes the samples.
        target_tensor: Tensor indexed in parallel with ``data_tensor``.
    """

    def __init__(self, data_tensor: torch.Tensor, target_tensor: torch.Tensor):
        self.data, self.target = data_tensor, target_tensor

    def __len__(self):
        """Number of samples, taken from the feature tensor only."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        # ``Tensor.data`` gives a tensor detached from autograd history.
        features = self.data.data
        targets = self.target.data
        return features[index], targets[index]

In [126]:
# functions for converting data
# Length that each fully encoded row is zero-padded up to (see list_string_to_ascii).
MAXLENGTH = 1000

def add_padding(list_item, max=500):
    """Right-pad ``list_item`` with zeros until it holds at least ``max`` entries.

    The list is extended in place and the very same object is returned; a list
    that is already ``max`` long or longer is left unchanged.

    Args:
        list_item: List to pad (mutated).
        max: Minimum length the list should have afterwards.

    Returns:
        ``list_item`` itself, after padding.
    """
    shortfall = max - len(list_item)
    if shortfall > 0:
        list_item.extend([0] * shortfall)
    return list_item

def list_string_to_ascii(train_list):
    """Encode every row of string fields as one flat list of integers.

    Each field becomes the ``ord`` value of its characters, zero-padded to a
    per-field width (250 for the field at index 2, 40 for every other field)
    and followed by a ``-1`` separator. The concatenated fields are then
    zero-padded to ``MAXLENGTH``. Nothing is truncated, so an over-long field
    or row simply produces a longer output.

    Args:
        train_list: Iterable of rows, each row an iterable of strings.

    Returns:
        A list with one flat list of ints per input row.
    """
    encoded_rows = []
    for row in train_list:
        flat = []
        for position, field in enumerate(row):
            # The field at index 2 gets a wider slot than the others.
            width = 250 if position == 2 else 40
            codes = add_padding([ord(ch) for ch in field], width)
            codes.append(-1)  # marks the end of this field's slot
            flat.extend(codes)
        encoded_rows.append(add_padding(flat, MAXLENGTH))
    return encoded_rows

In [127]:
class TitanicSurvivalDatasetTest(Dataset):
    """Map-style dataset over feature rows only, with no targets attached.

    Args:
        test_tensor: Tensor whose first dimension indexes the samples.
    """

    def __init__(self, test_tensor: torch.Tensor):
        self.data = test_tensor

    def __len__(self):
        """Number of samples held in the feature tensor."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the feature row stored at ``index``."""
        # ``Tensor.data`` gives a tensor detached from autograd history.
        detached = self.data.data
        return detached[index]

In [128]:
# reading and converting file contents into tensor

def read_csv(csv_file):
    """Parse a comma-separated file and return its rows without the header.

    Args:
        csv_file: An open text file (or any iterable of lines) accepted by
            ``csv.reader``.

    Returns:
        A list of rows, each a list of strings, excluding the first row.
        An input with no rows at all yields an empty list.
    """
    rows = list(csv.reader(csv_file, delimiter=","))
    # Slicing (unlike ``del rows[0]``) does not raise IndexError on empty input.
    return rows[1:]

def remove_id(list_file):
    """Return the second column of every row, converted to ``int``.

    Args:
        list_file: Iterable of rows; each row must be indexable at position 1
            with a value that ``int`` accepts.

    Returns:
        A list of ints, one per input row, in the original order.
    """
    return [int(row[1]) for row in list_file]

In [129]:


# Load the raw CSV rows. The context manager closes both files as soon as they
# have been read, and newline="" is what the csv module expects for file objects.
with open("test.csv", "r", newline="") as test_file, \
        open("gender_submission.csv", "r", newline="") as target_file:
    test_list_initial = read_csv(test_file)
    target_list_initial = read_csv(target_file)

# Feature rows are used unchanged; targets keep only the integer second column.
test_list = test_list_initial
target_list = remove_id(target_list_initial)

# Encode every field as padded character codes, then wrap them in a float tensor.
test_ascii = list_string_to_ascii(test_list)
test_tensor = torch.FloatTensor(test_ascii)
test_dataset = TitanicSurvivalDatasetTest(test_tensor)

# test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [130]:
class SurvivedModel(nn.Module):
    """Two-layer perceptron with sigmoid activations, one output value per row.

    Args:
        MAXLENGTH: Number of input features expected by the first linear layer.
    """

    def __init__(self, MAXLENGTH=1000):
        super().__init__()
        # NOTE(review): a pickled instance of this class is loaded elsewhere in
        # the notebook; presumably the attribute names below must stay stable
        # for that file to keep working — confirm before renaming.
        self.layer1 = nn.Linear(MAXLENGTH, 48)
        self.layer2 = nn.Linear(48, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Apply linear -> sigmoid -> linear -> sigmoid and return the result."""
        hidden = self.sigmoid(self.layer1(x))
        return self.sigmoid(self.layer2(hidden))

In [131]:
# Location of the serialized model on disk.
path = "survived_model.pt"

# NOTE(review): torch.load unpickles the file, which can execute arbitrary code —
# only load checkpoints from a trusted source. The annotation suggests the file
# holds a whole SurvivedModel object rather than a state_dict; presumably it was
# written with torch.save(model, path) — confirm. Newer PyTorch releases default
# to weights_only=True, which rejects such files unless overridden — verify
# against the installed version.
survived_model: SurvivedModel = torch.load(path)

def binary_output(output, minimum_threshold=0.5):
    """Map a score to a class label: 1 when strictly above the threshold, else 0.

    Args:
        output: Score to classify.
        minimum_threshold: Cut-off; a score equal to it maps to 0.

    Returns:
        The int 1 or 0.
    """
    if output > minimum_threshold:
        return 1
    return 0

def check_accuracy(list1, list2):
    """Return the fraction of positions in ``list1`` whose value equals ``list2``'s.

    Args:
        list1: Sequence of predictions; its length is the denominator.
        list2: Sequence of reference values, indexed in parallel with ``list1``.
            Entries beyond ``len(list1)`` are ignored.

    Returns:
        ``matches / len(list1)`` as a float, or ``0.0`` when ``list1`` is empty.

    Raises:
        IndexError: If ``list2`` is a list shorter than ``list1``.
    """
    total = len(list1)
    if total == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0
    correct = sum(1 for i in range(total) if list1[i] == list2[i])
    return correct / total

# Predictions from the most recent start_inference() run, one 0/1 label per sample.
result = []
def start_inference():
    """Run the loaded model over ``test_dataset`` and collect binary predictions.

    The module-level ``result`` list is emptied first, so calling this more than
    once does not pile up duplicate predictions; it is then refilled in place.

    Returns:
        The module-level ``result`` list, holding one 0/1 label per sample.
    """
    survived_model.eval()
    # Clear in place so the same list object the rest of the notebook reads is kept.
    result.clear()
    # Gradients are never used here, so skip building the autograd graph.
    with torch.no_grad():
        for person in tqdm(test_dataset, desc="Test data"):
            output: torch.Tensor = survived_model(person)
            result.append(binary_output(output.item(), 0.5))
    return result

# Run inference (fills the module-level `result` list), then report the fraction
# of predictions that match `target_list`.
start_inference()
print("Accuracy: ", check_accuracy(result, target_list))

Test data: 100%|██████████| 418/418 [00:00<00:00, 9606.73it/s]

Accuracy:  0.916267942583732





In [132]:
# save submission
header = ["PassengerId", "Survived"]

# The context manager flushes and closes the file even if a row fails to write.
# newline="" keeps the csv module's own line endings from being translated a
# second time, which otherwise shows up as blank lines between rows on Windows.
with open("jdrlaurian_submission.csv", 'w', newline="") as file:
    writer = csv.writer(file)
    writer.writerow(header)
    # The first column of each test row goes under "PassengerId", next to the
    # prediction stored at the same position in `result`.
    for i in range(len(test_list)):
        writer.writerow([test_list[i][0], result[i]])
