In [48]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn import tree
import matplotlib.pyplot as plt


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

kaggle/input/.DS_Store
kaggle/input/titanic/test.csv
kaggle/input/titanic/train.csv


In [49]:
train_data = pd.read_csv("kaggle/input/titanic/train.csv")
train_data.head()

test_data = pd.read_csv("kaggle/input/titanic/test.csv")
test_data.head()

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [50]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import LabelEncoder  


device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')


class Model(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Model, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, output_dim),
            nn.ReLU(),
            nn.Linear(output_dim, output_dim),
            nn.ReLU(),
            nn.Linear(output_dim, 1),
            nn.Sigmoid(),
        )
    
    def forward(self, x):
        out = self.model(x)
        return out

model = Model(8, 1024)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-4)

In [51]:
def prep_data(data):
    X_Train = data
    X_Train = X_Train.drop(["Ticket"], axis=1)
    X_Train = X_Train.drop(["Cabin"], axis=1)
    X_Train = X_Train.drop(["Embarked"], axis=1)
    X_Train = X_Train.drop(["PassengerId"], axis=1)
    X_Train = X_Train.drop(["Fare"], axis=1)
    X_Train["Age"] = X_Train["Age"].fillna(X_Train["Age"].mean())

    # Replace Age with teen/child/adult columns with a 1 or a 0 
    X_Train["Teen"] = 0
    X_Train["Child"] = 0
    X_Train["Adult"] = 0
    X_Train.loc[X_Train["Age"] < 13, "Child"] = 1
    X_Train.loc[(X_Train["Age"] >= 13) & (X_Train["Age"] < 20), "Teen"] = 1
    X_Train.loc[X_Train["Age"] >= 20, "Adult"] = 1
    X_Train = X_Train.drop(["Age"], axis=1)

    # If rare titles are found in name, add a 1 to the rare field 
    X_Train["Rare"] = 0
    X_Train.loc[X_Train["Name"].str.contains("Lady."), "Rare"] = 1
    X_Train.loc[X_Train["Name"].str.contains("Countess."), "Rare"] = 1
    X_Train.loc[X_Train["Name"].str.contains("Capt."), "Rare"] = 1
    X_Train.loc[X_Train["Name"].str.contains("Col."), "Rare"] = 1
    X_Train.loc[X_Train["Name"].str.contains("Don."), "Rare"] = 1
    X_Train.loc[X_Train["Name"].str.contains("Dr."), "Rare"] = 1
    X_Train.loc[X_Train["Name"].str.contains("Major."), "Rare"] = 1
    X_Train.loc[X_Train["Name"].str.contains("Rev."), "Rare"] = 1
    X_Train.loc[X_Train["Name"].str.contains("Sir."), "Rare"] = 1
    X_Train = X_Train.drop(["Name"], axis=1)

    try:
        X_Train = X_Train.drop(["Survived"], axis=1)
    except:
        pass

    le = LabelEncoder()
    X_Train["Sex"] = le.fit_transform(X_Train["Sex"])
    return X_Train

X_Train = prep_data(train_data)
X_Test = prep_data(test_data)
y = train_data["Survived"]

print(X_Train)
X_Train.describe()

     Pclass  Sex  SibSp  Parch  Teen  Child  Adult  Rare
0         3    1      1      0     0      0      1     0
1         1    0      1      0     0      0      1     0
2         3    0      0      0     0      0      1     0
3         1    0      1      0     0      0      1     0
4         3    1      0      0     0      0      1     0
..      ...  ...    ...    ...   ...    ...    ...   ...
886       2    1      0      0     0      0      1     1
887       1    0      0      0     1      0      0     0
888       3    0      1      2     0      0      1     0
889       1    1      0      0     0      0      1     0
890       3    1      0      0     0      0      1     0

[891 rows x 8 columns]


Unnamed: 0,Pclass,Sex,SibSp,Parch,Teen,Child,Adult,Rare
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,2.308642,0.647587,0.523008,0.381594,0.106622,0.077441,0.815937,0.041526
std,0.836071,0.47799,1.102743,0.806057,0.308805,0.26744,0.387753,0.199616
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
50%,3.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
75%,3.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0
max,3.0,1.0,8.0,6.0,1.0,1.0,1.0,1.0


In [52]:

epoch = 500
loss_values = []
X_Train = torch.from_numpy(X_Train.to_numpy()).float()
X_Test = torch.from_numpy(X_Test.to_numpy()).float()
y = torch.from_numpy(y.to_numpy()).float().unsqueeze(1)

train_data_loader = DataLoader(TensorDataset(X_Train, y), batch_size=1, shuffle=True, pin_memory=True)
test_data_loader = DataLoader(TensorDataset(X_Train, y), batch_size=1, shuffle=True, pin_memory=True)

for n in range(epoch):
    for x, y_test in train_data_loader:
        optimizer.zero_grad()
        y_pred = model(x)
        loss = nn.functional.mse_loss(y_pred, y_test)
        loss.backward()
        optimizer.step()

    loss_values.append(loss.item())
    print("Epoch: ", n, "Loss: ", loss.item())

step = np.linspace(0, 100, epoch)
fig, ax = plt.subplots(figsize=(8,5))
plt.plot(step, np.array(loss_values))
plt.title("Step-wise Loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.show()

Epoch:  0 Loss:  0.19102360308170319
Epoch:  1 Loss:  0.17289914190769196


KeyboardInterrupt: 

In [None]:
num_right = 0
for x, y_test in test_data_loader:
    y_pred = model(x)
    ypred = int(y_pred.round().item())
    if ypred == y_test.item():
        num_right += 1

print("Accuracy: ", num_right / len(train_data))

Accuracy:  0.8271604938271605


In [None]:
predictions = []
for n in X_Test:
    predictions.append(int(model(n).round().item()))

print(predictions)

output = pd.DataFrame({'PassengerId': test_data.PassengerId, 'Survived': predictions})
output.to_csv('submission.csv', index=False)
print("Your submission was successfully saved!")

[0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 