# Setup

In [None]:
# Kaggle.json

from google.colab import files

# Upload kaggle.json (the Kaggle API token) from the local machine.
# The trailing semicolon stops the notebook from displaying the call's return
# value as the cell output; that value carries the uploaded file's contents,
# so echoing it would store the API key in the saved notebook.
files.upload();

In [None]:
!mkdir -p /root/.config/kaggle
!mv kaggle.json /root/.config/kaggle/
!chmod 600 /root/.config/kaggle/kaggle.json

from kaggle.cli import main
!kaggle competitions download -c titanic
!unzip titanic.zip
!rm titanic.zip

In [None]:
# Data Loading

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# NOTE: despite its name, `target` lists the input feature columns;
# the label column is 'Survived' (see y_train below).
target = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
sex_codes = {'male': 0, 'female': 1}

# Imputation values are computed once, from the training data only, and then
# reused for the test data so both splits are filled with the same numbers.
age_fill = train['Age'].mean()
fare_fill = train['Fare'].mean()

# .copy() gives each feature frame its own data, so the column assignments
# below modify x_train / x_test themselves rather than a slice of train / test
# (which is what triggers pandas' SettingWithCopyWarning).
x_train = train[target].copy()
x_train['Sex'] = x_train['Sex'].map(sex_codes)
x_train['Age'] = x_train['Age'].fillna(age_fill)
y_train = train['Survived']

x_test = test[target].copy()
x_test['Sex'] = x_test['Sex'].map(sex_codes)
x_test['Age'] = x_test['Age'].fillna(age_fill)
x_test['Fare'] = x_test['Fare'].fillna(fare_fill)



In [None]:
# Delete anything in the working directory matching submission*.csv
# (the submission files written by the cells below); -f keeps this quiet
# when nothing matches.
!rm -rf submission*.csv

In [None]:
import pandas as pd

# Create a new submit_file where 'Survived' is all 1s
submit_file_all_survived = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': 1})

submit_file_all_survived.to_csv('submission_all_survived.csv', index = False)
!kaggle competitions submit -c titanic -f 'submission_all_survived.csv' -m "Message"

100% 2.77k/2.77k [00:00<00:00, 3.49kB/s]
400 Client Error: Bad Request for url: https://www.kaggle.com/api/v1/competitions/submissions/submit/titanic


# Normalization + Support Vector Machine

- Score: 0.7790

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Standardise the features: the scaler is fitted on the training features and
# the same fitted scaler is applied to the test features.
# The scaled frames are stored under their own names so x_train / x_test are
# not replaced by scaled copies that later cells would pick up unknowingly.
scaler = StandardScaler()
x_train_scaled = pd.DataFrame(
    scaler.fit_transform(x_train), columns=x_train.columns, index=x_train.index
)
x_test_scaled = pd.DataFrame(
    scaler.transform(x_test), columns=x_test.columns, index=x_test.index
)

# RBF-kernel support vector classifier trained on the scaled features.
model = SVC(kernel = 'rbf', C = 1.0, gamma = 'scale')
model.fit(x_train_scaled, y_train)

# Predict the test set and pair each prediction with its PassengerId.
y_test = model.predict(x_test_scaled)
submit_file = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': y_test})

# Normalization + Decision Tree

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Standardise the features: the scaler is fitted on the training features and
# the same fitted scaler is applied to the test features.
# The scaled frames are stored under their own names so x_train / x_test are
# not replaced by scaled copies that later cells would pick up unknowingly.
scaler = StandardScaler()
x_train_scaled = pd.DataFrame(
    scaler.fit_transform(x_train), columns=x_train.columns, index=x_train.index
)
x_test_scaled = pd.DataFrame(
    scaler.transform(x_test), columns=x_test.columns, index=x_test.index
)

# Entropy-criterion tree limited to depth 4. random_state is fixed so that
# re-running the cell produces the same tree and the same predictions.
model = DecisionTreeClassifier(criterion = 'entropy', max_depth = 4, random_state = 42)
model.fit(x_train_scaled, y_train)

# Predict the test set and pair each prediction with its PassengerId.
y_test = model.predict(x_test_scaled)
submit_file = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': y_test})

In [None]:
submit_file.to_csv('submission_decisiontree.csv', index = False)
!kaggle competitions submit -c titanic -f submission_decisiontree.csv -m "Message"

100% 2.77k/2.77k [00:00<00:00, 3.31kB/s]
400 Client Error: Bad Request for url: https://www.kaggle.com/api/v1/competitions/submissions/submit/titanic


# Normalization + Random Forest

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Standardise the features: the scaler is fitted on the training features and
# the same fitted scaler is applied to the test features.
# The scaled frames are stored under their own names so x_train / x_test are
# not replaced by scaled copies that later cells would pick up unknowingly.
scaler = StandardScaler()
x_train_scaled = pd.DataFrame(
    scaler.fit_transform(x_train), columns=x_train.columns, index=x_train.index
)
x_test_scaled = pd.DataFrame(
    scaler.transform(x_test), columns=x_test.columns, index=x_test.index
)

# Forest of 75 entropy-criterion trees, each limited to depth 6. random_state
# is fixed so that re-running the cell produces the same forest and predictions.
model = RandomForestClassifier(
    n_estimators = 75,
    criterion = 'entropy',
    max_depth = 6,
    random_state = 42
)
model.fit(x_train_scaled, y_train)

# Predict the test set and pair each prediction with its PassengerId.
y_test = model.predict(x_test_scaled)
submit_file = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': y_test})

In [None]:
submit_file.to_csv('submission_rfc.csv', index = False)
!kaggle competitions submit -c titanic -f submission_rfc.csv -m "Message"

  0% 0.00/2.77k [00:00<?, ?B/s]100% 2.77k/2.77k [00:00<00:00, 14.6kB/s]
Successfully submitted to Titanic - Machine Learning from Disaster

# Normalization + AdaBoost

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# Standardise the features: the scaler is fitted on the training features and
# the same fitted scaler is applied to the test features.
# The scaled frames are stored under their own names so x_train / x_test are
# not replaced by scaled copies that later cells would pick up unknowingly.
scaler = StandardScaler()
x_train_scaled = pd.DataFrame(
    scaler.fit_transform(x_train), columns=x_train.columns, index=x_train.index
)
x_test_scaled = pd.DataFrame(
    scaler.transform(x_test), columns=x_test.columns, index=x_test.index
)

# Base learner for boosting: an entropy-criterion tree limited to depth 5.
# random_state is fixed on both the base tree and the ensemble so that
# re-running the cell produces the same model and predictions.
base = DecisionTreeClassifier(criterion = 'entropy', max_depth = 5, random_state = 42)

model = AdaBoostClassifier(
    n_estimators = 100,
    learning_rate = 0.1,
    estimator = base,
    random_state = 42
)
model.fit(x_train_scaled, y_train)

# Predict the test set and pair each prediction with its PassengerId.
# The frame is named `submitted_file` here (not `submit_file`); the AdaBoost
# submission cell reads it under that name.
y_pred = model.predict(x_test_scaled)
submitted_file = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': y_pred})


In [None]:
submitted_file.to_csv('submission_adaboost.csv', index = False)
!kaggle competitions submit -c titanic -f submission_adaboost.csv -m "Is Adaboost good?"

100% 2.77k/2.77k [00:01<00:00, 1.78kB/s]
400 Client Error: Bad Request for url: https://www.kaggle.com/api/v1/competitions/submissions/submit/titanic


# Neural Network

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler # <-- Import added
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Seed PyTorch's random number generator so the weight initialisation (and
# therefore the training run) is repeatable when the cell is re-executed.
torch.manual_seed(42)

# Standardise the features: fit on the training features, reuse on the test features.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train.values)
x_test_scaled = scaler.transform(x_test.values)


class Net(nn.Module):
    """Four-layer fully connected classifier producing 2-class log-probabilities.

    Args:
        in_features: Number of input features per sample. Defaults to 6, the
            width this network was originally hard-coded for.
    """

    def __init__(self, in_features: int = 6):
        super().__init__()
        self.layer1 = nn.Linear(in_features, 10)
        self.layer2 = nn.Linear(10, 100)
        self.layer3 = nn.Linear(100, 10)
        self.layer4 = nn.Linear(10, 2)

    def forward(self, x):
        """Return log-softmax scores over the 2 classes for each row of `x`."""
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        x = F.relu(self.layer3(x))
        x = self.layer4(x)
        return F.log_softmax(x, dim = 1)

x_train_tensor = torch.tensor(x_train_scaled.astype(np.float32)).to(device)
y_train_tensor = torch.tensor(y_train.values).long().to(device)
x_test_tensor = torch.tensor(x_test_scaled.astype(np.float32)).to(device)

# Size the input layer from the data instead of relying on the default width.
model = Net(in_features = x_train_tensor.shape[1]).to(device)
optimizer = optim.SGD(model.parameters(), lr = 0.01)

# Every step uses the whole training tensor (no mini-batches).
num_epoch = 10000
model.train()  # nothing switches the mode inside the loop, so set it once
for epoch in range(num_epoch):
    optimizer.zero_grad()
    output = model(x_train_tensor)

    # The network already outputs log-probabilities, hence nll_loss.
    loss = F.nll_loss(output, y_train_tensor)
    loss.backward()
    optimizer.step()

    if epoch % 1000 == 0:
        print('Train Epoch: {} \tLoss: {:.6f}'.format(epoch, loss.item()))


model.eval()
with torch.no_grad():
    # Despite the name, these are log-softmax outputs, not raw logits.
    y_pred_logits = model(x_test_tensor)

# Convert the predicted class indices to a NumPy array before building the
# DataFrame, so the 'Survived' column holds plain integers rather than
# depending on how pandas handles a torch tensor.
y_pred = torch.argmax(y_pred_logits, dim = 1).cpu().numpy()
submit_file = pd.DataFrame({'PassengerId': test['PassengerId'], 'Survived': y_pred})

Train Epoch: 0 	Loss: 0.757276
Train Epoch: 1000 	Loss: 0.477783
Train Epoch: 2000 	Loss: 0.424592
Train Epoch: 3000 	Loss: 0.412406
Train Epoch: 4000 	Loss: 0.403564
Train Epoch: 5000 	Loss: 0.397167
Train Epoch: 6000 	Loss: 0.392404
Train Epoch: 7000 	Loss: 0.388433
Train Epoch: 8000 	Loss: 0.384740
Train Epoch: 9000 	Loss: 0.382263


In [None]:
submit_file.to_csv('submission_nn.csv', index = False)
!kaggle competitions submit -c titanic -f submission_nn.csv -m "Is NN good?"

100% 2.77k/2.77k [00:00<00:00, 3.40kB/s]
Successfully submitted to Titanic - Machine Learning from Disaster