In [1]:
# Optional upgrade of IPython, left disabled.
#!pip install -qU ipython
# Load the autoreload extension and select mode 2, which re-imports modules
# before each cell runs so edits to imported .py files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
cd titanic

/kaggle/working/titanic


In [3]:
import numpy as np
import pandas as pd

path = "/kaggle/input/competitions/titanic/train.csv"
df = pd.read_csv(path)

# Column-wise encoding, all computed from the freshly loaded frame:
#   Pclass -> shifted down by one
#   Age    -> missing values replaced with the column mean
#   Sex    -> "male" = 0, "female" = 1
# NOTE(review): the Age mean is taken over every row, i.e. before any
# train/eval split is made further down.
df = df.assign(
    Pclass=df["Pclass"] - 1,
    Age=df["Age"].fillna(value=df["Age"].mean()),
    Sex=df["Sex"].map({"male": 0, "female": 1}),
)
# One-hot encode the two categorical columns; dummy_na=True also adds an
# indicator column for missing values.
df = pd.get_dummies(df, columns=["Embarked", "Cabin"], dummy_na=True, dtype=np.int64)

# Discard the columns that are not used as features, then separate the label
# column from the feature matrix.
df = df.drop(columns=["Name", "Ticket", "PassengerId"])
tgts = df.pop("Survived").to_numpy()
inps = df.to_numpy()

In [4]:
import torch

# Run-wide configuration.
SEED = 63166
# Seed torch's global RNG so that weight initialisation and DataLoader
# shuffling are repeatable on a fresh "Restart & Run All". Kept away from the
# end of the cell so the returned Generator is not echoed as cell output.
torch.manual_seed(SEED)
BATCH_SZ = 32  # mini-batch size used by the data loaders
LR = 1e-4      # learning rate
device = "cuda" if torch.cuda.is_available() else "cpu"
#amp_dtype = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The split is stratified on the
# targets and driven by SEED, so it is the same on every run.
train_inps, eval_inps, train_tgts, eval_tgts = train_test_split(
    inps,
    tgts,
    stratify=tgts,
    test_size=0.2,
    random_state=SEED,
)

In [6]:
from torch.utils.data import TensorDataset, DataLoader

# Wrap each split as a TensorDataset: features are cast to float32, targets
# keep the dtype of the numpy label array.
train_ds = TensorDataset(torch.tensor(train_inps).float(), torch.tensor(train_tgts))
# The evaluation dataset is built from the held-out split (it previously
# duplicated the training arrays, so evaluation would have run on train data).
eval_ds = TensorDataset(torch.tensor(eval_inps).float(), torch.tensor(eval_tgts))

# Training batches are reshuffled every epoch.
train_dl = DataLoader(train_ds,
                      batch_size=BATCH_SZ,
                      shuffle=True,
                      num_workers=2,
                      pin_memory=False,
                      drop_last=False,
                      persistent_workers=True)

# Evaluation iterates the held-out dataset in a fixed order.
eval_dl = DataLoader(eval_ds,
                     batch_size=BATCH_SZ,
                     shuffle=False,
                     num_workers=2,
                     pin_memory=False,
                     drop_last=False,
                     persistent_workers=True)

In [7]:
import torch.nn.functional as F

def loss_fn(inps, tgts):
    """Binary cross-entropy between raw logits and binary targets.

    Both tensors are flattened to 1-D before the loss is computed, and the
    targets are cast to float.

    Args:
        inps: tensor of logits (no sigmoid applied).
        tgts: tensor of targets with the same number of elements as ``inps``.

    Returns:
        Scalar tensor holding the mean loss over all elements.
    """
    flat_targets = tgts.view(-1).float()
    flat_logits = inps.view(-1)
    return F.binary_cross_entropy_with_logits(input=flat_logits, target=flat_targets)

def accuracy(inps, tgts):
    """Percentage of elements whose thresholded prediction equals the target.

    Logits are flattened, passed through a sigmoid and turned into 0/1
    predictions with a strict ``> 0.5`` cut-off.

    Args:
        inps: tensor of logits.
        tgts: tensor of integer targets with the same number of elements.

    Returns:
        Scalar tensor in the range 0-100.
    """
    flat_targets = tgts.view(-1)
    probabilities = inps.view(-1).sigmoid()
    predictions = (probabilities > 0.5).long()
    hits = (predictions == flat_targets).float()
    return hits.mean() * 100

In [8]:
import torch.nn as nn

class Model(nn.Module):
    """Single linear layer that emits one raw logit per input row.

    Args:
        in_features: number of features in each input row (default 1).
    """

    def __init__(self, in_features=1):
        super().__init__()
        self.linear1 = nn.Linear(in_features=in_features, out_features=1)

    def forward(self, x):
        """Return unnormalised logits of shape ``(..., 1)``.

        No activation is applied: ``loss_fn`` uses
        ``binary_cross_entropy_with_logits`` and ``accuracy`` applies the
        sigmoid itself, so both need logits that may be negative. A ReLU here
        would clamp them at zero, which keeps every predicted probability at
        or above 0.5 and gives zero gradient for rows with a negative
        pre-activation.
        """
        return self.linear1(x)

In [11]:
# One input unit per feature column. The model is moved to `device` because
# the training loop moves every batch there; leaving it on the CPU fails with
# a device mismatch whenever CUDA is selected.
model = Model(in_features=train_inps.shape[1]).to(device)
# Use the configured learning rate instead of relying on the optimizer's
# default (older torch releases require `lr` to be given explicitly).
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
# Gradient scaler for the float16 autocast used during training.
scaler = torch.GradScaler(device=device)

In [12]:
from tqdm.auto import tqdm

def train_one_epoch(model,
                    optimizer,
                    train_dl,
                    scaler):
    """Run one mixed-precision optimisation pass over ``train_dl``.

    Each batch is moved to the module-level ``device``, run through ``model``
    under float16 autocast, and used for one scaled backward pass and
    optimizer step.

    Args:
        model: module mapping a batch of inputs to logits.
        optimizer: optimizer that updates ``model``'s parameters.
        train_dl: iterable yielding ``(inputs, targets)`` batches.
        scaler: gradient scaler used for the scaled backward pass and step.

    Returns:
        dict with ``"loss"`` (mean of the per-batch losses) and ``"accuracy"``
        (percentage over every sample seen this epoch, based on the logits
        produced during training). Both are ``nan`` when ``train_dl`` yields
        no batches.
    """
    all_lgts = []
    all_tgts = []
    all_losses = []

    model.train()
    for inps, tgts in tqdm(train_dl):
        inps = inps.to(device, non_blocking=True)
        tgts = tgts.to(device, non_blocking=True)

        with torch.autocast(device_type=device, dtype=torch.float16, cache_enabled=True):
            lgts = model(inps)
            loss = loss_fn(lgts, tgts)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad()

        # Record the model outputs (not the raw inputs) as detached float32
        # CPU tensors, so the metrics below neither hold on to the autograd
        # graph nor depend on half-precision CPU kernels.
        all_lgts.append(lgts.detach().float().cpu().reshape(-1))
        all_tgts.append(tgts.cpu().reshape(-1))
        all_losses.append(loss.detach().float().cpu())

    # An empty loader has nothing to aggregate.
    if not all_losses:
        return {"loss": float("nan"), "accuracy": float("nan")}

    return {
        "loss": torch.stack(all_losses).mean().item(),
        "accuracy": accuracy(torch.cat(all_lgts), torch.cat(all_tgts)).item(),
    }

In [None]:
# Train for a single epoch; the loop index itself is not used.
for e in range(1):
    train_one_epoch(model, optimizer=optimizer, train_dl=train_dl, scaler=scaler)