In [3]:
import copy
import matplotlib.pyplot as plt
import numpy as np
import joblib
import pandas as pd
import torch
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, FunctionTransformer
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Read the test CSV, using the PassengerId column as the row index.
test_df = pd.read_csv(
    "../dataset/test.csv",
    index_col="PassengerId",
)

In [9]:
# Custom transformer functions must exist in this namespace before the
# preprocessor is loaded with joblib.
# NOTE(review): presumably the pickled pipeline refers to them by name — confirm
# against the training notebook.
drop_cols = ["Name", "Ticket", "Cabin"]


def drop_unneeded_cols_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` without the columns listed in ``drop_cols``.

    Columns from ``drop_cols`` that are absent from ``df`` are skipped
    silently (``errors="ignore"``); the input frame is not modified.
    """
    return df.drop(labels=drop_cols, axis="columns", errors="ignore")

def log1p_fare_df(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` whose ``Fare`` column is replaced by ``log1p(Fare)``.

    The input frame is left untouched. Raises ``KeyError`` if ``df`` has no
    ``Fare`` column.
    """
    # assign() works on a copy, so the caller's frame keeps its raw fares.
    return df.assign(Fare=np.log1p(df["Fare"]))

# Define the model architecture so PyTorch can hold the saved weights
class SimpleNN(nn.Module):
    """Small fully connected network that emits one raw logit per input row.

    Layout: ``fc1 -> ReLU -> dropout``, then ``num_layers - 1`` repetitions of
    ``fc2 -> ReLU -> dropout``, then ``fc3``. No sigmoid is applied here.

    Note that every extra hidden step reuses the *same* ``fc2`` layer, so
    ``num_layers > 2`` shares weights rather than adding parameters. The
    attribute names (``fc1``, ``fc2``, ``fc3``) are the keys of the state dict.

    Args:
        input_size: Number of input features per row.
        hidden_size: Width of the hidden layers.
        num_layers: Total number of hidden steps; values <= 1 skip ``fc2``.
        dropout: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_size, hidden_size=16, num_layers=1, dropout=0.1):
        super().__init__()
        hidden_width = int(hidden_size)
        # Layers are created in fc1, fc2, fc3 order.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_width)
        self.fc3 = nn.Linear(hidden_width, 1)
        self.dropout = nn.Dropout(dropout)
        self.hidden_size = hidden_size
        self.num_layers = num_layers

    def forward(self, x):
        """Map a batch of feature rows to a column of logits."""
        hidden = self.dropout(torch.relu(self.fc1(x)))
        # The first hidden step is fc1; the remaining ones all go through fc2.
        for _ in range(self.num_layers - 1):
            hidden = self.dropout(torch.relu(self.fc2(hidden)))
        return self.fc3(hidden)

In [10]:
# Load the fitted scikit-learn preprocessor.
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load
# artifacts that come from a trusted source.
preprocess = joblib.load("../models/titanic_preprocessor.joblib")

# Create the model. (Note: input_size must match the number of feature columns
# the preprocessor produces, and the other hyperparameters must match training.)
model = SimpleNN(input_size=10, hidden_size=16, num_layers=2, dropout=0.1)

# map_location="cpu" lets a checkpoint that was saved on a GPU load on a
# CPU-only machine; weights_only=True restricts unpickling to tensors and
# plain containers instead of arbitrary objects.
state_dict = torch.load(
    "../models/titanic_best_model.pth",
    map_location="cpu",
    weights_only=True,
)
model.load_state_dict(state_dict)
model.eval()  # CRITICAL: Turn off dropout for real predictions!

SimpleNN(
  (fc1): Linear(in_features=10, out_features=16, bias=True)
  (fc2): Linear(in_features=16, out_features=16, bias=True)
  (fc3): Linear(in_features=16, out_features=1, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [11]:
# Two hand-written passenger records, indexed by PassengerId.
movie_characters = pd.DataFrame(
    [
        dict(
            PassengerId=9991,
            Pclass=3,                  # Jack was in 3rd class
            Name="Dawson, Mr. Jack",
            Sex="male",
            Age=20,
            SibSp=0,                   # Traveled with Fabrizio, but no family
            Parch=0,
            Ticket="A/5 21171",        # Won in a poker game
            Fare=0.0,                  # Technically didn't pay
            Cabin=None,
            Embarked="S",              # Southampton
        ),
        dict(
            PassengerId=9992,
            Pclass=1,                  # Rose was in 1st class
            Name="DeWitt Bukater, Miss. Rose",
            Sex="female",
            Age=17,
            SibSp=1,                   # Traveled with Cal (fiancé)
            Parch=1,                   # Traveled with her mother
            Ticket="PC 17599",
            Fare=150.00,               # Expensive 1st class suite
            Cabin="B52",
            Embarked="S",
        ),
    ]
).set_index("PassengerId")

In [12]:
# Apply the already-fitted preprocessor: transform only, never re-fit here.
X_movie = preprocess.transform(movie_characters)

# Wrap the feature matrix as a float32 tensor for the network.
X_movie_t = torch.tensor(X_movie, dtype=torch.float32)

# Forward pass without gradient tracking; sigmoid turns logits into probabilities.
with torch.no_grad():
    logits = model(X_movie_t)
    probabilities = logits.sigmoid().numpy()

# Report one line per character; the names follow the row order of movie_characters.
character_names = ("Jack Dawson", "Rose DeWitt Bukater")
print("--- TITANIC SURVIVAL PREDICTIONS ---")
for row_idx, name in enumerate(character_names):
    prob = 100 * probabilities[row_idx][0]
    if prob >= 50:
        status = "SURVIVES"
    else:
        status = "TRAGICALLY DIES"
    print(f"{name}: {prob:.1f}% chance of survival -> {status}")

--- TITANIC SURVIVAL PREDICTIONS ---
Jack Dawson: 4.9% chance of survival -> TRAGICALLY DIES
Rose DeWitt Bukater: 96.1% chance of survival -> SURVIVES
