In [None]:
import numpy as np
import pandas as pd
import sklearn
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.compose import ColumnTransformer
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import DataLoader, TensorDataset, WeightedRandomSampler

from utils.config import load_config
from utils.data import download_competition_data

config = load_config("config.yaml")
sklearn.set_config(transform_output="pandas")

Config loaded from config.yaml.


In [None]:
train, test = download_competition_data(
    competition_name=config.competition_name,
    data_path=config.data_path,
)

month_map = {
    "jan": 1,
    "feb": 2,
    "mar": 3,
    "apr": 4,
    "may": 5,
    "jun": 6,
    "jul": 7,
    "aug": 8,
    "sep": 9,
    "oct": 10,
    "nov": 11,
    "dec": 12,
}
train["month"] = train["month"].map(month_map)
test["month"] = test["month"].map(month_map)

train["sin_day"] = np.sin(2 * np.pi * train["day"] / 31)
train["cos_day"] = np.cos(2 * np.pi * train["day"] / 31)
test["sin_day"] = np.sin(2 * np.pi * test["day"] / 31)
test["cos_day"] = np.cos(2 * np.pi * test["day"] / 31)
train["sin_month"] = np.sin(2 * np.pi * train["month"] / 12)
train["cos_month"] = np.cos(2 * np.pi * train["month"] / 12)
test["sin_month"] = np.sin(2 * np.pi * test["month"] / 12)
test["cos_month"] = np.cos(2 * np.pi * test["month"] / 12)

train.drop(columns=["day", "month"], inplace=True)
test.drop(columns=["day", "month"], inplace=True)

train["never_contacted_before"] = train.apply(
    lambda row: 1 if row["pdays"] == -1 else 0, axis=1
)
test["never_contacted_before"] = test.apply(
    lambda row: 1 if row["pdays"] == -1 else 0, axis=1
)
max_pdays = train["pdays"].max()
train["pdays"] = train["pdays"].apply(lambda x: x if x != -1 else max_pdays)
test["pdays"] = test["pdays"].apply(lambda x: x if x != -1 else max_pdays)

Data for competition 'playground-series-s5e8' already exists at 'data/playground-series-s5e8.zip'.


In [3]:
X_train_raw = train.drop(columns=[config.target_column])
y_train_raw = train[config.target_column]
X_test_raw = test.copy()

X_train_raw, X_val_raw, y_train_raw, y_val_raw = train_test_split(
    X_train_raw, y_train_raw, test_size=0.1, random_state=42, stratify=y_train_raw
)

features = X_train_raw.drop(columns=["id"]).columns.tolist()

In [4]:
binary_features = ["default", "housing", "loan"]
categorical_features = [
    "job",
    "marital",
    "education",
    "contact",
    "poutcome",
]
numerical_features = [
    "age",
    "balance",
    "duration",
    "campaign",
    "pdays",
    "previous",
    "sin_day",
    "cos_day",
    "sin_month",
    "cos_month",
]

In [None]:
pipeline = Pipeline(
    [
        (
            "column_transform",
            ColumnTransformer(
                [
                    (
                        "binary_encode",
                        OrdinalEncoder(
                            handle_unknown="use_encoded_value", unknown_value=np.nan
                        ),
                        binary_features,
                    ),
                    (
                        "categorical_encode",
                        OneHotEncoder(
                            handle_unknown="ignore",
                            sparse_output=False,
                            drop="first",
                        ),
                        categorical_features,
                    ),
                    (
                        "scale_numerical",
                        StandardScaler(),
                        numerical_features,
                    ),
                    # (
                    #    "passthrough",
                    #    "passthrough",
                    #    numerical_features,
                    # ),
                ],
                remainder="drop",
                verbose_feature_names_out=False,
            ),
        ),
        # ("scaler", StandardScaler()),
        # ("imputer", KNNImputer(n_neighbors=10, weights="distance")),
    ]
)

X_train = pipeline.fit_transform(X_train_raw)
X_val = pipeline.transform(X_val_raw)
X_test = pipeline.transform(X_test_raw)

oe = OrdinalEncoder()
y_train = oe.fit_transform(pd.DataFrame(y_train_raw)).iloc[:, 0]
y_val = oe.transform(pd.DataFrame(y_val_raw)).iloc[:, 0]

class_weights = compute_class_weight("balanced", classes=y_train.unique(), y=y_train)
torch_weights = torch.tensor(class_weights, dtype=torch.float)

In [None]:
class SelfNormalizingNN(nn.Module):
    """
    A simple self-normalizing neural network using SELU activations
    and LeCun normal initialization for stable training.
    """

    def __init__(self, input_size, hidden_sizes, output_size, dropout_rate=0.05):
        super(SelfNormalizingNN, self).__init__()

        # Build layers
        self.layers = nn.ModuleList()
        layer_sizes = [input_size] + hidden_sizes + [output_size]

        for i in range(len(layer_sizes) - 1):
            layer = nn.Linear(layer_sizes[i], layer_sizes[i + 1])
            # LeCun normal initialization for self-normalization
            nn.init.normal_(
                layer.weight,
                mean=0,
                std=1 / torch.sqrt(torch.tensor(layer_sizes[i], dtype=torch.float32)),
            )
            nn.init.zeros_(layer.bias)
            self.layers.append(layer)

        # Alpha dropout for SELU (maintains self-normalizing property)
        self.dropout = nn.AlphaDropout(p=dropout_rate)

    def forward(self, x):
        # Apply SELU activation to all hidden layers
        for i, layer in enumerate(self.layers[:-1]):
            x = layer(x)
            x = F.selu(x)  # Self-normalizing activation
            x = self.dropout(x)

        # Output layer (no activation for flexibility)
        x = self.layers[-1](x)
        return x


sample_weights = torch.tensor([torch_weights[int(i)] for i in y_train.values])
sampler = WeightedRandomSampler(
    weights=sample_weights, num_samples=len(sample_weights), replacement=True
)


def create_dataloader(X, y, batch_size=2**16, shuffle=True, sampler=None):
    X_tensor = torch.tensor(X.values, dtype=torch.float32)
    y_tensor = torch.tensor(y.values, dtype=torch.float32)
    dataset = TensorDataset(X_tensor, y_tensor)
    if sampler:
        shuffle = False  # Sampler handles shuffling
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, sampler=sampler)


# Create the dataloaders
train_loader = create_dataloader(X_train, y_train, sampler=sampler)
val_loader = create_dataloader(X_val, y_val, shuffle=False)

device = torch.device(
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)

model = SelfNormalizingNN(
    input_size=X_train.shape[1],
    hidden_sizes=[32, 64, 32],
    output_size=1,
    dropout_rate=0.2,
)
# model = torch.compile(model)
model = model.to(device)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-5)


def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=10):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            with torch.autocast(device_type=device.type):
                outputs = model(inputs)
                loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f"Epoch [{epoch + 1}/{num_epochs}]\nTraining Loss: {epoch_loss:.4f}")

        validate_model(model, val_loader, criterion)


def validate_model(model, val_loader, criterion):
    model.eval()
    running_loss = 0.0
    all_labels = []
    all_outputs = []
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), labels)
            running_loss += loss.item() * inputs.size(0)
            all_outputs.extend(outputs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    epoch_loss = running_loss / len(val_loader.dataset)
    auroc = roc_auc_score(all_labels, all_outputs)
    print(f"Validation Loss: {epoch_loss:.4f}, AUROC: {auroc:.4f}")

In [8]:
train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=1000)

Epoch [1/1000]
Training Loss: 0.6804
Validation Loss: 0.3209, AUROC: 0.9461
Epoch [2/1000]
Training Loss: 0.2878
Validation Loss: 0.3221, AUROC: 0.9460
Epoch [3/1000]
Training Loss: 0.2875
Validation Loss: 0.3151, AUROC: 0.9460
Epoch [4/1000]
Training Loss: 0.2882
Validation Loss: 0.3107, AUROC: 0.9460
Epoch [5/1000]
Training Loss: 0.2877
Validation Loss: 0.3100, AUROC: 0.9461
Epoch [6/1000]
Training Loss: 0.2872
Validation Loss: 0.3109, AUROC: 0.9461
Epoch [7/1000]
Training Loss: 0.2867
Validation Loss: 0.3116, AUROC: 0.9462
Epoch [8/1000]
Training Loss: 0.2868
Validation Loss: 0.3114, AUROC: 0.9462
Epoch [9/1000]
Training Loss: 0.2863
Validation Loss: 0.3108, AUROC: 0.9462
Epoch [10/1000]
Training Loss: 0.2864
Validation Loss: 0.3104, AUROC: 0.9462
Epoch [11/1000]
Training Loss: 0.2883
Validation Loss: 0.3107, AUROC: 0.9462
Epoch [12/1000]
Training Loss: 0.2869
Validation Loss: 0.3105, AUROC: 0.9463
Epoch [13/1000]
Training Loss: 0.2873
Validation Loss: 0.3102, AUROC: 0.9463
Epoch [1

In [None]:
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
model.eval()
with torch.no_grad():
    X_test_tensor = X_test_tensor.to(device)
    test_outputs = torch.sigmoid(model(X_test_tensor).cpu()).flatten().numpy()

submission = pd.DataFrame()
submission["id"] = test["id"]
submission[config.target_column] = test_outputs
submission.to_csv(config.data_path / config.submission_file, index=False)

array([0.02921791, 0.8046558 , 0.00224506, ..., 0.89249635, 0.01050335,
       0.72527885], dtype=float32)