In [1]:
# Install missing packages
# %pip install seaborn

# Import necessary libraries
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, learning_curve
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR, LinearSVC, LinearSVR
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.metrics import roc_auc_score, precision_recall_curve, f1_score, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler, OneHotEncoder, TargetEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.calibration import CalibratedClassifierCV
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
from time import time

# --- Step 0: One-time Setup & Parity Guarantees ---
# Fix seeds for reproducibility (Python, NumPy, and PyTorch)
def set_seed(s=4242):
    random.seed(s)
    np.random.seed(s)
    torch.manual_seed(s)
    torch.cuda.manual_seed_all(s)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

set_seed(4242)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Step 1: Data Handling ---
# Load datasets
hotel_data = pd.read_csv('hotel_bookings.csv')
us_accidents_data = pd.read_csv('US_Accidents_March23.csv')

# Preprocess datasets
def preprocess_hotel_booking(data):
    """
    Preprocess the Hotel Booking dataset.
    """
    # Remove post-outcome fields to prevent leakage
    data = data.drop(columns=['reservation_status'])

    # Separate numeric and categorical columns
    numeric_cols = data.select_dtypes(include=['float64', 'int64']).columns
    categorical_cols = data.select_dtypes(include=['object']).columns

    # Impute missing values for numeric columns
    numeric_imputer = SimpleImputer(strategy='mean')
    data[numeric_cols] = numeric_imputer.fit_transform(data[numeric_cols])

    # Impute missing values for categorical columns using the most frequent strategy
    categorical_imputer = SimpleImputer(strategy='most_frequent')
    data[categorical_cols] = categorical_imputer.fit_transform(data[categorical_cols])

    # Encode categorical variables
    encoder = TargetEncoder()
    # Ensure the target variable is passed to the encoder
    data[categorical_cols] = encoder.fit_transform(data[categorical_cols], data['is_canceled'])

    # Scale numeric features
    scaler = StandardScaler()
    data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

    return data


def preprocess_us_accidents(data):
    """
    Preprocess the US Accidents dataset.
    """
    # Clean the datetime columns to remove fractional seconds or extraneous characters
    data['End_Time'] = data['End_Time'].str.split('.').str[0]
    data['Start_Time'] = data['Start_Time'].str.split('.').str[0]

    # Calculate the duration in minutes
    data['Duration'] = (pd.to_datetime(data['End_Time'], format='mixed') -
                       pd.to_datetime(data['Start_Time'], format='mixed')).dt.total_seconds() / 60

    # Scale numeric features
    numeric_cols = data.select_dtypes(include=['float64', 'int64']).columns
    scaler = StandardScaler()
    data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

    return data


hotel_data_preprocessed = preprocess_hotel_booking(hotel_data)
us_accidents_data_preprocessed = preprocess_us_accidents(us_accidents_data)

# Split datasets
X_hotel_train, X_hotel_test, y_hotel_train, y_hotel_test = train_test_split(
    hotel_data_preprocessed.drop('is_canceled', axis=1),
    hotel_data_preprocessed['is_canceled'],
    test_size=0.2,
    random_state=42
)
X_accidents_train, X_accidents_test, y_accidents_train, y_accidents_test = train_test_split(
    us_accidents_data_preprocessed.drop('Duration', axis=1),
    us_accidents_data_preprocessed['Duration'],
    test_size=0.2,
    random_state=42
)

# Inspect the data types of the columns
print(X_accidents_test.dtypes)

# Drop non-numeric columns (if they are not needed)
X_accidents_test = X_accidents_test.select_dtypes(include=['float64', 'int64'])

# Encode categorical columns (if any)
categorical_cols = X_accidents_test.select_dtypes(include=['object']).columns
if not categorical_cols.empty:
    encoder = OneHotEncoder(handle_unknown='ignore')
    X_accidents_test[categorical_cols] = encoder.fit_transform(X_accidents_test[categorical_cols])

# Impute missing values (if any)
imputer = SimpleImputer(strategy='most_frequent')
X_accidents_test = pd.DataFrame(imputer.fit_transform(X_accidents_test), columns=X_accidents_test.columns)

# Convert to numeric types
X_accidents_test = X_accidents_test.astype(float)

# Validate the data types
print(X_accidents_test.dtypes)

# Define Xtr and ytr for the Hotel Booking dataset
Xtr = torch.from_numpy(X_hotel_train.values).float()
ytr = torch.from_numpy(y_hotel_train.values).long()

print(Xtr.shape)
print(ytr.shape)
# Convert to PyTorch tensors
Xva = torch.from_numpy(X_hotel_test.values).float()  # Hotel test set
yva = torch.from_numpy(y_hotel_test.values).long()   # Hotel test labels
Xte = torch.from_numpy(X_accidents_test.values).float()  # Accidents test set
yte = torch.from_numpy(y_accidents_test.values).float()   # Accidents test labels

# Create DataLoader objects
train_loader = DataLoader(TensorDataset(Xtr, ytr), batch_size=256, shuffle=True, drop_last=False)
val_loader = DataLoader(TensorDataset(Xva, yva), batch_size=1024, shuffle=False)
test_loader = DataLoader(TensorDataset(Xte, yte), batch_size=1024, shuffle=False)


ID                        object
Source                    object
Severity                 float64
Start_Time                object
End_Time                  object
Start_Lat                float64
Start_Lng                float64
End_Lat                  float64
End_Lng                  float64
Distance(mi)             float64
Description               object
Street                    object
City                      object
County                    object
State                     object
Zipcode                   object
Country                   object
Timezone                  object
Airport_Code              object
Weather_Timestamp         object
Temperature(F)           float64
Wind_Chill(F)            float64
Humidity(%)              float64
Pressure(in)             float64
Visibility(mi)           float64
Wind_Direction            object
Wind_Speed(mph)          float64
Precipitation(in)        float64
Weather_Condition         object
Amenity                     bool
Bump      

In [2]:
# --- Step 2: Model Definition ---
# Mirror the sklearn MLP with an nn.Module
# Use the SAME hidden sizes and activations you claimed in SL; that’s your “fixed backbone”
# Keep output head/activation appropriate for the task (e.g., CrossEntropyLoss for multiclass, MSELoss for regression)

class MLP(nn.Module):
    def __init__(self, in_dim, hidden=[128, 64], out_dim=4, dropout_p=0.0):
        super().__init__()
        layers = []
        dims = [in_dim] + hidden
        for i in range(len(dims)-1):
            layers += [nn.Linear(dims[i], dims[i+1]), nn.ReLU()]
            if dropout_p > 0:
                layers += [nn.Dropout(p=dropout_p)]
        layers += [nn.Linear(hidden[-1] if hidden else in_dim, out_dim)]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)
# Example usage
num_classes = len(np.unique(y_hotel_train))  # Define num_classes based on the target variable
model = MLP(in_dim=Xtr.shape[1], hidden=[128, 64], out_dim=num_classes).to(device)


# --- Step 3: Freezing Layers ---
# Freeze all but the last k layers (this is the crux for RO and for Part-specific constraints)
def linear_layers(model):
    return [m for m in model.modules() if isinstance(m, nn.Linear)]

def freeze_all_but_last_k(model, k=2):
    layers = linear_layers(model)
    # Freeze all first
    for p in model.parameters():
        p.requires_grad = False
    # Unfreeze last k Linear layers
    for m in layers[-k:]:
        for p in m.parameters():
            p.requires_grad = True
    # Report counts (needed for RO cap)
    tot = sum(p.numel() for p in model.parameters())
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Params total={tot:,} | trainable(last {k})={trainable:,}")
    return trainable

# Example: freeze all but last 1–3 layers for Part 1 RO
trainable = freeze_all_but_last_k(model, k=2)
assert trainable <= 50_000, "RO parameter cap exceeded."

Params total=12,354 | trainable(last 2)=8,386


In [3]:
# --- Step 4: Losses & Metrics ---
# Define appropriate loss functions and metrics based on the task (classification or regression)
task = "classification"  # or "regression"
criterion = nn.CrossEntropyLoss() if task == "classification" else nn.MSELoss()

# Add task-appropriate metrics (Accuracy/F1/AUROC vs. MAE/MSE/R2) consistent with your SL choices
def evaluate_classification(y_true, y_pred):
    """
    Evaluate classification performance using ROC-AUC, PR-AUC, and F1-Score.
    """
    roc_auc = roc_auc_score(y_true, y_pred)
    precision, recall, _ = precision_recall_curve(y_true, y_pred)
    pr_auc = auc(recall, precision)
    f1 = f1_score(y_true, y_pred)
    return roc_auc, pr_auc, f1

def evaluate_regression(y_true, y_pred):
    """
    Evaluate regression performance using MAE and MSE.
    """
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    return mae, mse

# --- Step 5: Training and Evaluation Loop ---
# Implement a minimal, transparent training/eval loop (counts “gradient evaluations” cleanly)
def run_epoch(model, loader, optimizer=None):
    is_train = optimizer is not None
    model.train(is_train)
    total_loss, n, grad_evals = 0.0, 0, 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        if is_train:
            optimizer.zero_grad(set_to_none=True)
        with torch.set_grad_enabled(is_train):
            pred = model(xb)
            loss = criterion(pred, yb)
            if is_train:
                loss.backward()
                optimizer.step()
                grad_evals += 1  # count one optimizer step = one gradient evaluation
        total_loss += loss.item() * xb.size(0)
        n += xb.size(0)
    return total_loss / n, grad_evals

@torch.no_grad()
def evaluate(model, loader):
    model.eval()
    total_loss, n = 0.0, 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        pred = model(xb)
        loss = criterion(pred, yb)
        total_loss += loss.item() * xb.size(0)
        n += xb.size(0)
    return total_loss / n

In [4]:
# --- Step 6: Optimizer Ablations (Part 2) ---
# Define the optimizers exactly as the assignment lists; keep everything else fixed (batch size, schedule form, seeds)
# Record time/steps-to-ℓ and stability over seeds
# Do not call AdamW “Adam baseline”

def make_opt(model, kind, lr, **kwargs):
    if kind == "sgd":
        return optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr)
    if kind == "momentum":
        return optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, momentum=kwargs.get("momentum", 0.9))
    if kind == "nesterov":
        return optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, momentum=kwargs.get("momentum", 0.9), nesterov=True)
    if kind == "adam":
        return optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, betas=(kwargs.get("beta1", 0.9), kwargs.get("beta2", 0.999)), eps=kwargs.get("eps", 1e-8))
    if kind == "adamw":
        return optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, betas=(kwargs.get("beta1", 0.9), kwargs.get("beta2", 0.999)), eps=kwargs.get("eps", 1e-8), weight_decay=kwargs.get("wd", 1e-2))
    raise ValueError(kind)

# --- Step 7: Freezing and RO Hygiene (Part 1) ---
# For RO, call model.eval() for every objective (dropout off; BN uses stored stats)
# Freeze all but last 1–3 layers (≤ ~50k params)
# Define the objective as full-validation loss
# Count one function evaluation per full validation pass
# Do not interleave gradient steps in RO

@torch.no_grad()
def validation_objective(model, val_loader):
    model.eval()
    return evaluate(model, val_loader)  # one full pass = 1 function evaluation

In [5]:
# --- Step 8: Regularization Study (Part 3) ---
# Keep Adam hyperparams fixed to the best from Part 2 (no switching to AdamW; no retuning LR when adding regularization)
# Implement L2 (coupled) via loss term (not AdamW), early stopping rule, dropout placements (document where), label smoothing or target noise, and modest augmentation appropriate to the modality (off for val/test)
# Budget-match runs and report dispersion across seeds

def run_epoch_with_l2(model, loader, optimizer, l2_lambda=0.0):
    model.train()
    total_loss, n, grad_evals = 0.0, 0, 0
    for xb, yb in loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad(set_to_none=True)
        pred = model(xb)
        loss = criterion(pred, yb)
        if l2_lambda > 0:
            l2 = sum((p**2).sum() for p in model.parameters() if p.requires_grad)
            loss = loss + l2_lambda * l2
        loss.backward()
        optimizer.step()
        grad_evals += 1
        total_loss += loss.item() * xb.size(0)
        n += xb.size(0)
    return total_loss / n, grad_evals

# --- Step 9: Reporting & Accounting ---
# Compute accounting: gradient evals (updates), function evals (RO), wall-clock on the same hardware class
# Threshold ℓ once per dataset; show steps/time to ℓ; include failures as “> budget”

# Define the train_to_budget function
def train_to_budget(model, optimizer, train_loader, val_loader, max_updates=10000, L_threshold=None):
    """
    Train a model with a given optimizer and dataset loaders within a specified budget of gradient evaluations.

    Args:
        model (torch.nn.Module): The model to train.
        optimizer (torch.optim.Optimizer): The optimizer to use for training.
        train_loader (torch.utils.data.DataLoader): DataLoader for the training set.
        val_loader (torch.utils.data.DataLoader): DataLoader for the validation set.
        max_updates (int): Maximum number of gradient evaluations (updates) allowed.
        L_threshold (float, optional): Validation loss threshold to stop training early.

    Returns:
        dict: A dictionary containing the best validation loss, total gradient evaluations, training time, and whether the threshold was reached.
    """
    grad_evals_total, best_val, t0 = 0, float("inf"), time()
    reached = None

    while grad_evals_total < max_updates:
        # Train for one epoch
        tr_loss, ge = run_epoch(model, train_loader, optimizer)
        grad_evals_total += ge

        # Evaluate on the validation set
        val_loss = evaluate(model, val_loader)
        best_val = min(best_val, val_loss)

        # Check if the validation loss threshold is met
        if L_threshold is not None and reached is None and val_loss <= L_threshold:
            reached = (grad_evals_total, time() - t0)

    return {
        "best_val": best_val,
        "grad_evals": grad_evals_total,
        "time_sec": time() - t0,
        "reached_L": reached
    }

# Example usage
optimizer = make_opt(model, "adam", lr=0.001)
results = train_to_budget(model, optimizer, train_loader, val_loader, max_updates=10000)
print(results)

# --- Step 10: SL Code Integration ---
# Train and evaluate models using sklearn pipelines
def train_decision_tree(X_train, y_train):
    """
    Train a Decision Tree classifier with hyperparameter tuning.
    """
    param_grid = {
        'max_depth': [8, 16],
        'min_samples_leaf': [100, 200]
    }
    tree = DecisionTreeClassifier(random_state=42)
    grid_search = GridSearchCV(tree, param_grid, cv=5)
    grid_search.fit(X_train, y_train)
    return grid_search.best_estimator_

def train_shallow_nn(X_train, y_train, input_dim):
    """
    Train a shallow neural network with SGD optimizer.
    """
    class ShallowNN(nn.Module):
        def __init__(self, input_dim):
            super(ShallowNN, self).__init__()
            self.fc1 = nn.Linear(input_dim, 512)
            self.fc2 = nn.Linear(512, 512)
            self.fc3 = nn.Linear(512, 1)

        def forward(self, x):
            x = torch.relu(self.fc1(x))
            x = torch.relu(self.fc2(x))
            x = torch.sigmoid(self.fc3(x))
            return x

    model = ShallowNN(input_dim)
    criterion = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Training loop
    for epoch in range(15):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
    return model

def train_deep_nn(X_train, y_train, input_dim):
    """
    Train a deep neural network with SGD optimizer.
    """
    class DeepNN(nn.Module):
        def __init__(self, input_dim):
            super(DeepNN, self).__init__()
            self.fc1 = nn.Linear(input_dim, 256)
            self.fc2 = nn.Linear(256, 256)
            self.fc3 = nn.Linear(256, 128)
            self.fc4 = nn.Linear(128, 128)
            self.fc5 = nn.Linear(128, 1)

        def forward(self, x):
            x = torch.relu(self.fc1(x))
            x = torch.relu(self.fc2(x))
            x = torch.relu(self.fc3(x))
            x = torch.relu(self.fc4(x))
            x = torch.sigmoid(self.fc5(x))
            return x

    model = DeepNN(input_dim)
    criterion = nn.BCELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Training loop
    for epoch in range(15):
        model.train()
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()
    return model


{'best_val': 0.302001476544952, 'grad_evals': 10098, 'time_sec': 15.679988861083984, 'reached_L': None}


In [6]:
# Check for NaNs in the target variable
print(y_hotel_train.isna().sum())  # Count the number of NaN values
print(y_hotel_train[y_hotel_train.isna()])  # Display the NaN values
print(y_hotel_train.shape)  # Check the shape of the target variable
print(y_hotel_train.head())  # Check the first few rows of the target variable

# if y_hotel_train.empty or y_hotel_train.shape[0] == 0:
#     y_hotel_train = hotel_data_preprocessed['is_canceled']


# Handle missing values (if any)
if y_hotel_train.isna().sum() > 0:
    # Option 1: Drop NaNs
    # y_hotel_train = y_hotel_train.dropna()

    # Option 2: Impute NaNs (uncomment one of the following)
    # from sklearn.impute import SimpleImputer
    # imputer = SimpleImputer(strategy='most_frequent')  # or 'mean', 'median', 'constant'
    # y_hotel_train = imputer.fit_transform(y_hotel_train.values.reshape(-1, 1))

    # Option 3: Fill NaNs with a default value
    y_hotel_train = y_hotel_train.fillna(0)  # or the most frequent class

# Validate the target variable
print(y_hotel_train.isna().sum())  # Should be 0

# Train models
decision_tree_model = train_decision_tree(X_hotel_train, y_hotel_train)
knn_model = train_knn(X_hotel_train, y_hotel_train)
svm_model = train_svm(X_hotel_train, y_hotel_train)


shallow_nn_model = train_shallow_nn(X_hotel_train, y_hotel_train, input_dim=X_hotel_train.shape[1])
deep_nn_model = train_deep_nn(X_hotel_train, y_hotel_train, input_dim=X_hotel_train.shape[1])

# Evaluate models
roc_auc, pr_auc, f1 = evaluate_classification(y_hotel_test, decision_tree_model.predict(X_hotel_test))
print(f"Decision Tree - ROC-AUC: {roc_auc}, PR-AUC: {pr_auc}, F1-Score: {f1}")

# Plot learning curves
plot_learning_curve(decision_tree_model, X_hotel_train, y_hotel_train, 'Learning Curve for Decision Tree')
plot_residuals(y_accidents_test, svm_model.predict(X_accidents_test), 'Residuals for SVM Regressor')

0
Series([], Name: is_canceled, dtype: float64)
(95512,)
67702     1.303712
115851   -0.767040
57345     1.303712
11622     1.303712
33333    -0.767040
Name: is_canceled, dtype: float64
0


ValueError: 
All the 20 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
20 fits failed with the following error:
Traceback (most recent call last):
  File "/Users/AC81199/Library/CloudStorage/OneDrive-ElevanceHealth/Documents/Masters/Fall 2025/SL_Projects/.venv/lib/python3.11/site-packages/sklearn/model_selection/_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/AC81199/Library/CloudStorage/OneDrive-ElevanceHealth/Documents/Masters/Fall 2025/SL_Projects/.venv/lib/python3.11/site-packages/sklearn/base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/AC81199/Library/CloudStorage/OneDrive-ElevanceHealth/Documents/Masters/Fall 2025/SL_Projects/.venv/lib/python3.11/site-packages/sklearn/tree/_classes.py", line 1024, in fit
    super()._fit(
  File "/Users/AC81199/Library/CloudStorage/OneDrive-ElevanceHealth/Documents/Masters/Fall 2025/SL_Projects/.venv/lib/python3.11/site-packages/sklearn/tree/_classes.py", line 294, in _fit
    check_classification_targets(y)
  File "/Users/AC81199/Library/CloudStorage/OneDrive-ElevanceHealth/Documents/Masters/Fall 2025/SL_Projects/.venv/lib/python3.11/site-packages/sklearn/utils/multiclass.py", line 221, in check_classification_targets
    raise ValueError(
ValueError: Unknown label type: continuous. Maybe you are trying to fit a classifier, which expects discrete classes on a regression target with continuous values.


In [None]:
# Check for NaNs in the target variable
print("Target variable diagnostics:")
print(f"NaN count: {y_hotel_train.isna().sum()}")
print(f"Shape: {y_hotel_train.shape}")
print(f"Data type: {y_hotel_train.dtype}")
print(f"Unique values: {np.unique(y_hotel_train)}")
print(f"Value counts:\n{y_hotel_train.value_counts()}")

# Fix the continuous target issue
print("\nFixing target variable...")

# The issue is that TargetEncoder in preprocessing is converting the target to continuous values
# We need to ensure the target remains as discrete binary classes (0, 1)
y_hotel_train = y_hotel_train.astype(int)
y_hotel_test = y_hotel_test.astype(int)

# Verify the fix
print(f"After conversion - dtype: {y_hotel_train.dtype}")
print(f"Unique values: {np.unique(y_hotel_train)}")
print(f"All values in [0,1]: {all(val in [0, 1] for val in y_hotel_train)}")

# Handle missing values (if any)
if y_hotel_train.isna().sum() > 0:
    y_hotel_train = y_hotel_train.fillna(0)
    y_hotel_test = y_hotel_test.fillna(0)

print(f"Final NaN count: {y_hotel_train.isna().sum()}")

# Add missing functions that are called but not defined
def train_knn(X_train, y_train):
    """Train a KNN classifier with hyperparameter tuning."""
    param_grid = {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance']
    }
    knn = KNeighborsClassifier()
    grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='roc_auc')
    grid_search.fit(X_train, y_train)
    return grid_search.best_estimator_

def train_svm(X_train, y_train):
    """Train an SVM classifier with hyperparameter tuning."""
    param_grid = {
        'C': [0.1, 1, 10],
        'kernel': ['rbf', 'linear']
    }
    svm = SVC(probability=True, random_state=42)
    grid_search = GridSearchCV(svm, param_grid, cv=5, scoring='roc_auc')
    grid_search.fit(X_train, y_train)
    return grid_search.best_estimator_

# Fix evaluation function - need to import auc
from sklearn.metrics import auc

def evaluate_classification_fixed(y_true, y_pred_proba):
    """
    Evaluate classification performance using proper probability predictions.
    """
    # Convert probabilities to binary predictions
    if len(y_pred_proba.shape) > 1:
        y_pred_proba = y_pred_proba[:, 1] if y_pred_proba.shape[1] > 1 else y_pred_proba.ravel()
    
    y_pred = (y_pred_proba > 0.5).astype(int)
    
    roc_auc = roc_auc_score(y_true, y_pred_proba)
    precision, recall, _ = precision_recall_curve(y_true, y_pred_proba)
    pr_auc = auc(recall, precision)
    f1 = f1_score(y_true, y_pred)
    return roc_auc, pr_auc, f1

# Add missing plotting functions
def plot_learning_curve(model, X_train, y_train, title):
    """Plot learning curve for the model."""
    train_sizes, train_scores, val_scores = learning_curve(
        model, X_train, y_train, cv=5, scoring='roc_auc',
        train_sizes=np.linspace(0.1, 1.0, 10)
    )
    
    plt.figure(figsize=(10, 6))
    plt.plot(train_sizes, np.mean(train_scores, axis=1), 'o-', label='Training Score')
    plt.plot(train_sizes, np.mean(val_scores, axis=1), 'o-', label='Validation Score')
    plt.title(title)
    plt.xlabel('Training Set Size')
    plt.ylabel('ROC-AUC Score')
    plt.legend()
    plt.grid(True)
    plt.show()

def plot_residuals(y_true, y_pred, title):
    """Plot residuals for regression."""
    residuals = y_true - y_pred
    plt.figure(figsize=(10, 6))
    plt.scatter(y_pred, residuals, alpha=0.6)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.title(title)
    plt.xlabel('Predicted Values')
    plt.ylabel('Residuals')
    plt.grid(True)
    plt.show()

# Train models
print("Training models...")
decision_tree_model = train_decision_tree(X_hotel_train, y_hotel_train)
print("Decision Tree trained successfully")

knn_model = train_knn(X_hotel_train, y_hotel_train)
print("KNN trained successfully")

svm_model = train_svm(X_hotel_train, y_hotel_train)
print("SVM trained successfully")

# Convert data to tensors for neural networks
X_hotel_train_tensor = torch.FloatTensor(X_hotel_train.values)
y_hotel_train_tensor = torch.FloatTensor(y_hotel_train.values).unsqueeze(1)

shallow_nn_model = train_shallow_nn(X_hotel_train_tensor, y_hotel_train_tensor, input_dim=X_hotel_train.shape[1])
print("Shallow NN trained successfully")

deep_nn_model = train_deep_nn(X_hotel_train_tensor, y_hotel_train_tensor, input_dim=X_hotel_train.shape[1])
print("Deep NN trained successfully")

# Evaluate models
print("\nEvaluating models...")

# Decision Tree
if hasattr(decision_tree_model, "predict_proba"):
    dt_pred_proba = decision_tree_model.predict_proba(X_hotel_test)[:, 1]
else:
    dt_pred_proba = decision_tree_model.predict(X_hotel_test)
roc_auc, pr_auc, f1 = evaluate_classification_fixed(y_hotel_test, dt_pred_proba)
print(f"Decision Tree - ROC-AUC: {roc_auc:.4f}, PR-AUC: {pr_auc:.4f}, F1-Score: {f1:.4f}")

# Plot learning curves (only for successful models)
try:
    plot_learning_curve(decision_tree_model, X_hotel_train, y_hotel_train, 'Learning Curve for Decision Tree')
except Exception as e:
    print(f"Could not plot learning curve: {e}")

# For regression plot (US accidents)
try:
    # Train a simple regressor for accidents data
    from sklearn.ensemble import RandomForestRegressor
    rf_regressor = RandomForestRegressor(n_estimators=10, random_state=42)
    
    # Select only numeric columns for accidents
    X_accidents_numeric = X_accidents_test.select_dtypes(include=[np.number])
    if len(X_accidents_numeric.columns) > 0:
        rf_regressor.fit(X_accidents_numeric, y_accidents_test)
        y_accidents_pred = rf_regressor.predict(X_accidents_numeric)
        plot_residuals(y_accidents_test, y_accidents_pred, 'Residuals for Random Forest Regressor')
    else:
        print("No numeric columns available for accidents regression")
except Exception as e:
    print(f"Could not create regression plot: {e}")

Target variable diagnostics:
NaN count: 0
Shape: (95512,)
Data type: float64
Unique values: [-0.76704049  1.30371214]
Value counts:
is_canceled
-0.767040    60259
 1.303712    35253
Name: count, dtype: int64

Fixing target variable...
After conversion - dtype: int64
Unique values: [0 1]
All values in [0,1]: True
Final NaN count: 0
Training models...
Decision Tree trained successfully
KNN trained successfully


In [None]:
# Randomized Optimization
model = ShallowNN(input_dim=X_hotel_train.shape[1])
best_state_rhc, best_fitness_rhc = rhc_optimizer(model, objective_function, max_iterations=1000)
best_state_sa, best_fitness_sa = sa_optimizer(model, objective_function, max_iterations=1000)
best_state_ga, best_fitness_ga = ga_optimizer(model, objective_function, max_iterations=1000)

# Adam Variants Comparison
optimizers = [
    torch.optim.SGD(model.parameters(), lr=0.01),
    torch.optim.Adam(model.parameters(), lr=0.001),
    torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08),
    torch.optim.AdamW(model.parameters(), lr=0.001, weight_decay=0.01)
]

for optimizer in optimizers:
    training_time = train_model_with_optimizer(model, optimizer, nn.BCELoss(), dataloader, num_epochs=15)
    print(f"Training time with {optimizer}: {training_time}")

# Regularization Techniques Evaluation
regularization_techniques = ["L2", "Dropout", "Label Smoothing", "Data Augmentation"]
results = evaluate_regularization(model, dataloader, nn.BCELoss(), regularization_techniques)
print(f"Regularization results: {results}")

# Integrated Approach
integrated_training_time = integrate_best_combination(model, dataloader, nn.BCELoss())
print(f"Integrated training time: {integrated_training_time}")