# Neural Network

In [1]:
import os

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import wandb
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# Seed every RNG the notebook relies on. NumPy's global seed alone does not
# cover torch, whose RNG drives the embedding / linear weight initialisation,
# so without the torch seed each run starts from different weights.
np.random.seed(1)
torch.manual_seed(1)


In [None]:
# Location of the cleaned training CSV. Set the TRAIN_DATA_PATH environment
# variable to run the notebook on another machine; the fallback is the path
# the notebook was originally written against.
TRAIN_DATA_PATH = os.environ.get(
    "TRAIN_DATA_PATH",
    "/Users/leachen/Docs/uni/masters/stanford_mse/fall25/mse226/data_science_project/cleaned_data/train_data.csv",
)
df = pd.read_csv(TRAIN_DATA_PATH)

# Columns that are label-encoded and fed to the embedding layers.
categorical_cols = [
    "interlibrary_relation_code",
    "fscs_definition_code",
    "overdue_policy",
    "beac_code",
    "locale_code",
]
# Columns that are log1p-transformed and fed to the network directly.
numerical_cols = [
    "population_lsa",
    "county_population",
    "print_volumes",
    "ebook_volumes",
    "num_lib_branches",
    "num_bookmobiles",
]

# Fail fast with a readable message if the CSV does not have the expected
# schema, instead of a KeyError deep inside the encoding / training cells.
missing_cols = [
    col
    for col in ["visits", *categorical_cols, *numerical_cols]
    if col not in df.columns
]
if missing_cols:
    raise KeyError(f"{TRAIN_DATA_PATH} is missing expected columns: {missing_cols}")

# Features are every column except the target; "visits" is the target.
X = df.drop("visits", axis=1)
y = df["visits"]

In [23]:
# Fit one LabelEncoder per categorical column and keep it in `label_encoders`,
# then overwrite each column of X with its integer codes.
label_encoders = {name: LabelEncoder().fit(X[name]) for name in categorical_cols}
for name, encoder in label_encoders.items():
    X[name] = encoder.transform(X[name])

# Number of distinct codes per categorical column (used to size the
# embedding tables of the network).
category_sizes = {name: X[name].nunique() for name in categorical_cols}

# Shuffled 10-fold splitter with a fixed seed so the folds are repeatable.
kfold = KFold(n_splits=10, shuffle=True, random_state=1)

In [None]:
# NOTE: single train/validation split (alternative to k-fold CV). Keep this cell commented out when using k-fold CV.
# x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# X_train_num = torch.tensor(np.log1p(x_train[numerical_cols].to_numpy()), dtype=torch.float32)
# X_val_num = torch.tensor(np.log1p(x_val[numerical_cols].to_numpy()), dtype=torch.float32)
# X_train_cat = torch.tensor(x_train[categorical_cols].to_numpy(), dtype=torch.long)
# X_val_cat = torch.tensor(x_val[categorical_cols].to_numpy(), dtype=torch.long)
# y_train = torch.tensor(np.log1p(y_train.to_numpy()), dtype=torch.float32).unsqueeze(1)
# y_val = torch.tensor(np.log1p(y_val.to_numpy()), dtype=torch.float32).unsqueeze(1)

In [None]:
class NeuralNetwork(nn.Module):
    """Regressor over embedded categorical features plus raw numeric features.

    Every categorical column gets its own ``nn.Embedding``. The embedding
    vectors are concatenated with the numeric features and passed through a
    one-hidden-layer MLP that emits a single value per row.

    Args:
        category_sizes: Mapping whose values are the number of distinct codes
            of each categorical column. Its iteration order must match the
            column order of ``x_cat`` given to ``forward``.
        num_numeric: Number of numeric feature columns in ``x_num``.
        embed_dim: Width of each embedding vector. Defaults to 8, the value
            that used to be hard-coded.
        hidden_dim: Width of the hidden layer. Defaults to 64, the value that
            used to be hard-coded.
    """

    def __init__(self, category_sizes, num_numeric, embed_dim=8, hidden_dim=64):
        super().__init__()
        # One embedding table per categorical column, in mapping order.
        self.embeddings = nn.ModuleList(
            [
                nn.Embedding(num_categories, embed_dim)
                for num_categories in category_sizes.values()
            ]
        )
        total_emb_dim = embed_dim * len(category_sizes)

        self.fc = nn.Sequential(
            nn.Linear(total_emb_dim + num_numeric, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
        )

    def forward(self, x_cat, x_num):
        """Predict one value per row.

        Args:
            x_cat: 2-D integer tensor; column ``i`` holds the codes looked up
                in the ``i``-th embedding table.
            x_num: 2-D float tensor of numeric features with the same number
                of rows as ``x_cat``.

        Returns:
            Tensor with one output column per input row.
        """
        embedded = [emb(x_cat[:, i]) for i, emb in enumerate(self.embeddings)]
        if embedded:
            x = torch.cat([*embedded, x_num], dim=1)
        else:
            # No categorical columns: torch.cat on an empty list would raise,
            # so the numeric features go to the MLP on their own.
            x = x_num
        return self.fc(x)

In [None]:
# NOTE: single-split training run (alternative to k-fold CV). Keep this cell commented out when using k-fold CV.
# wandb.init(project="mse226_neural_network",
#            config = {
#                "epochs": 5000,
#                "lr": 0.001,
#                "embedding_dim": 8,
#            })

# config = wandb.config
# model = NeuralNetwork(category_sizes, len(numerical_cols))
# criterion = nn.MSELoss()
# optimizer = optim.Adam(model.parameters(), lr = config.lr)

# for epoch in range(config.epochs):
#     optimizer.zero_grad()
#     output = model(X_train_cat, X_train_num)
#     loss = torch.sqrt(criterion(output, y_train))
#     loss.backward()
#     optimizer.step()
#     with torch.no_grad():
#             val_output = model(X_val_cat, X_val_num)
#             val_loss = torch.sqrt(criterion(val_output, y_val))

#     wandb.log({
#             "epoch": epoch,
#             "train_rmse": loss.item(),
#             "val_rmse": val_loss.item()
#         })
#     # print(f"Epoch {epoch+1}/{config.epochs}, Train Loss: {loss.item()}, Val Loss: {val_loss.item()}")

# torch.save(model.state_dict(), "model.pt")
# wandb.save("model.pt")
# wandb.finish()

In [None]:
# Hyper-parameters shared by every fold; also logged as the W&B run config.
# NOTE(review): "embedding_dim" is only logged and used in the run name here.
# NeuralNetwork is constructed without it below, so editing this value alone
# does not change the model.
model_config = {
    "epochs": 5000,
    "lr": 0.001,
    "embedding_dim": 8,
}
criterion = nn.MSELoss()

# Final-epoch validation RMSE (on the log1p scale) of each completed fold.
val_rmse_list = []

for fold, (train_idx, val_idx) in enumerate(kfold.split(X)):
    print(f"Fold {fold+1}")

    # Categorical codes index the embedding tables, hence the integer dtype.
    X_train_cat = torch.tensor(
        X[categorical_cols].iloc[train_idx].to_numpy(), dtype=torch.long
    )
    X_val_cat = torch.tensor(
        X[categorical_cols].iloc[val_idx].to_numpy(), dtype=torch.long
    )
    # Numeric features and the target are both trained on the log1p scale.
    X_train_num = torch.tensor(
        np.log1p(X[numerical_cols].iloc[train_idx].to_numpy()), dtype=torch.float32
    )
    X_val_num = torch.tensor(
        np.log1p(X[numerical_cols].iloc[val_idx].to_numpy()), dtype=torch.float32
    )
    y_train_fold = torch.tensor(
        np.log1p(y.iloc[train_idx].to_numpy()), dtype=torch.float32
    ).unsqueeze(1)
    y_val_fold = torch.tensor(
        np.log1p(y.iloc[val_idx].to_numpy()), dtype=torch.float32
    ).unsqueeze(1)

    # A fresh model and optimizer per fold so no weights leak between folds.
    model = NeuralNetwork(category_sizes, len(numerical_cols))
    optimizer = optim.Adam(model.parameters(), lr=model_config["lr"])

    # Using the run as a context manager guarantees it is finished even when
    # training raises or the cell is interrupted; otherwise the run stays
    # open and the next fold / re-run starts on top of a dangling run.
    with wandb.init(
        project="mse226_neural_network",
        config=model_config,
        name=f"fold_{fold}_lr_{model_config['lr']}_emb_{model_config['embedding_dim']}",
    ):
        for epoch in range(model_config["epochs"]):
            # Full-batch gradient step on the training fold (loss is RMSE).
            optimizer.zero_grad()
            output = model(X_train_cat, X_train_num)
            loss = torch.sqrt(criterion(output, y_train_fold))
            loss.backward()
            optimizer.step()

            # Validation RMSE after the step; no gradients needed.
            with torch.no_grad():
                val_output = model(X_val_cat, X_val_num)
                val_loss = torch.sqrt(criterion(val_output, y_val_fold))

            wandb.log(
                {"epoch": epoch, "train_rmse": loss.item(), "val_rmse": val_loss.item()}
            )
            # Only the last epoch's validation RMSE represents the fold.
            if epoch == model_config["epochs"] - 1:
                val_rmse_list.append(val_loss.item())

        # Persist the fold's weights locally and attach them to the run
        # before the run is closed.
        torch.save(model.state_dict(), f"model_{fold}.pt")
        wandb.save(f"model_{fold}.pt")

# Cross-validated estimate: mean of the per-fold final validation RMSEs.
if val_rmse_list:
    print(sum(val_rmse_list) / len(val_rmse_list))
else:
    print("No fold finished training; mean validation RMSE is undefined.")

Fold 1


0,1
epoch,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇██
train_rmse,█▆▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▄▄▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,4999.0
train_rmse,0.62711
val_rmse,0.62317


Fold 2


0,1
epoch,▁▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
train_rmse,█▆▆▆▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,██▅▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,4999.0
train_rmse,0.62502
val_rmse,0.68154


Fold 3


0,1
epoch,▁▁▁▁▂▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
train_rmse,███▅▄▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▄▄▄▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,4999.0
train_rmse,0.62232
val_rmse,0.70147


Fold 4


0,1
epoch,▁▁▁▁▁▁▁▂▂▂▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇▇▇██
train_rmse,█▆▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▅▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,4999.0
train_rmse,0.62939
val_rmse,0.65651


Fold 5


In [20]:
# Show the per-fold validation RMSEs, then how many folds were recorded.
print(val_rmse_list, len(val_rmse_list), sep="\n")

[0.6846031546592712]
1
