In [1]:
# Clear all variables
%reset -f

In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import math
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from torch.distributions import Normal
import nnwosd as wosd
import importlib
importlib.reload(wosd)
import pickle
import pandas as pd

from sklearn.model_selection import LeaveOneOut
import torch.nn.utils as utils

In [14]:
with open('rice92_data.pkl', 'rb') as f:
    loaded_dict = pickle.load(f)

In [15]:
loaded_dict.columns

Index(['YEARDUM', 'FMERCODE', 'PROD', 'AREA', 'LABOR', 'NPK', 'OTHER', 'PRICE',
       'AREAP', 'LABORP', 'NPKP', 'OTHERP', 'AGE', 'EDYRS', 'HHSIZE', 'NADULT',
       'BANRAT'],
      dtype='object')

In [16]:
# loaded_dict

x1= torch.tensor(np.log(np.array(loaded_dict['AREA']))).reshape(-1,1)
x2= torch.tensor(np.log(np.array(loaded_dict['LABOR']))).reshape(-1,1)
x3= torch.tensor(np.log(np.array(loaded_dict['NPK']))).reshape(-1,1)
x_tensor = torch.cat((x1,x2,x3), dim=1)
y = torch.tensor(np.log(np.array(loaded_dict['PROD']))).reshape(-1,1)

In [17]:
# 2. Standardize input and output data
scaler_X = StandardScaler()
scaler_y = StandardScaler()

# Fit scalers on the data and transform
X_standardized = torch.tensor(scaler_X.fit_transform(x_tensor), dtype=torch.float32)
y_standardized = torch.tensor(scaler_y.fit_transform(y), dtype=torch.float32)

# LOOCV for model selection

In [20]:
# ------------------ MLP with Dropout ------------------
class MLPWithDropout(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, activation_func, dropout=0.1):
        super().__init__()
        layers = []
        in_size = input_size
        self.linear_layers = nn.ModuleList()
        for h in hidden_sizes:
            linear = nn.Linear(in_size, h)
            self.linear_layers.append(linear)
            layers.append(linear)
            layers.append(activation_func)
            layers.append(nn.Dropout(dropout))  # Dropout after activation
            in_size = h
        self.output = nn.Linear(in_size, output_size)
        layers.append(self.output)
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)

# ------------------ Train function ------------------
def train_model(X_train, y_train, X_val, y_val, hidden_sizes, sigma_v, sigma_u, 
                epochs=300, lr=0.005, dropout=0.1, weight_decay=1e-4):
    
    input_size = X_train.shape[1]
    output_size = 1

    activation_fun = wosd.FlippedELU(alpha=.8)
    model = MLPWithDropout(input_size, hidden_sizes, output_size, activation_func=activation_fun, dropout=dropout)

    nll_loss = wosd.GaussianNLLLoss(sigma_v=sigma_v, sigma_u=sigma_u)
    optimizer = optim.Adam(list(model.parameters()) + [nll_loss.log_std_v, nll_loss.log_std_u],
                           lr=lr, weight_decay=weight_decay)

    best_loss = float("inf")
    best_state = model.state_dict()

    for epoch in range(epochs):
        model.train()
        y_pred = model(X_train)
        loss = nll_loss(y_pred, y_train)

        optimizer.zero_grad()
        loss.backward()
        utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()

        # Clamp weights for monotonicity
        with torch.no_grad():
            for layer in model.linear_layers:
                layer.weight.data.clamp_(min=0)
            model.output.weight.data.clamp_(min=0)

        if loss.item() < best_loss:
            best_loss = loss.item()
            best_state = model.state_dict()

    # Validation
    model.load_state_dict(best_state)
    model.eval()
    with torch.no_grad():
        y_val_pred = model(X_val)
        val_loss = nll_loss(y_val_pred, y_val).item()

    return val_loss

# ------------------ LOOCV loop ------------------
architectures = {
    "2_layers": [[32,16], [32,8], [16,8], [8,4], [4,4]],
    "3_layers": [[16,8,4], [8,4,2], [4,2,2]]
}

loo = LeaveOneOut()
results = {}

for arch_name, configs in architectures.items():
    for hidden_sizes in configs:
        print(f"\n>>> Starting {arch_name} architecture: {hidden_sizes}")
        val_losses = []

        for fold, (train_idx, val_idx) in enumerate(loo.split(X_standardized), start=1):
            print(f"   Fold {fold}/{len(X_standardized)} ...", end="\r")
            X_train, X_val = X_standardized[train_idx], X_standardized[val_idx]
            y_train, y_val = y_standardized[train_idx], y_standardized[val_idx]

            loss_val = train_model(
                X_train, y_train, X_val, y_val,
                hidden_sizes,
                sigma_v=sigma_v_sfm, sigma_u=sigma_u_sfm,
                epochs=300, lr=0.005,
                dropout=0.1, weight_decay=1e-4
            )
            val_losses.append(loss_val)

        avg_loss = np.mean(val_losses)
        results[(arch_name, tuple(hidden_sizes))] = avg_loss
        print(f"Architecture {arch_name}, {hidden_sizes}: LOOCV Loss = {avg_loss:.4f}")

# ------------------ Print sorted results ------------------
results_sorted = sorted(results.items(), key=lambda x: x[1])
print("\nBest architectures by LOOCV:")
for arch, loss_val in results_sorted:
    print(arch, "->", loss_val)



>>> Starting 2_layers architecture: [32, 16]
Architecture 2_layers, [32, 16]: LOOCV Loss = 0.2113

>>> Starting 2_layers architecture: [32, 8]
Architecture 2_layers, [32, 8]: LOOCV Loss = 0.5879

>>> Starting 2_layers architecture: [16, 8]
Architecture 2_layers, [16, 8]: LOOCV Loss = 0.3158

>>> Starting 2_layers architecture: [8, 4]
Architecture 2_layers, [8, 4]: LOOCV Loss = 0.3527

>>> Starting 2_layers architecture: [4, 4]
Architecture 2_layers, [4, 4]: LOOCV Loss = 0.6421

>>> Starting 3_layers architecture: [16, 8, 4]
Architecture 3_layers, [16, 8, 4]: LOOCV Loss = nan

>>> Starting 3_layers architecture: [8, 4, 2]
Architecture 3_layers, [8, 4, 2]: LOOCV Loss = nan

>>> Starting 3_layers architecture: [4, 2, 2]
Architecture 3_layers, [4, 2, 2]: LOOCV Loss = 0.6833

Best architectures by LOOCV:
('2_layers', (32, 16)) -> 0.21126475283872634
('2_layers', (16, 8)) -> 0.31583843751947577
('2_layers', (8, 4)) -> 0.35271253015515525
('2_layers', (32, 8)) -> 0.5879406441546421
('2_layer