In [6]:
# data processing from https://www.kaggle.com/code/mohamedabdelaziz187/california-housing-prices-eda-preprocess-ml-dl
import matplotlib.pyplot as plt
import pandas as pd
from data_utils import splitter, seed_all, normalize
import numpy as np
import torch
from src.models import vanilla_predNet, MC_dropnet, Deep_Ensemble
from src.losses import *
from Experiments.EXP1.TestPerform import testPerform_muSigma



df = pd.read_csv('Dataset/CaliforniaHousing/housing.csv')
df = df.dropna(axis = 0)

# log transformation 
t = 9e-1
df['total_rooms'] = np.log(df['total_rooms'] + t)
df['total_bedrooms'] = np.log(df['total_bedrooms'] + t)
df['population']  = np.log(df['population'] +t)
df['households'] = np.log(df['households'] + t)
df['total_rooms'] = np.log(df['total_rooms'] + t)

for column in df.drop(columns=['ocean_proximity','median_house_value' ]).columns:
    df[column] = (df[column] - np.mean(df[column])) / np.std(df[column])
    
df = pd.get_dummies(df)

x = np.array(df.drop(columns = ['median_house_value']).values)
y = np.array(df.median_house_value.values) / 1E4





SEED = 5678

seed_all(SEED)

# x = x_normed

N_train = int(len(x) * 0.9)
N_test = int(len(x) * 0.1)

tr_idx = np.arange(len(x))[:N_train]

te_idx = np.arange(len(x))[N_train:N_train+N_test]


test_X, test_Y = x[te_idx], y[te_idx]



x_reshaped, y_reshaped = x[tr_idx], y[tr_idx]


N_model_train = int(len(y_reshaped) * 0.7)
N_recalibration = int(len(y_reshaped) * 0.3)

tr_new_idx, recal_idx = splitter(N_model_train, N_recalibration, seed = SEED)


recal_X = x_reshaped[recal_idx]
recal_Y = y_reshaped[recal_idx]



x_remain, y_remain = x_reshaped[tr_new_idx], y_reshaped[tr_new_idx]


split = 0.8
train_idx, val_idx = splitter(int(split * len(y_remain)), len(y_remain) - int(split * len(y_remain)), seed = SEED)


train_X, train_Y = x_remain[train_idx], y_remain[train_idx]
val_X, val_Y = x_remain[val_idx], y_remain[val_idx]

n_feature = x.shape[1]

epochs = 300


train_X = torch.Tensor(train_X)
train_Y = torch.Tensor(train_Y).view(-1).cuda()
val_X = torch.Tensor(val_X)
val_Y = torch.Tensor(val_Y).view(-1).cuda()
test_X = torch.Tensor(test_X)
test_Y = torch.Tensor(test_Y).view(-1).cuda()

recal_X = torch.Tensor(recal_X)
recal_Y = torch.Tensor(recal_Y).view(-1).cuda()

In [10]:
x.shape

(20433, 13)

# DeepEnsembleBenchmark

In [9]:


# benchmark, deep ensebmle
hidden = [100, 50]

print("Training model with hidden: ", hidden)
deepEnsemble_model = Deep_Ensemble(
    n_input = n_feature,
    hidden_layers = hidden
)
deepEnsemble_model.train(
    train_X, train_Y, val_X, val_Y,
    bat_size = 64,
    LR = 5E-3,

    N_Epoch = epochs,
    validate_times = 20,
    verbose = True,
    train_loss = mean_std_forEnsemble,
    val_loss_criterias = {
        "nll" : mean_std_forEnsemble,
        "rmse": rmse_loss,
        "MACE": MACE_muSigma
    },
    early_stopping = True,
    patience = 20,
    monitor_name = "rmse"
)

record = testPerform_muSigma(test_X, test_Y, model_name= "DeepEnsemble", model = deepEnsemble_model)

print(record)

Training model with hidden:  [100, 50]
epoch  0
     loss: nll, 2.836230993270874
     loss: rmse, 11.289331436157227
     loss: MACE, 0.11837279796600342
epoch  15
     loss: nll, 2.1526119709014893
     loss: rmse, 6.147111415863037
     loss: MACE, 0.040913332253694534
epoch  30
     loss: nll, 2.068626880645752
     loss: rmse, 5.739211559295654
     loss: MACE, 0.03142993524670601
epoch  45
     loss: nll, 2.0333800315856934
     loss: rmse, 5.54274845123291
     loss: MACE, 0.039781469851732254
Early Stopped at Epoch  52
{'rmse_loss': 4.988325595855713, 'mean_std_norm_loss': 1.9994369745254517, 'MACE_muSigma': 0.04316180944442749, 'AGCE_muSigma': 0.056550152599811554, 'CheckScore_muSigma': 1.23610520362854}


# Regression Net

In [7]:
hidden = [100, 50]

print("Training model with hidden: ", hidden)
pred_model = vanilla_predNet(
    n_input = n_feature,
    hidden_layers = hidden
)
pred_model.train(
    train_X, train_Y, val_X, val_Y,
    bat_size = 64,
    LR = 5E-3,

    N_Epoch = epochs,
    validate_times = 20,
    verbose = True,
    train_loss = mse_loss,
    val_loss_criterias = {
        "mse": mse_loss,
        "rmse": rmse_loss,
    },
    early_stopping = True,
    patience = 20,
    monitor_name = "rmse"
)

rmse_loss(pred_model(test_X), test_Y)

Training model with hidden:  [100, 50]
epoch  0
     loss: mse, 43.080753326416016
     loss: rmse, 6.563592910766602
epoch  15
     loss: mse, 32.859764099121094
     loss: rmse, 5.732343673706055
epoch  30
     loss: mse, 29.50026512145996
     loss: rmse, 5.431414604187012
epoch  45
     loss: mse, 28.786014556884766
     loss: rmse, 5.365260124206543
Early Stopped at Epoch  52


tensor(5.0316, device='cuda:0', grad_fn=<SqrtBackward0>)

# Post processing

In [8]:
from Experiments.EXP1.TestPerform import testPerform_projKernel
from sklearn import random_projection

n_component = 13
transformer = random_projection.GaussianRandomProjection(n_components = n_component)
reformer = lambda x : torch.Tensor(transformer.fit_transform(x.cpu().numpy()))

for width in [5, 13, 26, 60]:
    print(width)
    
    record = testPerform_projKernel(
        test_X, test_Y, recal_X, recal_Y, 
        model_name = "vanillaKernel_RandomProj", model= pred_model, reformer= reformer, wid = width) 
    
    print(record)

5
{'MACE_Loss': 0.029458556324243546, 'AGCE_Loss': 0.05447420850396156, 'CheckScore': 1.2995365858078003}
13
{'MACE_Loss': 0.03027963638305664, 'AGCE_Loss': 0.0443401001393795, 'CheckScore': 1.3043346405029297}
26
{'MACE_Loss': 0.030641019344329834, 'AGCE_Loss': 0.04697367921471596, 'CheckScore': 1.3042606115341187}
60
{'MACE_Loss': 0.030587175861001015, 'AGCE_Loss': 0.06699167191982269, 'CheckScore': 1.3043314218521118}
