In [1]:
import sys
import time
import random
from os.path import exists
from typing import *

# Common data science imports
import numpy as np
import pandas as pd
import torch
from torch.utils.data import random_split, DataLoader, Dataset
from sklearn.cluster import KMeans

# Visualization tools
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from seaborn import heatmap
from tqdm import tqdm, notebook as tqdm_notebook

# GPyTorch and linear_operator imports
import gpytorch
from gpytorch.kernels import ScaleKernel, RBFKernel
from gpytorch.constraints import *
import linear_operator
from linear_operator.settings import max_cholesky_size
from linear_operator.operators.dense_linear_operator import DenseLinearOperator
from linear_operator.utils.cholesky import psd_safe_cholesky

# Custom soft GP and MLL imports
from gp.soft_gp.soft_gp import SoftGP
from gp.soft_gp.mll import HutchinsonPseudoLoss
from linear_solver.cg import linear_cg

# Data analysis and UCI dataset

# Utility functions for dataset handling
from gp.util import flatten_dataset, split_dataset, filter_param

# Experiment tracking
import wandb

# System path adjustments
# Makes the parent directory importable for the rest of the notebook.
# NOTE(review): this statement executes *after* the project-local imports
# earlier in this cell (`gp.soft_gp...`, `linear_solver.cg`, `gp.util`). If
# those packages are only reachable through "../", a fresh kernel would fail
# before getting here and this line needs to move ahead of them — TODO confirm
# with Restart Kernel -> Run All.
sys.path.append("../")

In [2]:
def eval_gp(model, test_dataset: Dataset, device="cuda:0", batch_size: int = 32, num_workers: int = 1) -> Dict[str, Any]:
    """Evaluate a GP model on a held-out dataset and print a one-line summary.

    The dataset is iterated in order (no shuffling). For every batch the
    squared error of ``model.pred`` against the targets and the negated
    ``model.mll`` value are collected on the CPU.

    Args:
        model: Object exposing ``pred(x)``, ``mll(x, y)``, ``noise``,
            ``get_lengthscale()`` and ``get_outputscale()``.
        test_dataset: Dataset yielding ``(x, y)`` pairs.
        device: Device each batch is moved to before it is given to the model.
        batch_size: Evaluation batch size (default 32, the previous fixed value).
        num_workers: DataLoader worker processes (default 1, the previous
            fixed value). Use 0 to load in the calling process.

    Returns:
        A dict with
        ``"rmse"``: Python float, sqrt(sum of squared errors / len(test_dataset));
        ``"nll"``: 0-dim tensor, the sum over batches of ``-model.mll(x, y)``.
    """
    squared_errors = []
    neg_mlls = []
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    for x_batch, y_batch in test_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)
        # Detach and move to CPU right away so per-batch results do not keep
        # autograd graphs or device memory alive for the whole evaluation.
        squared_errors.append((model.pred(x_batch) - y_batch).detach().cpu() ** 2)
        neg_mlls.append(-model.mll(x_batch, y_batch).detach().cpu())

    # Normalise by the dataset length, not the batch count.
    rmse = torch.sqrt(torch.sum(torch.cat(squared_errors)) / len(test_dataset)).item()
    neg_mll = torch.sum(torch.tensor(neg_mlls))

    print("RMSE:", rmse, "NEG_MLL", neg_mll.item(), "NOISE", model.noise.cpu().item(), "LENGTHSCALE", model.get_lengthscale(), "OUTPUTSCALE", model.get_outputscale())

    return {
        "rmse": rmse,
        "nll": neg_mll,
    }
   

In [3]:
# ================== Dataset ==================
from data.get_uci import ElevatorsDataset, PoleteleDataset, ProteinDataset

# Alternative datasets (swap in by replacing the `dataset = ...` line below):
#   dataset = ElevatorsDataset("../data/uci_datasets/uci_datasets/elevators/data.csv")
#   dataset = PoleteleDataset("../data/uci_datasets/uci_datasets/pol/data.csv")
#   dataset = CTSlicesDataset("../data/uci_datasets/uci_datasets/slice/data.csv")
#     (CTSlicesDataset is not part of the import above.)
dataset = ProteinDataset("../data/uci_datasets/uci_datasets/protein/data.csv")

# 90% train, empty validation split; the remaining split is `test_dataset`.
# TODO: change to the real split fractions.
train_dataset, val_dataset, test_dataset = split_dataset(dataset, train_frac=9 / 10, val_frac=0 / 10)

def plot_results(all_mean_rmse, all_mean_runtimes, all_std_rmse, all_std_runtimes, epochs,  legend_names):
    """Draw RMSE and training time per epoch side by side, with +/- 1 std bands.

    Args:
        all_mean_rmse / all_std_rmse: Per-model sequences of per-epoch RMSE
            mean and standard deviation, indexed in the order of ``legend_names``.
        all_mean_runtimes / all_std_runtimes: Same layout for per-epoch runtime.
        epochs: Number of epochs; the x axis runs from 1 to ``epochs``.
        legend_names: One label per model. Only the RMSE panel shows a legend.

    The figure is written to 'protein_solvers.png' and then shown.
    """
    fig, axes = plt.subplots(1, 2, figsize=(12, 6))
    x_values = range(1, epochs + 1)

    # (axis, means, stds, title, y label, draw legend?) — left panel first.
    panels = (
        (axes[0], all_mean_rmse, all_std_rmse, 'RMSE per Epoch', 'RMSE', True),
        (axes[1], all_mean_runtimes, all_std_runtimes, 'Training Time per Epoch', 'Time (s)', False),
    )

    for ax, means, stds, title, y_label, with_legend in panels:
        for idx, name in enumerate(legend_names):
            ax.plot(x_values, means[idx], label=name)

            # Shaded band one standard deviation either side of the mean.
            pairs = list(zip(means[idx], stds[idx]))
            lower = [m - s for m, s in pairs]
            upper = [m + s for m, s in pairs]
            ax.fill_between(x_values, lower, upper, alpha=0.3)

        ax.set_title(title)
        ax.set_xlabel('Epoch')
        ax.set_ylabel(y_label)
        if with_legend:
            ax.legend()

    plt.tight_layout()
    plt.savefig('protein_solvers.png')
    plt.show()
    

def train_gp(GP_class, inducing_points, test_dataset, train_features, train_labels, epochs, device, dtype, model_config=None, train_data=None):
    """Train one GP model and record test RMSE and wall-clock time per epoch.

    Each epoch runs one Adam pass over mini-batches minimising ``-model.mll``,
    then calls ``model.fit(train_features, train_labels)``. Both steps are
    included in the recorded epoch runtime; the evaluation afterwards is not.

    Args:
        GP_class: Model constructor, called as
            ``GP_class(kernel, inducing_points, **kwargs)``.
        inducing_points: Initial inducing point tensor. A detached copy is
            passed to the model, so the caller's tensor is never modified.
        test_dataset: Dataset used by ``eval_gp`` after every epoch.
        train_features, train_labels: Full training data given to ``model.fit``.
        epochs: Number of training epochs.
        device, dtype: Device and dtype for the kernel, model and batches.
        model_config: Optional dict of overrides. Recognised keys:
            ``learn_noise``, ``learning_rate``, ``batch_size``, ``noise``,
            ``use_scale``, ``solver``, ``mll_approx``, ``fit_chunk_size``,
            ``use_qr``, ``hutch_solver``, ``max_cg_iter`` and ``cg_tolerance``.
        train_data: Dataset to draw mini-batches from. When omitted, the
            notebook-level ``train_dataset`` global is used (the previous
            behaviour); pass it explicitly to avoid relying on kernel state.

    Returns:
        ``(epoch_rmse, epoch_runtimes)``: two lists with one entry per epoch.
    """
    print(device)
    print(inducing_points.device)
    model_config = model_config or {}
    kernel = RBFKernel().to(device=device, dtype=dtype)
    learn_noise = model_config.get("learn_noise", False)
    lr = model_config.get("learning_rate", 0.01)
    batch_size = model_config.get("batch_size", 1024)

    model_kwargs = dict(
        noise=model_config.get("noise", 1e-3),
        learn_noise=learn_noise,
        use_scale=model_config.get("use_scale", True),
        dtype=dtype,
        device=device,
        max_cg_iter=model_config.get("max_cg_iter", 1000),
        solver=model_config.get("solver", "solve"),
        mll_approx=model_config.get("mll_approx", "hutchinson"),
        fit_chunk_size=model_config.get("fit_chunk_size", 1024),
        use_qr=model_config.get("use_qr", True),
        hutch_solver=model_config.get("hutch_solver", "solve"),
    )
    # Forward the CG tolerance when a config sets it; before this, the key was
    # read by nothing, so configs differing only in tolerance trained alike.
    # NOTE(review): assumes GP_class accepts a `cg_tolerance` keyword — confirm
    # against the model's constructor.
    if "cg_tolerance" in model_config:
        model_kwargs["cg_tolerance"] = model_config["cg_tolerance"]

    # Give every model its own copy of the inducing points. If the model
    # trains them in place (presumably via an nn.Parameter sharing storage —
    # verify), reusing the caller's tensor would leak one run into the next.
    model = GP_class(kernel, inducing_points.detach().clone(), **model_kwargs)

    epoch_runtimes = []
    epoch_rmse = []

    # With a fixed noise level, keep the noise parameter out of the optimizer.
    if learn_noise:
        params = model.parameters()
    else:
        params = filter_param(model.named_parameters(), "likelihood.noise_covar.raw_noise")
    optimizer = torch.optim.Adam(params, lr=lr)

    # Falls back to the notebook-level `train_dataset` when no dataset is given.
    batch_source = train_dataset if train_data is None else train_data
    train_loader = DataLoader(batch_source, batch_size=batch_size, shuffle=True)

    #==================Train============================
    for _ in tqdm(range(epochs)):
        print("training current epoch")
        epoch_start_time = time.time()

        for x_batch, y_batch in train_loader:
            x_batch = x_batch.clone().detach().to(dtype=dtype, device=device)
            y_batch = y_batch.clone().detach().to(dtype=dtype, device=device)
            optimizer.zero_grad()
            with gpytorch.settings.max_root_decomposition_size(100), max_cholesky_size(int(1.e7)), gpytorch.settings.max_preconditioner_size(15):
                neg_mll = -model.mll(x_batch, y_batch)
            neg_mll.backward()
            optimizer.step()
        model.fit(train_features, train_labels)
        epoch_runtimes.append(time.time() - epoch_start_time)

        #==================Evaluate============================
        print("Running eval")
        eval_results = eval_gp(model, test_dataset, device=device)
        epoch_rmse.append(eval_results['rmse'])
        print("eval finished")

    return epoch_rmse, epoch_runtimes

SIZE (45730, 10)


In [4]:
def benchmark(train_dataset, test_dataset, epochs=2, seed=42, N=3, configs=None,
              num_inducing=512, dtype=torch.float32, device="cuda:1"):
    """Train every configuration ``N`` times and aggregate per-epoch statistics.

    Inducing points are the KMeans cluster centres of the flattened training
    features, computed once and shared as the starting point for every run.

    Args:
        train_dataset: Dataset flattened into features/labels for KMeans and
            for ``train_gp``.
            NOTE: ``train_gp`` as called here builds its mini-batch loader
            from the notebook-level ``train_dataset`` global, not from this
            argument — keep the two in sync.
        test_dataset: Dataset evaluated after every epoch.
        epochs: Training epochs per run.
        seed: Seed applied once to torch, numpy and ``random`` before any work.
        N: Number of repeated runs per configuration.
        configs: List of model-config dicts, one per model variant. Required.
        num_inducing: Number of KMeans clusters / inducing points.
        dtype: Tensor dtype for the inducing points and the model.
        device: Device for the inducing points and the model.

    Returns:
        ``(all_mean_rmse, all_mean_runtimes, all_std_rmse, all_std_runtimes)``:
        four lists with one per-epoch array per configuration, in the order
        of ``configs``.

    Raises:
        ValueError: If ``configs`` is None.
    """
    # Fail fast, before seeding or the (slow) KMeans fit.
    if configs is None:
        raise ValueError("You must provide a list of configurations in 'configs'")

    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    all_mean_rmse = []
    all_mean_runtimes = []
    all_std_rmse = []
    all_std_runtimes = []

    #==================Inducing Points============================
    train_features, train_labels = flatten_dataset(train_dataset)
    kmeans = KMeans(n_clusters=num_inducing)
    kmeans.fit(train_features)
    centers = kmeans.cluster_centers_
    inducing_points = torch.tensor(centers).to(dtype=dtype, device=device)

    for config in configs:
        all_runs_rmse = []
        all_runs_runtimes = []

        solver_name = config.get("solver", "Unknown Solver")
        print(f"Running model with solver '{solver_name}'")

        for _ in range(N):
            epoch_rmse, epoch_runtimes = train_gp(
                SoftGP,
                inducing_points,
                test_dataset,
                train_features,
                train_labels,
                epochs,
                device,
                dtype,
                model_config=config,
            )
            all_runs_rmse.append(epoch_rmse)
            all_runs_runtimes.append(epoch_runtimes)

        # Aggregate across the N runs (axis 0), keeping one value per epoch.
        all_mean_rmse.append(np.mean(all_runs_rmse, axis=0))
        all_std_rmse.append(np.std(all_runs_rmse, axis=0))
        all_mean_runtimes.append(np.mean(all_runs_runtimes, axis=0))
        all_std_runtimes.append(np.std(all_runs_runtimes, axis=0))

    return all_mean_rmse, all_mean_runtimes, all_std_rmse, all_std_runtimes

### CG Fit Testing 


In [5]:
# Each entry pairs a legend label with the model configuration it describes.
# Keeping them together guarantees that `legend_names[i]` labels `configs[i]`;
# the previous hand-written label list started with 'QR Solve' although the
# first configuration is the CG solver at tolerance 1e-4.
experiments = [
    ("CG Solver 1e-4", {"solver": "cg", "hutch_solver": "solve", "cg_tolerance": 1e-4, "use_qr": False}),
    ("CG Solver 1e-3", {"solver": "cg", "hutch_solver": "solve", "cg_tolerance": 1e-3, "use_qr": False}),
    ("CG Solver 1e-2", {"solver": "cg", "hutch_solver": "solve", "cg_tolerance": 1e-2, "use_qr": False}),
    ("CG Solver 1e-1", {"solver": "cg", "hutch_solver": "solve", "cg_tolerance": 1e-1, "use_qr": False}),
    ("QR Solve", {"solver": "solve", "hutch_solver": "solve", "cg_tolerance": 1e-4, "use_qr": True}),
    ("Cholesky Solver", {"solver": "cholesky", "hutch_solver": "solve", "use_qr": False}),
    ("Direct Solver", {"solver": "solve", "hutch_solver": "solve", "use_qr": False}),
]
legend_names = [label for label, _ in experiments]
configs = [config for _, config in experiments]

epochs = 50
N = 1  # Number of runs
all_mean_rmse, all_mean_runtimes, all_std_rmse, all_std_runtimes = benchmark(train_dataset, test_dataset, epochs=epochs, seed=6535, N=N, configs=configs)

  0%|          | 0/6 [00:00<?, ?it/s]

Running model with solver 'cg'
cuda:1
cuda:1
Using softmax_interp


  0%|          | 0/50 [00:00<?, ?it/s]

training current epoch
USING PRECONDITIONER
Running eval


  2%|▏         | 1/50 [00:04<04:04,  4.99s/it]

RMSE: 0.7888864278793335 NEG_MLL -23808.74609375 NOISE 0.0010000000474974513 LENGTHSCALE tensor([[0.5775]], grad_fn=<ToCopyBackward0>) OUTPUTSCALE tensor(0.9086, grad_fn=<ToCopyBackward0>)
eval finished
training current epoch
USING PRECONDITIONER
Running eval


  4%|▍         | 2/50 [00:09<03:41,  4.62s/it]

RMSE: 0.777450680732727 NEG_MLL -21494.501953125 NOISE 0.0010000000474974513 LENGTHSCALE tensor([[0.6220]], grad_fn=<ToCopyBackward0>) OUTPUTSCALE tensor(1.1509, grad_fn=<ToCopyBackward0>)
eval finished
training current epoch
USING PRECONDITIONER
Running eval


  6%|▌         | 3/50 [00:13<03:22,  4.31s/it]

RMSE: 0.7741975784301758 NEG_MLL -19945.498046875 NOISE 0.0010000000474974513 LENGTHSCALE tensor([[0.6307]], grad_fn=<ToCopyBackward0>) OUTPUTSCALE tensor(1.4051, grad_fn=<ToCopyBackward0>)
eval finished
training current epoch
USING PRECONDITIONER
Running eval


  8%|▊         | 4/50 [00:17<03:12,  4.19s/it]

RMSE: 0.7806186079978943 NEG_MLL -18787.7578125 NOISE 0.0010000000474974513 LENGTHSCALE tensor([[0.6237]], grad_fn=<ToCopyBackward0>) OUTPUTSCALE tensor(1.6664, grad_fn=<ToCopyBackward0>)
eval finished
training current epoch
USING PRECONDITIONER
Running eval


 10%|█         | 5/50 [00:21<03:05,  4.12s/it]

RMSE: 0.7725435495376587 NEG_MLL -17924.861328125 NOISE 0.0010000000474974513 LENGTHSCALE tensor([[0.6077]], grad_fn=<ToCopyBackward0>) OUTPUTSCALE tensor(1.9269, grad_fn=<ToCopyBackward0>)
eval finished
training current epoch
USING PRECONDITIONER
Running eval


 12%|█▏        | 6/50 [00:25<03:00,  4.10s/it]

RMSE: 0.7834170460700989 NEG_MLL -17095.306640625 NOISE 0.0010000000474974513 LENGTHSCALE tensor([[0.6014]], grad_fn=<ToCopyBackward0>) OUTPUTSCALE tensor(2.1895, grad_fn=<ToCopyBackward0>)
eval finished
training current epoch
USING PRECONDITIONER
Running eval


 14%|█▍        | 7/50 [00:32<03:33,  4.97s/it]

RMSE: 0.771926999092102 NEG_MLL -16516.91796875 NOISE 0.0010000000474974513 LENGTHSCALE tensor([[0.5778]], grad_fn=<ToCopyBackward0>) OUTPUTSCALE tensor(2.4506, grad_fn=<ToCopyBackward0>)
eval finished
training current epoch
USING PRECONDITIONER
Running eval


 16%|█▌        | 8/50 [00:37<03:40,  5.26s/it]

RMSE: 0.7870786786079407 NEG_MLL -15981.48046875 NOISE 0.0010000000474974513 LENGTHSCALE tensor([[0.5661]], grad_fn=<ToCopyBackward0>) OUTPUTSCALE tensor(2.7107, grad_fn=<ToCopyBackward0>)
eval finished
training current epoch
USING PRECONDITIONER
Running eval


 18%|█▊        | 9/50 [00:41<03:17,  4.81s/it]

RMSE: 0.7860366106033325 NEG_MLL -15365.314453125 NOISE 0.0010000000474974513 LENGTHSCALE tensor([[0.5604]], grad_fn=<ToCopyBackward0>) OUTPUTSCALE tensor(2.9656, grad_fn=<ToCopyBackward0>)
eval finished
training current epoch


 18%|█▊        | 9/50 [00:43<03:20,  4.88s/it]


KeyboardInterrupt: 

In [7]:
def plot_results2(all_mean_rmse, all_mean_runtimes, all_std_rmse, all_std_runtimes, epochs,  legend_names):
    """Plot test RMSE per epoch for every model on a single axis.

    Line style is chosen from the label: solid when it contains "CG", dashed
    when it contains "Cholesky", dotted otherwise. Labels containing "QR" are
    drawn with width 3, all others with width 1. A +/- 1 std band is shaded
    around each curve.

    ``all_mean_runtimes`` and ``all_std_runtimes`` are accepted but not used.
    The figure is written to 'protein_solvers.png' and then shown.
    """
    fig, ax = plt.subplots(1, 1, figsize=(6, 6))
    x_values = range(1, epochs + 1)

    # First matching keyword wins; anything else falls back to dotted.
    style_by_keyword = (("CG", "-"), ("Cholesky", "--"))

    for idx, name in enumerate(legend_names):
        line_style = next((style for key, style in style_by_keyword if key in name), ":")
        line_width = 3 if "QR" in name else 1
        ax.plot(x_values, all_mean_rmse[idx], label=name, linestyle=line_style, linewidth=line_width)

        # Shaded band one standard deviation either side of the mean.
        pairs = list(zip(all_mean_rmse[idx], all_std_rmse[idx]))
        ax.fill_between(
            x_values,
            [m - s for m, s in pairs],
            [m + s for m, s in pairs],
            alpha=0.3,
        )

    ax.set_title('RMSE per Epoch')
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel('Test RMSE', fontsize=12)
    ax.tick_params(axis='both', which='major', labelsize=12)
    ax.legend()

    plt.tight_layout()
    plt.savefig('protein_solvers.png')
    plt.show()

In [None]:
# One label per benchmarked configuration, in the order the results are stored.
legend_names = [
    'CG Solver 1e-4',
    'CG Solver 1e-3',
    'CG Solver 1e-2',
    'CG Solver 1e-1',
    'QR Solve',
    'Cholesky Solver',
    "Direct Solver",
]
# tolerance_values = [None,1e-4, 1e-3, 1e-2, 1e-1]
plot_results2(
    all_mean_rmse,
    all_mean_runtimes,
    all_std_rmse,
    all_std_runtimes,
    epochs,
    legend_names,
)