In [None]:
import time
import numpy as np
import torch
from sklearn.neural_network import MLPRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from scipy.interpolate import CubicSpline
from sklearn.metrics import mean_squared_error
import tracemalloc  # For memory usage

# Model types accepted by benchmark_models().
_SUPPORTED_MODEL_TYPES = ('mlp', 'gpr', 'quadratic', 'cubic_spline')


def _to_flat_array(values):
    """Return *values* as a flat 1-D NumPy array.

    Objects exposing ``detach`` (e.g. torch tensors) are detached and moved
    to host memory before conversion; anything else goes through
    ``np.asarray``. Extra dimensions are flattened so that every column has
    one entry per point.
    """
    if hasattr(values, 'detach'):
        values = values.detach().cpu().numpy()
    return np.asarray(values).ravel()


def _extract_xy(section):
    """Build the (n_points, 2) feature matrix and target vector of a batch section."""
    features = np.column_stack((
        _to_flat_array(section['Log Moneyness']),
        _to_flat_array(section['Time to Maturity']),
    ))
    targets = _to_flat_array(section['Total Variance'])
    return features, targets


def _cubic_spline_predict(x_train, y_train, x_query):
    """Fit a natural univariate cubic spline and evaluate it at *x_query*.

    ``CubicSpline`` requires strictly increasing knots, so the training
    points are sorted and targets that share the same abscissa are averaged.
    Input that is already strictly increasing is passed through unchanged.
    """
    knots, inverse = np.unique(x_train, return_inverse=True)
    inverse = inverse.ravel()
    counts = np.bincount(inverse, minlength=knots.size)
    sums = np.bincount(inverse, weights=y_train, minlength=knots.size)
    spline = CubicSpline(knots, sums / counts, bc_type='natural')
    return spline(x_query)


def _fit_predict(model_type, X_train, y_train, X_test):
    """Fit the model selected by *model_type* and return its predictions on *X_test*."""
    if model_type == 'mlp':
        model = MLPRegressor(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
        model.fit(X_train, y_train)
        return model.predict(X_test)

    if model_type == 'gpr':
        kernel = C(1.0, (1e-4, 1e1)) * RBF(1, (1e-4, 1e1))
        model = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10, random_state=42)
        model.fit(X_train, y_train)
        return model.predict(X_test)

    if model_type == 'quadratic':
        poly = PolynomialFeatures(degree=2)
        model = LinearRegression()
        model.fit(poly.fit_transform(X_train), y_train)
        return model.predict(poly.transform(X_test))

    if model_type == 'cubic_spline':
        # Univariate spline: only the log-moneyness column is used.
        return _cubic_spline_predict(X_train[:, 0], y_train, X_test[:, 0])

    raise ValueError("Unsupported model type.")


def benchmark_models(data_loader, model_type='mlp'):
    """Benchmark one baseline regressor on every batch of *data_loader*.

    For each batch a fresh model is fitted on the ``'Input Surface'`` points
    and evaluated on the ``'Query Points'``; both sections must provide the
    keys ``'Log Moneyness'``, ``'Time to Maturity'`` and ``'Total Variance'``.
    A summary is printed once all batches have been processed.

    Args:
        data_loader: Iterable of batch dictionaries as described above.
        model_type: One of ``'mlp'``, ``'gpr'``, ``'quadratic'`` or
            ``'cubic_spline'``.

    Returns:
        Tuple ``(avg_mse, total_time, max_memory_usage)``: the mean of the
        per-batch MSEs (NaN when the loader yields no batches), the summed
        fit-and-predict time in seconds, and the largest per-batch peak
        reported by ``tracemalloc`` in bytes.

    Raises:
        ValueError: If *model_type* is not supported.
    """
    # Validate before any work so a bad type is reported even for an empty
    # loader and never leaves tracemalloc running.
    if model_type not in _SUPPORTED_MODEL_TYPES:
        raise ValueError("Unsupported model type.")

    mse_list = []
    total_time = 0
    max_memory_usage = 0

    for batch in data_loader:
        X_train, y_train = _extract_xy(batch['Input Surface'])
        X_test, y_test = _extract_xy(batch['Query Points'])

        tracemalloc.start()
        try:
            # perf_counter is monotonic, unlike time.time().
            start_time = time.perf_counter()
            y_pred = _fit_predict(model_type, X_train, y_train, X_test)
            elapsed_time = time.perf_counter() - start_time
            _, peak_memory = tracemalloc.get_traced_memory()
        finally:
            # Always stop tracing, even when fitting or predicting fails.
            tracemalloc.stop()

        # Scoring is kept outside the timed/traced region: it is not model cost.
        mse_list.append(mean_squared_error(y_test, y_pred))
        total_time += elapsed_time
        max_memory_usage = max(max_memory_usage, peak_memory)

    # Aggregate results; avoid np.mean([]) and its RuntimeWarning.
    avg_mse = np.mean(mse_list) if mse_list else float('nan')
    print(f"Model: {model_type.upper()}")
    print(f"Average MSE: {avg_mse:.6f}")
    print(f"Total Computation Time: {total_time:.2f} seconds")
    print(f"Max Memory Usage: {max_memory_usage / 1024:.2f} KB")

    return avg_mse, total_time, max_memory_usage


# Example usage: run every benchmark method against the same test loader.
pre_train_data_loader_test = ...  # Replace with your actual data loader
fine_tune_data_loader_test = ...  # Replace with your actual data loader

# In order: MLP, Gaussian Process Regression, Quadratic Regression, Cubic Spline.
for _model_type in ('mlp', 'gpr', 'quadratic', 'cubic_spline'):
    benchmark_models(pre_train_data_loader_test, model_type=_model_type)
