In [None]:
import sys
sys.path.append('../utils')

import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import time

from data_processing import load_and_process_data
from models import DropoutModel, train_mcd, predict_mcd
from metrics import calculate_metrics, calculate_observed_confidence
from post_processing import create_errorbar_plot, plot_abs_error_vs_std, plot_std_histogram, plot_calibration_curve

# Hyperparameters
BATCH_SIZE = 1
DROPOUT_RATE = 0.13
N_EPOCHS = 500
T = 300  # forwarded to predict_mcd; presumably the number of stochastic forward passes -- TODO confirm
LEARNING_RATE = 0.00011
num_zero_threshold = 3600  # forwarded to load_and_process_data
n_1, n_2, n_3 = 530, 602, 471  # forwarded to DropoutModel; presumably hidden-layer widths -- TODO confirm

# Reproducibility: weight initialisation and dropout masks are drawn from the
# global random generators, so seed them once, before any model is built, to
# make a fresh "Restart Kernel -> Run All" produce the same numbers.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Device configuration: use the GPU when one is visible, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load and process data
file_path = "../data/other_property/Tm.csv"
X_count, Y = load_and_process_data(file_path, num_zero_threshold)

In [None]:
# Split the data: hold out 20% of the samples first, then cut that hold-out
# in half to obtain the validation and test sets.
xtrain, xtemp, ytrain, ytemp = train_test_split(
    X_count,
    Y,
    test_size=0.2,
    random_state=11,
)
xval, xtest, yval, ytest = train_test_split(
    xtemp,
    ytemp,
    test_size=0.5,
    random_state=42,
)

# Create DataLoaders
def create_dataloader(x, y, batch_size, shuffle=False, target_device=None):
    """Wrap features and targets in a ``DataLoader`` of float32 tensors.

    Args:
        x: 2-D feature table; anything ``np.asarray`` can convert
            (pandas DataFrame, NumPy array, nested list).
        y: 1-D targets; anything ``np.asarray`` can convert
            (pandas Series, NumPy array, list). A trailing axis is added via
            ``unsqueeze(1)`` so each target row pairs with one feature row.
        batch_size: number of samples per batch.
        shuffle: whether the loader reshuffles samples every epoch. The default
            ``False`` keeps the original row order, which this notebook relies
            on when it compares predictions made from a loader against the
            targets that loader was built from.
        target_device: device to place the tensors on. When ``None`` the
            notebook-level ``device`` is used.

    Returns:
        A ``torch.utils.data.DataLoader`` over a ``TensorDataset`` of
        ``(features, targets)`` pairs.
    """
    if target_device is None:
        target_device = device

    # Go through NumPy first: this ignores any pandas index (train_test_split
    # hands back shuffled indices) and accepts plain arrays and lists as well.
    features = np.asarray(x, dtype=np.float32)
    targets = np.asarray(y, dtype=np.float32)

    # torch.tensor always copies, so the loader never aliases the caller's data.
    tensor_x = torch.tensor(features, dtype=torch.float32, device=target_device)
    tensor_y = torch.tensor(targets, dtype=torch.float32, device=target_device).unsqueeze(1)

    dataset = torch.utils.data.TensorDataset(tensor_x, tensor_y)
    return torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=shuffle)

# Build one loader per split (train, validation, test), all with the same batch size.
train_loader, val_loader, test_loader = (
    create_dataloader(features, targets, BATCH_SIZE)
    for features, targets in ((xtrain, ytrain), (xval, yval), (xtest, ytest))
)

# Initialize model: the input width is the number of feature columns.
model = DropoutModel(
    xtrain.shape[1],
    n_1,
    n_2,
    n_3,
    DROPOUT_RATE,
).to(device)

# Train the model
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
train_mcd(model, train_loader, N_EPOCHS, LEARNING_RATE)
end_time = time.perf_counter()
print(f"Training took {end_time - start_time:.2f} seconds.")

In [None]:
# Make predictions with T calls' worth of MC-dropout sampling per split and
# flatten each returned array so the mean and std are 1D.
mean_train, std_train = (
    values.flatten() for values in predict_mcd(model, train_loader, T)
)
mean_test, std_test = (
    values.flatten() for values in predict_mcd(model, test_loader, T)
)

# Calculate metrics
# 21 evenly spaced confidence levels from 0 to 1 inclusive (step 0.05).
# linspace fixes the number of points and the exact end point, whereas arange
# with a float step and a padded stop depends on rounding for its length.
confidence_levels = np.linspace(0.0, 1.0, 21)
train_metrics = calculate_metrics(ytrain, mean_train, std_train, confidence_levels)
test_metrics = calculate_metrics(ytest, mean_test, std_test, confidence_levels)

# Create a DataFrame to store the metrics: one row per dataset, one column per
# metric key read from the dictionaries returned above.
metric_names = ['MAE', 'RMSE', 'R2', 'Spearman', 'Calibration Area']
metrics_df = pd.DataFrame({
    'Dataset': ['Training', 'Test'],
    **{name: [train_metrics[name], test_metrics[name]] for name in metric_names},
})

# Print the DataFrame
print(metrics_df)

In [None]:
# Plot results: one error-bar figure per split, training first.
for _truth, _mean, _std, _colour, _label in (
    (ytrain, mean_train, std_train, 'blue', 'Training'),
    (ytest, mean_test, std_test, 'green', 'Test'),
):
    create_errorbar_plot(_truth, _mean, _std, _colour, _label)

In [None]:
# Plot additional figures: absolute prediction error against predicted std.
abs_error_train, abs_error_test = (
    np.abs(truth - predicted)
    for truth, predicted in ((ytrain, mean_train), (ytest, mean_test))
)

for _errors, _std, _label, _colour in (
    (abs_error_train, std_train, 'Training', 'blue'),
    (abs_error_test, std_test, 'Test', 'green'),
):
    plot_abs_error_vs_std(_errors, _std, _label, _colour)

In [None]:
# Histogram of the predicted standard deviations for each split.
for _std, _label, _colour in (
    (std_train, 'Training', 'blue'),
    (std_test, 'Test', 'green'),
):
    plot_std_histogram(_std, _label, _colour)

In [None]:
# Calculate observed confidence for both splits at the shared confidence levels.
observed_confidence_train, observed_confidence_test = (
    calculate_observed_confidence(truth, mean, std, confidence_levels)
    for truth, mean, std in (
        (ytrain, mean_train, std_train),
        (ytest, mean_test, std_test),
    )
)

# Plot calibration curves, training first.
for _observed, _label in (
    (observed_confidence_train, 'Training'),
    (observed_confidence_test, 'Test'),
):
    plot_calibration_curve(confidence_levels, _observed, _label)