In [None]:
import sys
sys.path.append('../utils')

# Import necessary libraries
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import time

# Import custom modules
from data_processing import load_and_process_data
from models import BayesianNeuralNetwork, train_bnn, predict_bnn
from metrics import calculate_metrics, calculate_observed_confidence
from post_processing import create_errorbar_plot, plot_abs_error_vs_std, plot_std_histogram, plot_calibration_curve

# Run on the GPU when CUDA is usable in this environment; otherwise stay on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')


In [None]:
# Define hyperparameters
# Every tunable value used by this notebook lives in this one mapping; the
# comments say where each entry is consumed further down.
HYPERPARAMETERS = dict(
    # Mini-batch size given to the training DataLoader.
    batch_size=45,
    # Passed to BayesianNeuralNetwork as `hidden_layers`.
    hidden_layers=[656, 956, 87],
    # Third and fourth positional arguments of train_bnn.
    epochs=650,
    learning_rate=0.00014,
    # Initialisation / clamping settings forwarded to the BayesianNeuralNetwork
    # constructor under the same keyword names.
    weight_init_std=0.11525,
    log_std_init_mean=-3.0908,
    log_std_init_std=0.32564,
    log_std_clamp=(-5.83775, 4.93684),
    # Fifth positional argument of train_bnn.
    grad_clip_norm=1.0,
    # Third argument of predict_bnn.
    n_samples_uncertainty=500,
    # Second argument of load_and_process_data.
    num_zero_threshold=3600,
)

# Read the CSV and obtain the feature table and the target values.
# The 'num_zero_threshold' hyperparameter is forwarded as the second argument;
# how it is applied is decided inside load_and_process_data (not shown here).
file_path = "../data/other_property/Tm.csv"  # Update with your file path
X_count, Y = load_and_process_data(
    file_path,
    HYPERPARAMETERS['num_zero_threshold'],
)

# Hold out 20 % of the rows, then cut that hold-out in half to get the
# validation and test sets. Both splits use fixed random_state values.
xtrain, xtemp, ytrain, ytemp = train_test_split(
    X_count,
    Y,
    test_size=0.2,
    random_state=11,
)
xval, xtest, yval, ytest = train_test_split(
    xtemp,
    ytemp,
    test_size=0.5,
    random_state=42,
)

# Turn every split into a float tensor that lives on the selected device.
# Feature tables are converted through `.values`; targets are handed to
# torch.tensor as they are.
xtrain_tensor, xval_tensor, xtest_tensor = (
    torch.tensor(features.values).float().to(device)
    for features in (xtrain, xval, xtest)
)
ytrain_tensor, yval_tensor, ytest_tensor = (
    torch.tensor(targets).float().to(device)
    for targets in (ytrain, yval, ytest)
)

# Shuffled mini-batches over the training split only.
train_data = TensorDataset(xtrain_tensor, ytrain_tensor)
train_loader = DataLoader(
    dataset=train_data,
    batch_size=HYPERPARAMETERS['batch_size'],
    shuffle=True,
)

# Seed the global NumPy and PyTorch random number generators before any
# weights are created, so that initialisation and shuffled training start from
# the same state on every Restart & Run All. An optional 'seed' entry in
# HYPERPARAMETERS overrides the default.
# NOTE(review): some CUDA kernels are non-deterministic even with a fixed
# seed, so GPU runs may still differ slightly from one another.
SEED = HYPERPARAMETERS.get('seed', 42)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Initialize the BNN model; the input width is the number of feature columns.
model = BayesianNeuralNetwork(
    n_features=xtrain.shape[1],
    hidden_layers=HYPERPARAMETERS['hidden_layers'],
    weight_init_std=HYPERPARAMETERS['weight_init_std'],
    log_std_init_mean=HYPERPARAMETERS['log_std_init_mean'],
    log_std_init_std=HYPERPARAMETERS['log_std_init_std'],
    log_std_clamp=HYPERPARAMETERS['log_std_clamp']
)

# Train the model.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# durations; unlike time.time() it is unaffected by system clock adjustments.
start_time = time.perf_counter()

model = model.to(device)
train_bnn(model, train_loader, HYPERPARAMETERS['epochs'], HYPERPARAMETERS['learning_rate'], HYPERPARAMETERS['grad_clip_norm'])

end_time = time.perf_counter()

# Print elapsed time (covers the device transfer and the training call)
elapsed_time = end_time - start_time
print(f"Training took {elapsed_time:.2f} seconds.")

In [None]:

# Make predictions with uncertainty on training and test data.
# The input tensors were already moved to `device` when they were created, so
# they are passed straight through without another transfer.
mean_train, std_train = predict_bnn(model, xtrain_tensor, HYPERPARAMETERS['n_samples_uncertainty'])
mean_test, std_test = predict_bnn(model, xtest_tensor, HYPERPARAMETERS['n_samples_uncertainty'])

# Convert predictions to numpy for metric calculation
mean_train = mean_train.cpu().numpy()
std_train = std_train.cpu().numpy()
mean_test = mean_test.cpu().numpy()
std_test = std_test.cpu().numpy()

# Confidence levels for the calibration curve: 0.00, 0.05, ..., 1.00.
# linspace fixes the number of points and both endpoints exactly, whereas
# arange with a floating-point step can gain or lose the last element
# through rounding.
confidence_levels = np.linspace(0.0, 1.0, 21)

# Calculate metrics for each split
train_metrics = calculate_metrics(ytrain, mean_train, std_train, confidence_levels)
test_metrics = calculate_metrics(ytest, mean_test, std_test, confidence_levels)

# Create a DataFrame to store the metrics (one row per split)
metrics_df = pd.DataFrame({
    'Dataset': ['Training', 'Test'],
    'MAE': [train_metrics['MAE'], test_metrics['MAE']],
    'RMSE': [train_metrics['RMSE'], test_metrics['RMSE']],
    'R2': [train_metrics['R2'], test_metrics['R2']],
    'Spearman': [train_metrics['Spearman'], test_metrics['Spearman']],
    'Calibration Area': [train_metrics['Calibration Area'], test_metrics['Calibration Area']]
})

# Leave the frame as the cell's last expression so the notebook renders it
# as a formatted table instead of plain printed text.
metrics_df

In [None]:
# Plot results
# One error-bar figure per split, built from the true targets together with
# the mean and standard deviation returned by predict_bnn.
# NOTE(review): create_errorbar_plot is defined in post_processing and is not
# visible here; the last two arguments look like a color and a label — confirm.
create_errorbar_plot(ytrain, mean_train, std_train, 'blue', 'Training')
create_errorbar_plot(ytest, mean_test, std_test, 'green', 'Test')

In [None]:
# Plot additional figures
# Element-wise absolute difference between the targets and the predicted means.
# NOTE(review): this relies on ytrain/ytest and mean_train/mean_test having
# matching shapes; a (N,) target against a (N, 1) prediction would silently
# broadcast to (N, N) — confirm what predict_bnn returns.
abs_error_train = np.abs(ytrain - mean_train)
abs_error_test = np.abs(ytest - mean_test)

# Plot the absolute errors against the predicted standard deviations, once per split.
plot_abs_error_vs_std(abs_error_train, std_train, 'Training', 'blue')
plot_abs_error_vs_std(abs_error_test, std_test, 'Test', 'green')

In [None]:
# Histogram of the predicted standard deviations, once for the training split
# and once for the test split.
# NOTE(review): argument order here is (values, label, color), which differs
# from create_errorbar_plot's (..., color, label) — confirm against post_processing.
plot_std_histogram(std_train, 'Training', 'blue')
plot_std_histogram(std_test, 'Test', 'green')

In [None]:
# Calculate observed confidence
# Evaluated at the same `confidence_levels` grid that was passed to
# calculate_metrics earlier in the notebook.
# NOTE(review): presumably this is the fraction of targets falling inside the
# predicted interval at each level — confirm against metrics.calculate_observed_confidence.
observed_confidence_train = calculate_observed_confidence(ytrain, mean_train, std_train, confidence_levels)
observed_confidence_test = calculate_observed_confidence(ytest, mean_test, std_test, confidence_levels)

# Plot calibration curves
# One curve per split: the confidence levels together with the observed values computed above.
plot_calibration_curve(confidence_levels, observed_confidence_train, 'Training')
plot_calibration_curve(confidence_levels, observed_confidence_test, 'Test')