In [9]:
import sys
sys.path.append('../functions')

# Import necessary libraries
import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import time
from sklearn.metrics import r2_score, mean_absolute_error

# Import custom modules
from data_processing import load_and_process_data
from models import QR_Model, quantile_loss, predictions_QR
from metrics import calculate_metrics, calculate_observed_confidence
from post_processing import create_errorbar_plot, plot_abs_error_vs_std, plot_std_histogram, plot_calibration_curve

In [None]:
# Configuration: every value a reader might want to tune lives in this cell.
BATCH_SIZE = 32
EPOCHS = 800
# NOTE(review): HIDDEN_LAYERS is not passed to QR_Model in the model-initialisation
# cell — confirm the model obtains its layer sizes some other way.
HIDDEN_LAYERS = [1024, 256, 128]
LEARNING_RATE = 0.000015
QUANTILES = [0.1, 0.5, 0.9]  # We'll use 0.5 as median (our prediction) and 0.1, 0.9 to create uncertainty bounds
num_zero_threshold = 3600  # forwarded unchanged to load_and_process_data
SEED = 11

# Seed the global NumPy and PyTorch generators so that weight initialisation (and any
# other random draw made through them) is repeatable after Restart & Run All.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Device configuration: prefer a CUDA GPU when one is available, otherwise use the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load and process data
file_path = "../data/other_property/Tm.csv"
X_count, Y = load_and_process_data(file_path, num_zero_threshold)

In [2]:
# Partition the samples 80/10/10: hold out 20% first, then halve that hold-out
# into validation and test subsets. Both splits use fixed random_state values.
xtrain, xtemp, ytrain, ytemp = train_test_split(
    X_count,
    Y,
    test_size=0.2,
    random_state=11,
)
xval, xtest, yval, ytest = train_test_split(
    xtemp,
    ytemp,
    test_size=0.5,
    random_state=42,
)

# Create DataLoaders
def create_dataloader(x, y, batch_size, shuffle=False, target_device=None):
    """Wrap features and targets in a batched ``DataLoader`` of float32 tensors.

    Args:
        x: Feature table. A pandas DataFrame, a NumPy array, or anything else
            ``np.array`` can convert to a numeric array.
        y: Targets aligned row-for-row with ``x`` (Series, ndarray or list).
        batch_size: Number of samples per batch.
        shuffle: When True the loader reshuffles the samples on every pass.
            Defaults to False, which is what this function always did before.
            NOTE(review): the evaluation cells pair loader predictions with
            ytrain/ytest, so those loaders appear to need a fixed order —
            confirm before enabling shuffling for them.
        target_device: Device the tensors are moved to. When omitted, the
            notebook-level ``device`` variable is used.

    Returns:
        A ``torch.utils.data.DataLoader`` yielding ``(features, targets)`` batches.
    """
    if target_device is None:
        # Fall back to the global set in the configuration cell.
        target_device = device

    # np.array makes a fresh float32 copy regardless of the input container, so the
    # tensors never alias the caller's data and pandas index labels play no role.
    feature_array = np.array(x, dtype=np.float32)
    target_array = np.array(y, dtype=np.float32)

    tensor_x = torch.from_numpy(feature_array).to(target_device)
    tensor_y = torch.from_numpy(target_array).to(target_device)

    dataset = torch.utils.data.TensorDataset(tensor_x, tensor_y)
    return torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=shuffle)

# One loader per split, all with the same batch size.
# NOTE(review): val_loader is not consumed by any cell shown in this notebook, and
# train_loader is unshuffled (it is also reused later for evaluation against ytrain).
train_loader, val_loader, test_loader = (
    create_dataloader(split_x, split_y, BATCH_SIZE)
    for split_x, split_y in ((xtrain, ytrain), (xval, yval), (xtest, ytest))
)

In [None]:
# Build the quantile-regression network, sized to the number of feature columns,
# and the Adam optimizer that updates its parameters.
# NOTE(review): HIDDEN_LAYERS from the configuration cell is not passed here — confirm
# QR_Model does not need it.
input_shape = xtrain.shape[1]
model = QR_Model(input_shape, QUANTILES)
model = model.to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Optimise the model for EPOCHS passes over train_loader and time the whole run
start_time = time.time()

for epoch in range(EPOCHS):
    model.train()
    epoch_loss = 0

    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Drop gradients left over from the previous step before computing new ones
        optimizer.zero_grad()

        # Forward pass, loss over all requested quantiles, then backward pass and update
        predictions = model(batch_x)
        loss = quantile_loss(predictions, batch_y, QUANTILES)
        loss.backward()
        optimizer.step()

        # Running total of the per-batch losses (a sum, not a mean)
        epoch_loss += loss.item()

    # Report on every 10th epoch, using 1-based epoch numbers
    if epoch % 10 == 9:
        print(f'Epoch {epoch + 1}, Loss: {epoch_loss:.4f}')

total_time = time.time() - start_time
print(f'Training completed in {total_time:.2f} seconds')

In [None]:
# Nominal confidence levels from 0 to 1 in steps of 0.05 (stop=1.05 so 1.0 is included)
confidence_levels = np.arange(0, 1.05, 0.05)

# Point predictions and their uncertainty estimates for the training and test splits.
# NOTE(review): no cell shown here calls model.eval(); confirm predictions_QR switches
# the model to inference mode itself.
mean_train, std_train = predictions_QR(model, train_loader)
mean_test, std_test = predictions_QR(model, test_loader)

# Score each split against its true targets
train_metrics = calculate_metrics(
    ytrain, mean_train, std_train, confidence_levels
)
test_metrics = calculate_metrics(
    ytest, mean_test, std_test, confidence_levels
)

# Collect the metrics into a two-row table: one row per split, one column per metric
metric_names = ['MAE', 'RMSE', 'R2', 'Spearman', 'Calibration Area']
metrics_df = pd.DataFrame({
    'Dataset': ['Training', 'Test'],
    **{name: [train_metrics[name], test_metrics[name]] for name in metric_names},
})

# Show the table as plain text
print(metrics_df)

In [None]:
# Error-bar plot of predictions against true targets, training split first, then test
for plot_args in (
    (ytrain, mean_train, std_train, 'blue', 'Training'),
    (ytest, mean_test, std_test, 'green', 'Test'),
):
    create_errorbar_plot(*plot_args)

In [None]:
# Absolute difference between each true target and its point prediction
abs_error_train = np.absolute(ytrain - mean_train)
abs_error_test = np.absolute(ytest - mean_test)

# Plot absolute error against the predicted uncertainty for each split
for plot_args in (
    (abs_error_train, std_train, 'Training', 'blue'),
    (abs_error_test, std_test, 'Test', 'green'),
):
    plot_abs_error_vs_std(*plot_args)

In [None]:
# Histogram of the predicted uncertainty for each split
for plot_args in ((std_train, 'Training', 'blue'), (std_test, 'Test', 'green')):
    plot_std_histogram(*plot_args)

In [None]:
# Observed confidence at each nominal confidence level, per split
observed_confidence_train = calculate_observed_confidence(
    ytrain, mean_train, std_train, confidence_levels
)
observed_confidence_test = calculate_observed_confidence(
    ytest, mean_test, std_test, confidence_levels
)

# One calibration curve per split: nominal levels against observed confidence
for plot_args in (
    (confidence_levels, observed_confidence_train, 'Training'),
    (confidence_levels, observed_confidence_test, 'Test'),
):
    plot_calibration_curve(*plot_args)