# Evidential deep learning

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from scipy.stats import spearmanr
import scipy
import pickle
from rdkit import Chem
from rdkit.Chem import AllChem

## Step 1: Split the Dataset

In [None]:
# Read the complete Tg dataset.
data = pd.read_csv(r'chemprop-confidence-evidential\data\Tg_UQ\Tg.csv')

# 80/10/10 partition: hold out 20 % of the rows, then halve that hold-out
# into a test part and a validation part (each split has its own fixed seed).
train_data, temp_data = train_test_split(data, test_size=0.2, random_state=11)
test_data, val_data = train_test_split(temp_data, test_size=0.5, random_state=42)

# Write every partition next to the MFF feature files.
for partition, destination in (
    (train_data, r'chemprop-confidence-evidential\data\Tg_UQ\MFF\Tg_train.csv'),
    (test_data, r'chemprop-confidence-evidential\data\Tg_UQ\MFF\Tg_test.csv'),
    (val_data, r'chemprop-confidence-evidential\data\Tg_UQ\MFF\Tg_validation.csv'),
):
    partition.to_csv(destination, index=False)

### Data processing for MFF

In [None]:
def _load_pickle(path):
    """Load a single pickled object and close the file handle afterwards.

    NOTE: unpickling can execute arbitrary code; only use this on trusted files.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _build_mff(smiles, corr_df, n_features, columns):
    """Build the Morgan-fragment-frequency (MFF) feature matrix.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings, one per polymer.
    corr_df : pandas.DataFrame
        Lookup table; column ``0`` holds Morgan fragment identifiers and column
        ``'index'`` the position of that fragment in the raw fingerprint vector.
    n_features : int
        Length of the raw fingerprint vector (before column selection).
    columns : list
        Column labels to keep, in output order.

    Returns
    -------
    pandas.DataFrame
        One row per input SMILES, restricted to ``columns``.
    """
    # Build the fragment -> position lookup once. setdefault keeps the first
    # occurrence, matching a "first matching row" lookup in corr_df.
    fragment_to_position = {}
    for fragment_id, position in zip(corr_df[0], corr_df['index']):
        fragment_to_position.setdefault(fragment_id, position)

    rows = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        counts = AllChem.GetMorganFingerprint(mol, radius=3).GetNonzeroElements()
        row = [0] * n_features
        for fragment_id, count in counts.items():
            position = fragment_to_position.get(fragment_id)
            # Fragments absent from the lookup table are ignored.
            if position is not None:
                row[position] = count
        rows.append(row)
    return pd.DataFrame(rows)[columns]


Corr_df = _load_pickle("../data/Tg_UQ/MFF/Corr_All.pickle")
unique_list = _load_pickle("../data/Tg_UQ/MFF/unique_list_All.pickle")
Columns = _load_pickle("../data/Tg_UQ/MFF/Columns_All.pickle")
Substructure_list = _load_pickle("../data/Tg_UQ/MFF/polymer.keys_All.pickle")

# Training-set features.
data_train = pd.read_csv('../data/Tg_UQ/MFF/Tg_train.csv')
X_train = _build_mff(data_train.Smiles, Corr_df, len(unique_list), Columns)
X_train.to_csv("../data/Tg_UQ/MFF/train_MFF.csv", index=False)

# Test-set features, built with the same lookup table and column selection.
data_test = pd.read_csv('../data/Tg_UQ/MFF/Tg_test.csv')
X_test = _build_mff(data_test.Smiles, Corr_df, len(unique_list), Columns)
X_test.to_csv("../data/Tg_UQ/MFF/test_MFF.csv", index=False)


In [None]:
# Sanity-check the test feature matrix: report its dimensions and show only
# the first rows instead of dumping the whole frame as plain text.
print(X_test.shape)
X_test.head()

# 1.5 Hyperparameter optimization

## optimization

python hyperparameter_optimization.py --data_path data/Tg_UQ/MF/Tg_train.csv --dataset_type regression --num_iters 100 --config_save_path configs/Tg_MF_configs

## train

python train.py --data_path <data_path> --dataset_type <type> --config_path <config_path>

## Step 2: Train the Model

### MFF

python train.py --confidence evidence --epochs 250 --new_loss --regularizer_coeff 0.2 --save_dir results/evidence_demo --save_confidence conf.txt --confidence_evaluation_methods cutoff --split_type random --split_sizes 0.8 0.1 0.1 --seed 0 --dataset_type regression --data_path data/Tg_UQ/MFF/Tg_train.csv --features_path data/Tg_UQ/MFF/train_MFF.csv --no_features_scaling

## Step 3: Run Prediction
### Run these from the command line. Update the timestamped run-folder name in `--checkpoint_path` to match your own trained model each time.


## 3.1 training set

### MFF

python predict.py --test_path data/Tg_UQ/MFF/Tg_train.csv --features_path data/Tg_UQ/MFF/train_MFF.csv --preds_path results/Tg_UQ/MFF/Tg_train_result.csv --checkpoint_path results\evidence_demo\240201-210537535451_Tg_train_evidence_MFF\fold_0\model_0\model.pt

## 3.2 test set

### MFF
python predict.py --test_path data/Tg_UQ/MFF/Tg_test.csv --features_path data/Tg_UQ/MFF/test_MFF.csv --preds_path results/Tg_UQ/MFF/Tg_test_result.csv --checkpoint_path results\evidence_demo\240201-210537535451_Tg_train_evidence_MFF\fold_0\model_0\model.pt

## 3.3 OOD_ME data set

### MFF

In [None]:
def _ood_mff(csv_path):
    """Read an OOD CSV (needs a ``Smiles`` column) and return its MFF features.

    Uses the notebook-level ``Corr_df``, ``unique_list`` and ``Columns`` loaded
    in the MFF data-processing cell.
    """
    # Fragment -> position lookup built once per call; setdefault keeps the
    # first occurrence, matching a "first matching row" lookup in Corr_df.
    fragment_to_position = {}
    for fragment_id, position in zip(Corr_df[0], Corr_df['index']):
        fragment_to_position.setdefault(fragment_id, position)

    frame = pd.read_csv(csv_path)
    rows = []
    for smi in frame.Smiles:
        mol = Chem.MolFromSmiles(smi)
        counts = AllChem.GetMorganFingerprint(mol, radius=3).GetNonzeroElements()
        row = [0] * len(unique_list)
        for fragment_id, count in counts.items():
            position = fragment_to_position.get(fragment_id)
            # Fragments absent from the lookup table are ignored.
            if position is not None:
                row[position] = count
        rows.append(row)
    return pd.DataFrame(rows)[Columns]


# Experimental OOD set.
# NOTE(review): the input is read from the MF folder while the predict command
# points at MFF/Tg_OOD_ME.csv - confirm both refer to the same file.
X_EXP = _ood_mff(r'chemprop-confidence-evidential\data\Tg_UQ\MF\Tg_OOD_ME.csv')
# Written under ../data like every other feature file in this notebook.
X_EXP.to_csv("../data/Tg_UQ/MFF/OOD_EXP_MFF.csv", index=False)

# Molecular-dynamics OOD set.
X_MD = _ood_mff(r'chemprop-confidence-evidential\data\Tg_UQ\MF\Tg_OOD_MD.csv')
X_MD.to_csv("../data/Tg_UQ/MFF/OOD_MD_MFF.csv", index=False)



In [None]:
# High-Tg candidates: build MFF features with the same lookup table as above.
data_OOD = pd.read_csv(r'chemprop-confidence-evidential\data\Tg_UQ\MFF\high_Tg.csv')

# Fragment -> position lookup built once instead of rescanning Corr_df for
# every fragment; setdefault keeps the first occurrence of each fragment.
fragment_to_position = {}
for fragment_id, position in zip(Corr_df[0], Corr_df['index']):
    fragment_to_position.setdefault(fragment_id, position)

MY_finger = []
for smi in data_OOD.Smiles:
    mol = Chem.MolFromSmiles(smi)
    counts = AllChem.GetMorganFingerprint(mol, radius=3).GetNonzeroElements()
    my_finger = [0] * len(unique_list)
    for fragment_id, count in counts.items():
        position = fragment_to_position.get(fragment_id)
        # Fragments absent from the lookup table are ignored.
        if position is not None:
            my_finger[position] = count
    MY_finger.append(my_finger)

X_OOD = pd.DataFrame(MY_finger)
# NOTE: X_MD is reused here and now holds the high-Tg features.
X_MD = X_OOD[Columns]

X_MD.to_csv("../data/Tg_UQ/MFF/high_Tg_MFF.csv", index=False)

### Prediction

### EXP

python predict.py --test_path data/Tg_UQ/MFF/Tg_OOD_ME.csv --features_path data/Tg_UQ/MFF/OOD_EXP_MFF.csv --preds_path results/Tg_UQ/MFF/Tg_OOD_EXP_result.csv --checkpoint_path results\evidence_demo\240201-210537535451_Tg_train_evidence_MFF\fold_0\model_0\model.pt

### MD

python predict.py --test_path data/Tg_UQ/MFF/Tg_OOD_MD.csv --features_path data/Tg_UQ/MFF/OOD_MD_MFF.csv --preds_path results/Tg_UQ/MFF/Tg_OOD_MD_result.csv --checkpoint_path results\evidence_demo\240201-210537535451_Tg_train_evidence_MFF\fold_0\model_0\model.pt

### High Tg

python predict.py --test_path data/Tg_UQ/MFF/high_Tg.csv --features_path data/Tg_UQ/MFF/high_Tg_MFF.csv --preds_path results/Tg_UQ/MFF/Tg_high_Tg_result.csv --checkpoint_path results\evidence_demo\240201-210537535451_Tg_train_evidence_MFF\fold_0\model_0\model.pt

## Step 4: Postprocessing

### 4.1 results processing

In [None]:

# --- Training-set predictions ---
# Read the prediction file, discard incomplete rows, and give the five
# columns fixed names.
train_result = pd.read_csv(r'chemprop-confidence-evidential\results\Tg_UQ\MFF\Tg_train_result.csv')
train_result = train_result.dropna()
train_result.columns = ['smiles', 'Tg', 'true_Tg', 'uncertainty', 'std']
ytrain, mean_train, std_train = (
    train_result[name] for name in ('true_Tg', 'Tg', 'std')
)

# --- Test-set predictions (same treatment) ---
test_result = pd.read_csv(r'chemprop-confidence-evidential\results\Tg_UQ\MFF\Tg_test_result.csv')
test_result = test_result.dropna()
test_result.columns = ['smiles', 'Tg', 'true_Tg', 'uncertainty', 'std']
ytest, mean_test, std_test = (
    test_result[name] for name in ('true_Tg', 'Tg', 'std')
)

### 4.2 postprocessing

In [None]:
# Rank correlation between |error| and predicted std, per split.
abs_error_train = (ytrain - mean_train).abs()
abs_error_test = (ytest - mean_test).abs()

spearman_corr_train, p_value_train = spearmanr(abs_error_train, std_train)
spearman_corr_test, p_value_test = spearmanr(abs_error_test, std_test)

# One row per split, one column per statistic.
spearman_results = dict(
    Spearman_Correlation=[spearman_corr_train, spearman_corr_test],
    p_value=[p_value_train, p_value_test],
)
spearman_df = pd.DataFrame(spearman_results, index=['Train', 'Test'])

spearman_df

In [None]:
# Training set: absolute error against predicted standard deviation.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(abs_error_train, std_train, alpha=0.5)

ax.set_xlabel('Absolute Error (train)', fontsize=14)
ax.set_ylabel('Standard Deviation (train)', fontsize=14)
ax.set_title('Absolute Error vs Standard Deviation (train Set)', fontsize=16)

# Grid lines make the point cloud easier to read.
ax.grid(True)

plt.show()

In [None]:
# Test set: absolute error against predicted standard deviation.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(abs_error_test, std_test, alpha=0.5)

ax.set_xlabel('Absolute Error (Test)', fontsize=14)
ax.set_ylabel('Standard Deviation (Test)', fontsize=14)
ax.set_title('Absolute Error vs Standard Deviation (test Set)', fontsize=16)

# Grid lines make the point cloud easier to read.
ax.grid(True)

plt.show()

In [None]:
# Flatten targets, predicted means and predicted stds to 1-D arrays.
ytrain_1d, ytest_1d = np.ravel(ytrain), np.ravel(ytest)
mean_train_1d, std_train_1d = np.ravel(mean_train), np.ravel(std_train)
mean_test_1d, std_test_1d = np.ravel(mean_test), np.ravel(std_test)

# Point-prediction accuracy on the training split.
mae_train = mean_absolute_error(ytrain_1d, mean_train_1d)
mse_train = mean_squared_error(ytrain_1d, mean_train_1d)
rmse_train = np.sqrt(mse_train)
r2_train = r2_score(ytrain_1d, mean_train_1d)

# Point-prediction accuracy on the test split.
mae_test = mean_absolute_error(ytest_1d, mean_test_1d)
mse_test = mean_squared_error(ytest_1d, mean_test_1d)
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(ytest_1d, mean_test_1d)

# One column per metric, one row per split.
metrics = dict(
    MAE=[mae_train, mae_test],
    RMSE=[rmse_train, rmse_test],
    R2=[r2_train, r2_test],
)
metrics_df = pd.DataFrame(metrics, index=['Train', 'Test'])

metrics_df

In [None]:
# Side-by-side parity plots (left: train, right: test) with the predicted std
# drawn as vertical error bars.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(11, 5))
tick_positions = np.arange(-100, 401, 100)

panels = (
    # axis, actual, predicted, std, ecolor, mec, mfc, legend label, title
    (axes[0], ytrain_1d, mean_train_1d, std_train_1d,
     'lightgray', 'blue', 'skyblue',
     'Train Prediction', 'Training Set Predictions - Tg'),
    (axes[1], ytest_1d, mean_test_1d, std_test_1d,
     'lightblue', 'green', 'lightgreen',
     'Test Prediction', 'Test Set Predictions - Tg'),
)

for panel, actual, predicted, spread, bar_color, edge, face, label, title in panels:
    panel.errorbar(actual, predicted,
                   yerr=spread,
                   fmt='o', ecolor=bar_color, mec=edge, mfc=face,
                   alpha=0.7, capsize=5, label=label)

    # Diagonal reference: predicted == actual.
    panel.plot(actual, actual, 'r--', label='Perfect predictions')

    panel.set_xticks(tick_positions)
    panel.set_yticks(tick_positions)

    panel.set_xlabel('Actual Values', fontsize=14)
    # Only the left panel carries the y-axis label.
    if panel is axes[0]:
        panel.set_ylabel('Predicted Values', fontsize=14)
    panel.set_title(title, fontsize=14)

    panel.legend(fontsize=14)

plt.tight_layout()
plt.show()

### Evaluation of Uncertainty

In [None]:

# Nominal (expected) confidence levels from 0 to 1 in steps of 0.01.
confidence_levels = np.arange(0, 1.01, 0.01)


def calculate_observed_confidence(y_true, mean_pred, std_pred, z_value):
    """Fraction of targets that fall inside ``mean_pred +/- z_value * std_pred``.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    mean_pred : array-like
        Predicted means, aligned with ``y_true``.
    std_pred : array-like
        Predicted standard deviations, aligned with ``y_true``.
    z_value : float
        Half-width of the interval in units of ``std_pred``. For a two-sided
        interval at confidence ``c`` under a normal assumption this is
        ``norm.ppf((1 + c) / 2)``.

    Returns
    -------
    float
        Share of samples whose target lies within the (inclusive) interval.
    """
    # z_value is already the half-width multiplier of a two-sided interval,
    # so it must not be halved again.
    half_width = z_value * std_pred
    lower_bound = mean_pred - half_width
    upper_bound = mean_pred + half_width
    return np.mean((y_true >= lower_bound) & (y_true <= upper_bound))

# Two-sided standard-normal quantile for every nominal confidence level.
z_scores = []
for level in confidence_levels:
    z_scores.append(scipy.stats.norm.ppf((1 + level) / 2))

# Empirical coverage of the training targets at each level.
observed_confidence = []
for z in z_scores:
    observed_confidence.append(
        calculate_observed_confidence(ytrain, mean_train, 1*std_train, z)
    )

# Calibration curve against the ideal diagonal.
fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(confidence_levels, observed_confidence, label='Calibration curve')
ax.plot(confidence_levels, confidence_levels, 'k--', label='Perfect calibration')
ax.set_xlabel('Expected confidence')
ax.set_ylabel('Observed confidence')
ax.legend()
plt.show()

# Start the table that collects calibration curves; later cells add columns
# and it is written to CSV near the end of the notebook.
EDL_calibration_data = pd.DataFrame({
    'Expected_Confidence': confidence_levels,
    'EDL_Observed_Confidence_Train': observed_confidence
})


In [None]:
# Empirical coverage of the test targets at each nominal confidence level.
observed_confidence = []
for z in z_scores:
    observed_confidence.append(
        calculate_observed_confidence(ytest, mean_test, 1*std_test, z)
    )

# Calibration curve against the ideal diagonal.
fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(confidence_levels, observed_confidence, label='Calibration curve')
ax.plot(confidence_levels, confidence_levels, 'k--', label='Perfect calibration')
ax.set_xlabel('Expected confidence')
ax.set_ylabel('Observed confidence')
ax.legend()
plt.show()

# Add the test curve to the shared calibration table.
EDL_calibration_data['EDL_Observed_Confidence_Test'] = observed_confidence

### Sparsification plots

In [None]:
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between ``y_true`` and ``y_pred``."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# Step 1: order samples from most to least uncertain.
# np.argsort yields *positions*, so they are applied with .iloc. Plain []
# indexing on a Series treats integers as index labels, and labels stop
# matching positions as soon as dropna() has removed a row upstream.
sorted_indices = np.argsort(-np.asarray(std_train))
sorted_ytrain = ytrain.iloc[sorted_indices]
sorted_mean_train = mean_train.iloc[sorted_indices]

# Steps 2 and 3: drop the most uncertain samples and recompute the RMSE.
rmse_values_train = []
fractions = np.arange(0, 1.00, 0.001)  # 0 % to 99.9 % removed, in 0.1 % steps

for fraction in fractions:
    # Number of most-uncertain samples to discard at this fraction.
    num_to_remove = int(fraction * len(sorted_ytrain))
    # RMSE on what is left.
    remaining_rmse = rmse(sorted_ytrain.iloc[num_to_remove:],
                          sorted_mean_train.iloc[num_to_remove:])
    rmse_values_train.append(remaining_rmse)

# Step 4: error metric against the fraction of removed samples.
plt.figure(figsize=(6, 5))
plt.plot(fractions, rmse_values_train, marker='o')
plt.xlabel('Fraction of samples removed')
plt.ylabel('RMSE on remaining samples')
plt.title('Sparsification Plot')
plt.show()

# Table that collects the sparsification curves; later cells add columns.
EDL_Sparsification_data = pd.DataFrame({
    'Sparsification_fractions': fractions,
    'EDL_rmse_values_Train': rmse_values_train
})

In [None]:
# Step 1: order test samples from most to least uncertain.
# np.argsort yields *positions*, so they are applied with .iloc. Plain []
# indexing on a Series treats integers as index labels, and labels stop
# matching positions as soon as dropna() has removed a row upstream.
sorted_indices_test = np.argsort(-np.asarray(std_test))
sorted_ytest = ytest.iloc[sorted_indices_test]
sorted_mean_test = mean_test.iloc[sorted_indices_test]

# Steps 2 and 3: drop the most uncertain samples and recompute the RMSE.
rmse_values_test = []

for fraction in fractions:
    # Number of most-uncertain samples to discard at this fraction.
    num_to_remove = int(fraction * len(sorted_ytest))
    # RMSE on what is left.
    remaining_rmse_test = rmse(sorted_ytest.iloc[num_to_remove:],
                               sorted_mean_test.iloc[num_to_remove:])
    rmse_values_test.append(remaining_rmse_test)

# Step 4: error metric against the fraction of removed samples.
plt.figure(figsize=(6, 5))
plt.plot(fractions, rmse_values_test, marker='o')
plt.xlabel('Fraction of samples removed')
plt.ylabel('RMSE on remaining samples')
plt.title('Sparsification Plot')
plt.show()

# Add the test curve to the shared sparsification table.
EDL_Sparsification_data['EDL_rmse_values_Test'] = rmse_values_test

### OOD_ME data postprocessing

In [None]:
# Experimental OOD predictions: read, discard incomplete rows, and give the
# five columns fixed names.
OOD_result = pd.read_csv(r'chemprop-confidence-evidential\results\Tg_UQ\MFF\Tg_OOD_EXP_result.csv')
OOD_result = OOD_result.dropna()
OOD_result.columns = ['smiles', 'Tg', 'true_Tg', 'uncertainty', 'std']
y_OOD, mean_OOD, std_OOD = (
    OOD_result[name] for name in ('true_Tg', 'Tg', 'std')
)

# Number of usable OOD samples.
print(mean_OOD.shape)

In [None]:
# Rank correlation between |error| and predicted std on the OOD set.
abs_error_OOD = (y_OOD - mean_OOD).abs()
spearman_corr_OOD, p_value_OOD = spearmanr(abs_error_OOD, std_OOD)

# Single-row summary table.
spearman_results_OOD = dict(
    Spearman_Correlation=[spearman_corr_OOD],
    P_value=[p_value_OOD],
)
spearman_df_OOD = pd.DataFrame(spearman_results_OOD, index=['OOD'])

spearman_df_OOD

In [None]:
# OOD set: absolute error against predicted standard deviation.
fig, ax = plt.subplots(figsize=(6, 5))
ax.scatter(abs_error_OOD, std_OOD, alpha=0.5)

ax.set_xlabel('Absolute Error (OOD)', fontsize=14)
ax.set_ylabel('Standard Deviation (OOD)', fontsize=14)
ax.set_title('Absolute Error vs Standard Deviation (OOD)', fontsize=16)

# Grid lines make the point cloud easier to read.
ax.grid(True)

plt.show()

In [None]:
# Flatten OOD targets, predicted means and predicted stds to 1-D arrays.
y_OOD_1d, mean_OOD_1d, std_OOD_1d = (
    np.ravel(series) for series in (y_OOD, mean_OOD, std_OOD)
)

# Point-prediction accuracy on the OOD set.
mae_OOD = mean_absolute_error(y_OOD_1d, mean_OOD_1d)
mse_OOD = mean_squared_error(y_OOD_1d, mean_OOD_1d)
rmse_OOD = np.sqrt(mse_OOD)
r2_OOD = r2_score(y_OOD_1d, mean_OOD_1d)

# Single-row summary table.
metrics_OOD = dict(MAE=mae_OOD, RMSE=rmse_OOD, R2=r2_OOD)
metrics_OOD_df = pd.DataFrame(metrics_OOD, index=['OOD test'])

metrics_OOD_df

In [None]:
# Parity plot for the OOD set, predicted std drawn as vertical error bars.
fig, ax = plt.subplots(figsize=(5.5, 5))
tick_positions = np.arange(-100, 401, 100)

ax.errorbar(
    y_OOD_1d, mean_OOD_1d,
    yerr=std_OOD_1d,
    fmt='o', ecolor='lightblue', mec='green', mfc='lightgreen',
    alpha=0.7, capsize=5, label='OOD data Prediction',
)

# Diagonal reference: predicted == actual.
ax.plot(y_OOD_1d, y_OOD_1d, 'r--', label='Perfect predictions')

ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)

ax.set_xlabel('Actual Values', fontsize=14)
ax.set_ylabel('Predicted Values', fontsize=14)
ax.set_title('OOD data Predictions - Tg', fontsize=14)

ax.legend(fontsize=14)

plt.tight_layout()
plt.show()

In [None]:
# Empirical coverage of the OOD targets at each nominal confidence level.
observed_confidence = []
for z in z_scores:
    observed_confidence.append(
        calculate_observed_confidence(y_OOD, mean_OOD, 1*std_OOD, z)
    )

# Calibration curve against the ideal diagonal.
fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(confidence_levels, observed_confidence, label='Calibration curve')
ax.plot(confidence_levels, confidence_levels, 'k--', label='Perfect calibration')
ax.set_xlabel('Expected confidence')
ax.set_ylabel('Observed confidence')
ax.legend()
plt.show()

# Add the OOD (experimental) curve to the shared calibration table.
EDL_calibration_data['EDL_Observed_Confidence_OOD_EXP'] = observed_confidence

### Sparsification plots

In [None]:
# Step 1: order OOD samples from most to least uncertain.
# np.argsort yields *positions*, so they are applied with .iloc. Plain []
# indexing on a Series treats integers as index labels, and labels stop
# matching positions as soon as dropna() has removed a row upstream.
sorted_indices_OOD = np.argsort(-np.asarray(std_OOD))
sorted_y_OOD = y_OOD.iloc[sorted_indices_OOD]
sorted_mean_OOD = mean_OOD.iloc[sorted_indices_OOD]

# Steps 2 and 3: drop the most uncertain samples and recompute the RMSE.
rmse_values_OOD = []

for fraction in fractions:
    # Number of most-uncertain samples to discard at this fraction.
    num_to_remove = int(fraction * len(sorted_y_OOD))
    # RMSE on what is left.
    remaining_rmse_OOD = rmse(sorted_y_OOD.iloc[num_to_remove:],
                              sorted_mean_OOD.iloc[num_to_remove:])
    rmse_values_OOD.append(remaining_rmse_OOD)

# Step 4: error metric against the fraction of removed samples.
plt.figure(figsize=(6, 5))
plt.plot(fractions, rmse_values_OOD, marker='o')
plt.xlabel('Fraction of samples removed')
plt.ylabel('RMSE on remaining samples')
plt.title('Sparsification Plot')
plt.show()

# Add the OOD (experimental) curve to the shared sparsification table.
EDL_Sparsification_data['EDL_rmse_values_OOD_EXP'] = rmse_values_OOD

## OOD_MD data set

In [None]:
# OOD (molecular dynamics) predictions.
# Uses the same repository-relative location as the other result loaders in
# this notebook rather than a machine-specific absolute path.
OOD_MD_result = pd.read_csv(r'chemprop-confidence-evidential\results\Tg_UQ\MFF\Tg_OOD_MD_result.csv')
# Discard rows with any missing value.
OOD_MD_result.dropna(inplace=True)
# Give the five columns fixed names.
OOD_MD_result.columns = ['smiles', 'Tg', 'true_Tg', 'uncertainty', 'std']
y_OOD_MD = OOD_MD_result['true_Tg']
mean_OOD_MD = OOD_MD_result['Tg']
std_OOD_MD = OOD_MD_result['std']

# Number of usable OOD_MD samples.
print(mean_OOD_MD.shape)

In [None]:
# Flatten OOD_MD targets, predicted means and predicted stds to 1-D arrays.
y_OOD_MD_1d, mean_OOD_MD_1d, std_OOD_MD_1d = (
    np.ravel(series) for series in (y_OOD_MD, mean_OOD_MD, std_OOD_MD)
)

# Point-prediction accuracy on the OOD_MD set.
mae_OOD_MD = mean_absolute_error(y_OOD_MD_1d, mean_OOD_MD_1d)
mse_OOD_MD = mean_squared_error(y_OOD_MD_1d, mean_OOD_MD_1d)
rmse_OOD_MD = np.sqrt(mse_OOD_MD)
r2_OOD_MD = r2_score(y_OOD_MD_1d, mean_OOD_MD_1d)

# Single-row summary table.
metrics_OOD_MD = dict(MAE=mae_OOD_MD, RMSE=rmse_OOD_MD, R2=r2_OOD_MD)
metrics_OOD_MD_df = pd.DataFrame(metrics_OOD_MD, index=['OOD_MD test'])

metrics_OOD_MD_df

In [None]:
# Parity plot for the OOD_MD set, predicted std drawn as vertical error bars.
fig, ax = plt.subplots(figsize=(5.5, 5))
tick_positions = np.arange(-100, 401, 100)

ax.errorbar(
    y_OOD_MD_1d, mean_OOD_MD_1d,
    yerr=std_OOD_MD_1d,
    fmt='o', ecolor='lightblue', mec='green', mfc='lightgreen',
    alpha=0.7, capsize=5, label='OOD_MD data Prediction',
)

# Diagonal reference: predicted == actual.
ax.plot(y_OOD_MD_1d, y_OOD_MD_1d, 'r--', label='Perfect predictions')

ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)

ax.set_xlabel('Actual Values', fontsize=14)
ax.set_ylabel('Predicted Values', fontsize=14)
ax.set_title('OOD data Predictions - Tg', fontsize=14)

ax.legend(fontsize=14)

plt.tight_layout()
plt.show()


In [None]:
# Rank correlation between |error| and predicted std on the OOD_MD set.
# NOTE: this reuses (and overwrites) the *_OOD names from the OOD_EXP section.
abs_error_OOD = (y_OOD_MD - mean_OOD_MD).abs()
spearman_corr_OOD, p_value_OOD = spearmanr(abs_error_OOD, std_OOD_MD)

# Single-row summary table.
spearman_results_OOD = dict(
    Spearman_Correlation=[spearman_corr_OOD],
    P_value=[p_value_OOD],
)
spearman_df_OOD = pd.DataFrame(spearman_results_OOD, index=['OOD'])

spearman_df_OOD

In [None]:
# Empirical coverage of the OOD_MD targets at each nominal confidence level.
observed_confidence = []
for z in z_scores:
    observed_confidence.append(
        calculate_observed_confidence(y_OOD_MD, mean_OOD_MD, 1*std_OOD_MD, z)
    )

# Calibration curve against the ideal diagonal.
fig, ax = plt.subplots(figsize=(6, 5))
ax.plot(confidence_levels, observed_confidence, label='Calibration curve')
ax.plot(confidence_levels, confidence_levels, 'k--', label='Perfect calibration')
ax.set_xlabel('Expected confidence')
ax.set_ylabel('Observed confidence')
ax.legend()
plt.show()

# Add the OOD_MD curve to the shared calibration table.
# NOTE(review): this column name contains spaces, unlike the sibling
# 'EDL_Observed_Confidence_*' columns - confirm before renaming.
EDL_calibration_data['EDL Observed_Confidence OOD_MD'] = observed_confidence

In [None]:
# Step 1: order OOD_MD samples from most to least uncertain.
# np.argsort yields *positions*, so they are applied with .iloc. Plain []
# indexing on a Series treats integers as index labels, and labels stop
# matching positions as soon as dropna() has removed a row upstream.
sorted_indices_OOD_MD = np.argsort(-np.asarray(std_OOD_MD))
sorted_y_OOD_MD = y_OOD_MD.iloc[sorted_indices_OOD_MD]
sorted_mean_OOD_MD = mean_OOD_MD.iloc[sorted_indices_OOD_MD]

# Steps 2 and 3: drop the most uncertain samples and recompute the RMSE.
rmse_values_OOD_MD = []

for fraction in fractions:
    # Number of most-uncertain samples to discard at this fraction.
    num_to_remove = int(fraction * len(sorted_y_OOD_MD))
    # RMSE on what is left.
    remaining_rmse_OOD_MD = rmse(sorted_y_OOD_MD.iloc[num_to_remove:],
                                 sorted_mean_OOD_MD.iloc[num_to_remove:])
    rmse_values_OOD_MD.append(remaining_rmse_OOD_MD)

# Step 4: error metric against the fraction of removed samples.
plt.figure(figsize=(6, 5))
plt.plot(fractions, rmse_values_OOD_MD, marker='o')
plt.xlabel('Fraction of samples removed')
plt.ylabel('RMSE on remaining samples')
plt.title('Sparsification Plot')
plt.show()

# Add the OOD_MD curve to the shared sparsification table.
EDL_Sparsification_data['EDL_rmse_values_OOD_MD'] = rmse_values_OOD_MD

In [None]:

# Persist the accumulated calibration curves, then display the table.
calibration_csv = "results/Tg_UQ/MFF/EDL_calibration_data.csv"
EDL_calibration_data.to_csv(calibration_csv, index=False)

EDL_calibration_data

In [None]:
# Persist the accumulated sparsification curves, then display the table.
sparsification_csv = "results/Tg_UQ/MFF/EDL_Sparsification_data.csv"
EDL_Sparsification_data.to_csv(sparsification_csv, index=False)
EDL_Sparsification_data

### High Tg

In [None]:
# High-Tg predictions: read, discard incomplete rows, and give the five
# columns fixed names.
# NOTE: this reuses (and overwrites) the OOD_result / *_OOD names from the
# OOD_EXP section.
OOD_result = pd.read_csv(r'chemprop-confidence-evidential\results\Tg_UQ\MFF\Tg_high_Tg_result.csv')
OOD_result = OOD_result.dropna()
OOD_result.columns = ['smiles', 'Tg', 'true_Tg', 'uncertainty', 'std']
y_OOD, mean_OOD, std_OOD = (
    OOD_result[name] for name in ('true_Tg', 'Tg', 'std')
)

# Number of usable high-Tg samples.
print(mean_OOD.shape)

In [None]:
# Flatten high-Tg targets, predicted means and predicted stds to 1-D arrays.
y_OOD_1d, mean_OOD_1d, std_OOD_1d = (
    np.ravel(series) for series in (y_OOD, mean_OOD, std_OOD)
)

# Point-prediction accuracy on the high-Tg set.
mae_OOD = mean_absolute_error(y_OOD_1d, mean_OOD_1d)
mse_OOD = mean_squared_error(y_OOD_1d, mean_OOD_1d)
rmse_OOD = np.sqrt(mse_OOD)
r2_OOD = r2_score(y_OOD_1d, mean_OOD_1d)

# Single-row summary table.
metrics_OOD = dict(MAE=mae_OOD, RMSE=rmse_OOD, R2=r2_OOD)
metrics_OOD_df = pd.DataFrame(metrics_OOD, index=['OOD test'])

metrics_OOD_df

In [None]:
# pandas / numpy are already imported in the notebook's first cell, so they
# are not re-imported here.

# Two-sided 95 % interval under a normal assumption: mean +/- 1.96 * std.
ci_multiplier = 1.96
lower_bound = mean_OOD - ci_multiplier * std_OOD
upper_bound = mean_OOD + ci_multiplier * std_OOD

# Per-sample prediction, uncertainty and interval bounds.
df = pd.DataFrame({
    'mean_OOD': mean_OOD,
    'std_OOD': std_OOD,
    '95% CI Lower': lower_bound,
    '95% CI Upper': upper_bound
})

print(df)

# Destination of the table; despite the variable name the output is a CSV file.
excel_file_path = 'results/Tg_UQ/MFF/high_Tg_EDL.csv'
df.to_csv(excel_file_path, index=False)

In [None]:
font_size = 16

# Bold fonts and thicker axes spines are rc settings that are read when the
# figure elements are created, so they are applied *before* the figure is
# built. rc_context also keeps them from leaking into other cells.
with plt.rc_context({'font.weight': 'bold', 'axes.linewidth': 2}):
    # High-resolution figure for export.
    fig, ax = plt.subplots(figsize=(5, 4.5), dpi=1200)

    # High-Tg predictions with the predicted std as vertical error bars.
    ax.errorbar(y_OOD_1d, mean_OOD_1d,
                yerr=std_OOD_1d,
                fmt='o', ecolor='lightblue', mec='green', mfc='lightgreen',
                alpha=0.7, capsize=5, label='High Tg Prediction')

    # Diagonal reference: predicted == actual over the plotted range.
    ax.plot((250, 520), (250, 520), 'r--', label='Perfect predictions')

    # Identical tick positions on both axes.
    ax.set_xticks(np.arange(250, 520, 40))
    ax.set_yticks(np.arange(250, 520, 40))

    ax.set_xlabel('Actual Values', fontsize=font_size, weight='bold')
    ax.set_ylabel('Predicted Values', fontsize=font_size, weight='bold')
    ax.tick_params(axis='both', which='major', labelsize=font_size)

    ax.legend(fontsize=font_size, frameon=False)

    plt.tight_layout()
    # NOTE(review): this path starts with ../ while the CSV outputs above use
    # results/... - confirm the intended working directory.
    plt.savefig('../results/Tg_UQ/MFF/EDL.png', format='png', bbox_inches='tight')
    plt.show()