In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from tqdm import tqdm
import math
from utils import setup_seed,heteroscedastic_loss,extract_coords_to_csv,random_mini_batches


In [2]:
class UA_CNN(nn.Module):
    """1-D CNN with a shared convolutional trunk and two fully connected heads.

    The trunk is four ``Conv1d -> ReLU -> MaxPool1d(2, 2)`` stages; each pooling
    stage halves the length. Both heads start with ``Linear(256, 64)``, so the
    trunk output must have length 256 on its last axis, i.e. an input of shape
    ``(N, 1, 4096)`` becomes ``(N, 1, 256)``.

    ``forward`` returns ``(out, out_vector, out_var)``:
        * ``out``        -- output of the ``fc1..fc5`` head, shape ``(N, 1, 1)``.
        * ``out_vector`` -- trunk output fed to both heads, shape ``(N, 1, 256)``.
        * ``out_var``    -- output of the ``fc1_var..fc5_var`` head, shape
          ``(N, 1, 1)``; the training code in this notebook treats it as a
          log-variance.

    Layers are registered under the attribute names ``conv1..conv4``,
    ``relu1..relu8``, ``maxpool1..maxpool4``, ``fc1..fc5`` and the ``*_var``
    counterparts, so checkpoints keyed by those names load unchanged.
    """

    # (in_channels, out_channels) of the four convolution stages, in order.
    _CONV_CHANNELS = ((1, 2), (2, 4), (4, 2), (2, 1))
    # Feature widths along each fully connected head (five Linear layers).
    _HEAD_WIDTHS = (256, 64, 32, 16, 8, 1)

    def __init__(self):
        super(UA_CNN, self).__init__()

        # Shared trunk: conv{k} -> relu{k} -> maxpool{k} for k = 1..4.
        for stage, (c_in, c_out) in enumerate(self._CONV_CHANNELS, start=1):
            setattr(self, f'conv{stage}',
                    nn.Conv1d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1))
            setattr(self, f'relu{stage}', nn.ReLU())
            setattr(self, f'maxpool{stage}', nn.MaxPool1d(2, 2))

        # Two heads with identical layouts: '' (fc1..fc5) first, then '_var'.
        n_linear = len(self._HEAD_WIDTHS) - 1
        for suffix in ('', '_var'):
            widths = zip(self._HEAD_WIDTHS[:-1], self._HEAD_WIDTHS[1:])
            for layer, (w_in, w_out) in enumerate(widths, start=1):
                setattr(self, f'fc{layer}{suffix}', nn.Linear(w_in, w_out))
                # Every Linear except the last is followed by relu5..relu8.
                if layer < n_linear:
                    setattr(self, f'relu{layer + 4}{suffix}', nn.ReLU())

    def _run_head(self, features, suffix):
        """Apply the fully connected head selected by ``suffix`` to ``features``."""
        hidden = features
        for layer in range(1, 5):
            hidden = getattr(self, f'fc{layer}{suffix}')(hidden)
            hidden = getattr(self, f'relu{layer + 4}{suffix}')(hidden)
        return getattr(self, f'fc5{suffix}')(hidden)

    def forward(self, x):
        hidden = x
        for stage in range(1, 5):
            hidden = getattr(self, f'conv{stage}')(hidden)
            hidden = getattr(self, f'relu{stage}')(hidden)
            hidden = getattr(self, f'maxpool{stage}')(hidden)
        out_vector = hidden

        out = self._run_head(out_vector, '')
        out_var = self._run_head(out_vector, '_var')

        return out, out_vector, out_var

In [None]:
import os
import datetime
# Root directory under which this run's artifacts are written.
parent_dir = './'
# Timestamp taken once when this cell runs; it names both the run directory
# and the log file, so re-running the cell starts a fresh directory.
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
results_dir = os.path.join(parent_dir, f'results/uncertainty/{timestamp}')
# exist_ok=True: do not fail if the directory already exists.
os.makedirs(results_dir, exist_ok=True)
# Text log that log_and_print() appends to.
log_file_path = os.path.join(results_dir, f'training_log_{timestamp}.txt')
# Function to log and print messages
def log_and_print(message, log_file_path):
    """Print ``message`` and append it, followed by a newline, to a log file.

    Args:
        message: Text to report. Non-string values are converted with ``str()``
            so the same text is printed and written.
        log_file_path: Path of the log file. It is opened in append mode on
            every call, so earlier content is preserved.
    """
    text = str(message)
    print(text)
    # Explicit UTF-8 so logging does not depend on the platform's locale encoding.
    with open(log_file_path, 'a', encoding='utf-8') as log_file:
        log_file.write(text + '\n')

# --- Data -------------------------------------------------------------------
# Both CSV files are read without a header row.
physical_para = pd.read_csv('./data/934label-16_no-header.csv', header=None, low_memory=False)
XRD_descriptor = pd.read_csv('./data/XRD_descriptor_936.csv', header=None, low_memory=False)
# Append the first label column to the right of the descriptor columns.
# NOTE(review): concat(axis=1) aligns on the row index; the file names hint at
# 934 vs 936 rows, which would leave NaN labels for unmatched rows -- confirm.
data = pd.concat([XRD_descriptor, physical_para.iloc[:,0]], axis=1)

# --- Configuration ----------------------------------------------------------
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Weight of the heteroscedastic term added to the MSE loss.
heteroscedastic_loss_coefficient = 1e-3
grid_points = [[12,52]]
results_all = []
LR = 0.001
epochs = 1000
mb_size = 50
# Best test R2 seen over all seeds; decides which run's tensors are saved.
r2_record = -math.inf
seeds = [1992, 30, 27, 35, 81]
# Derived from the list so the ensemble size cannot drift out of sync with it.
seeds_len = len(seeds)

# --- Train / test split -----------------------------------------------------
indices = data.index
train_indices, test_indices = train_test_split(indices, test_size=0.2, random_state=0)
# Positions 0..4095 are used as features and position 4096 as the target.
# assumes XRD_descriptor has exactly 4096 columns, so position 4096 is the
# appended label -- TODO confirm.
x_train = data.iloc[train_indices, :4096]
x_test = data.iloc[test_indices, :4096]
y_train = data.iloc[train_indices, 4096]
y_test = data.iloc[test_indices, 4096]

# Number of test samples with a non-zero label; zero-labelled samples are
# filtered out everywhere a metric is computed, so the accumulators below are
# sized to this count.
y_test_ = torch.tensor(y_test.values)
idx_test = torch.nonzero(y_test_.squeeze() != 0, as_tuple=False)
x_test_len = len(idx_test)
log_and_print(f"x_test_len: {x_test_len}", log_file_path)

x_train = torch.from_numpy(x_train.values).float().to(device)
x_test = torch.from_numpy(x_test.values).float().to(device)
y_train = torch.from_numpy(y_train.values).float().to(device)
y_test = torch.from_numpy(y_test.values).float().to(device)

# Insert a channel axis: (N, L) -> (N, 1, L), as expected by Conv1d.
x_train = torch.unsqueeze(x_train, 1)
x_test = torch.unsqueeze(x_test, 1)

# Ensemble accumulators filled by the training loop (one contribution per seed).
sum_preds = np.zeros((x_test_len, 1))
sum_ale_uncs = np.zeros((x_test_len, 1))
sum_epi_uncs = np.zeros((x_test_len, 1))
all_preds = np.zeros((x_test_len, 1, seeds_len))

# Train one UA_CNN per seed, keep the checkpoint with the best test R2 for that
# seed, and accumulate the per-seed test predictions / predicted variances that
# the later cells turn into ensemble mean, aleatoric and epistemic uncertainty.
# NOTE(review): the checkpoint is selected on the test split itself, so the
# reported test metrics are optimistic -- consider a separate validation split.
for grid_point in tqdm(grid_points):
    results=[]
    # One table per metric: column 0 stores the seed, the metric goes to column i+1.
    result_r2_test = pd.DataFrame(np.zeros([10,11]),columns=['seed','F1','F2','F3','F4','F5','F6','F7','F8','F9','F10'])
    result_mae_test = pd.DataFrame(np.zeros([10,11]),columns=['seed','F1','F2','F3','F4','F5','F6','F7','F8','F9','F10'])
    result_loss_test = pd.DataFrame(np.zeros([10,11]),columns=['seed','F1','F2','F3','F4','F5','F6','F7','F8','F9','F10'])
    result_r2_train = pd.DataFrame(np.zeros([10,11]),columns=['seed','F1','F2','F3','F4','F5','F6','F7','F8','F9','F10'])
    result_mae_train = pd.DataFrame(np.zeros([10,11]),columns=['seed','F1','F2','F3','F4','F5','F6','F7','F8','F9','F10'])
    result_loss_train = pd.DataFrame(np.zeros([10,11]),columns=['seed','F1','F2','F3','F4','F5','F6','F7','F8','F9','F10'])
    for i_seed in tqdm(range(seeds_len), position=0, leave=True): # Use seeds_len to match the initialization
        seed = seeds[i_seed]
        result_r2_train.iloc[i_seed,0] = seed
        result_mae_train.iloc[i_seed,0] = seed
        result_loss_train.iloc[i_seed,0] = seed
        result_r2_test.iloc[i_seed,0] = seed
        result_mae_test.iloc[i_seed,0] = seed
        result_loss_test.iloc[i_seed,0] = seed

        setup_seed(seed)

        # Column offset into the result tables; reset per seed, so every seed
        # writes its metrics to column 'F1'.
        i = 0

        input_size, feature_size = x_train.shape[0], x_train.shape[1]

        loss_train_log = []
        loss_test_log = []

        model = UA_CNN().to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=LR)
        loss_func = nn.MSELoss()

        y_train_sub = y_train
        y_test_sub = y_test
        r2_best = -math.inf
        MAE_best = 0
        for epoch in range(epochs):
            epoch_loss = 0
            num_minibatches = int(input_size / mb_size) + 1
            minibatches = random_mini_batches(x_train, y_train_sub, mb_size)
            model.train()
            for minibatch in minibatches:
                batch_x, batch_y  = minibatch
                batch_y_pre, _, batch_y_pre_log_var = model(batch_x)
                # Only samples with a non-zero label contribute to the loss.
                idx = torch.nonzero(batch_y.squeeze()!=0,as_tuple=False)
                batch_y_pre1 = torch.index_select(batch_y_pre.squeeze(), dim=0, index = idx.squeeze())
                batch_y_pre_log_var1 = torch.index_select(batch_y_pre_log_var.squeeze(), dim=0, index = idx.squeeze())
                batch_y1 = torch.index_select(batch_y.squeeze(), dim=0, index = idx.squeeze())
                mse_loss = loss_func(batch_y_pre1.squeeze(), batch_y1.squeeze())
                h_loss = heteroscedastic_loss(batch_y1.squeeze(),batch_y_pre1.squeeze(),batch_y_pre_log_var1.squeeze())
                loss = mse_loss + heteroscedastic_loss_coefficient * h_loss
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Detach so the running average does not keep autograd history alive.
                epoch_loss = epoch_loss + (loss.detach() / num_minibatches)
            loss_train_log.append(torch.mean(epoch_loss).item())
            model.eval()
            with torch.no_grad():
                y_test_pre, _, y_test_pre_log_var = model(x_test)
                idx_test = torch.nonzero(y_test_sub.squeeze()!=0,as_tuple=False)
                y_test_pre1 = torch.index_select(y_test_pre.squeeze(), dim=0, index = idx_test.squeeze())
                y_test_pre_log_var1 = torch.index_select(y_test_pre_log_var.squeeze(), dim=0, index = idx_test.squeeze())
                y_test_sub1 = torch.index_select( y_test_sub.squeeze(), dim=0, index = idx_test.squeeze())

                h_loss_test = heteroscedastic_loss(y_test_sub1.squeeze(),y_test_pre1.squeeze(),y_test_pre_log_var1.squeeze())
                mse_loss_test = loss_func(y_test_pre1.squeeze(), y_test_sub1.squeeze())
                loss_test = mse_loss_test + heteroscedastic_loss_coefficient * h_loss_test
                MAE2 = mean_absolute_error(y_test_sub1.cpu().numpy().squeeze(),y_test_pre1.cpu().numpy().squeeze())
                r2_score_v =  r2_score(y_test_sub1.cpu().numpy().squeeze(),y_test_pre1.cpu().numpy().squeeze())

                if epoch % 20 == 0:
                    log_message = f'Iter-{epoch}; Total loss: {loss_test.item():.4f}; MAE2: {MAE2:.4f}; r2_score_v: {r2_score_v:.4f}'
                    log_and_print(log_message, log_file_path)

                # Checkpoint whenever the test R2 improves for this seed.
                if r2_best < r2_score_v:
                    best_test_loss = loss_test
                    torch.save(model.state_dict(), os.path.join(results_dir, 'best_test_model_ale_epi_.pth'))
                    MAE_best = MAE2
                    r2_best = r2_score_v

            loss_test_log.append(torch.mean(loss_test).item())

        # Reload this seed's best checkpoint and evaluate it once on both splits.
        model_test = UA_CNN().to(device)
        model_test.load_state_dict(torch.load(os.path.join(results_dir, 'best_test_model_ale_epi_.pth')))
        model_test.eval()
        # Pure inference: run the forward passes without building an autograd graph.
        with torch.no_grad():
            y_test_pre, y_test_vector, y_test_pre_log_var = model_test(x_test)
            y_train_pre, y_train_vector, y_train_pre_log_var= model_test(x_train)
            idx_train = torch.nonzero(y_train_sub.squeeze()!=0,as_tuple=False)
            idx_tst = torch.nonzero(y_test_sub.squeeze()!=0,as_tuple=False)
            y_train_sub1 = torch.index_select(y_train_sub.squeeze(), dim=0, index = idx_train.squeeze())
            y_train_pre1 = torch.index_select(y_train_pre.squeeze(), dim=0, index = idx_train.squeeze())
            y_train_pre_log_var1 = torch.index_select(y_train_pre_log_var.squeeze(), dim=0, index = idx_train.squeeze())
            y_train_vector1 = torch.index_select(y_train_vector.squeeze(), dim=0, index = idx_train.squeeze())
            y_test_pre_log_var1 = torch.index_select(y_test_pre_log_var.squeeze(), dim=0, index = idx_tst.squeeze())
            y_test_sub1 = torch.index_select(y_test_sub.squeeze(), dim=0, index = idx_tst.squeeze())
            y_test_pre1 = torch.index_select(y_test_pre.squeeze(), dim=0, index = idx_tst.squeeze())
            y_test_vector1 = torch.index_select(y_test_vector.squeeze(), dim=0, index = idx_tst.squeeze())

        mse_loss_test = loss_func(y_test_pre1.squeeze(), y_test_sub1.squeeze())
        h_loss_test = heteroscedastic_loss(y_test_sub1.squeeze(),y_test_pre1.squeeze(),y_test_pre_log_var1.squeeze())
        loss_test = mse_loss_test + heteroscedastic_loss_coefficient * h_loss_test
        MAE_test = mean_absolute_error(y_test_sub1.cpu().numpy().squeeze(),y_test_pre1.cpu().numpy().squeeze())
        r2_test = r2_score(y_test_sub1.cpu().numpy().squeeze(),y_test_pre1.cpu().numpy().squeeze())

        mse_loss_train = loss_func(y_train_pre1.squeeze(), y_train_sub1.squeeze())
        h_loss_train = heteroscedastic_loss(y_train_sub1.squeeze(),y_train_pre1.squeeze(),y_train_pre_log_var1.squeeze())
        loss_train = mse_loss_train + heteroscedastic_loss_coefficient * h_loss_train
        MAE_train = mean_absolute_error(y_train_sub1.cpu().numpy().squeeze(),y_train_pre1.cpu().numpy().squeeze())
        r2_train = r2_score(y_train_sub1.cpu().numpy().squeeze(),y_train_pre1.cpu().numpy().squeeze())

        # Accumulate this seed's test predictions as an (x_test_len, 1) column.
        # On a shape mismatch the contribution is logged and skipped.
        test_preds_array = np.array([[x] for x in y_test_pre1.cpu().numpy()])
        log_and_print(f"Shape of y_test_pre: {y_test_pre.shape}", log_file_path)
        log_and_print(f"Shape of test_preds_array: {test_preds_array.shape}", log_file_path)
        log_and_print(f"Shape of sum_preds: {sum_preds.shape}", log_file_path)
        if test_preds_array.shape != sum_preds.shape:
            log_and_print(f"Shape mismatch: test_preds_array.shape = {test_preds_array.shape}, sum_preds.shape = {sum_preds.shape}", log_file_path)
        else:
            sum_preds += test_preds_array
        # The second head's output is exponentiated, i.e. treated as a log-variance.
        test_pred_log_vars_array = np.array([[x] for x in y_test_pre_log_var1.cpu().numpy()])
        test_pred_vars_array = np.exp(test_pred_log_vars_array)

        log_and_print(f"Shape of test_pred_log_vars_array: {test_pred_log_vars_array.shape}", log_file_path)
        log_and_print(f"Shape of sum_ale_uncs: {sum_ale_uncs.shape}", log_file_path)
        if test_pred_vars_array.shape != sum_ale_uncs.shape:
            log_and_print(f"Shape mismatch: test_pred_log_vars_array.shape = {test_pred_log_vars_array.shape}, sum_ale_uncs.shape = {sum_ale_uncs.shape}", log_file_path)
        else:
            sum_ale_uncs += test_pred_vars_array

        log_and_print(f"Shape of all_preds: {all_preds.shape}", log_file_path)
        log_and_print(f"Shape of test_preds_array: {test_preds_array.shape}", log_file_path)
        if test_preds_array.shape[0] == all_preds.shape[0] and test_preds_array.shape[1] == all_preds.shape[1]:
            all_preds[:, :, i_seed] = test_preds_array
        else:
            log_and_print(f"Shape mismatch for all_preds: test_preds_array.shape = {test_preds_array.shape}, all_preds.shape = {all_preds.shape}", log_file_path)

        # Ground-truth test labels (non-zero ones only) for the analysis cells.
        # These must be the targets, not the predictions: later cells compute
        # |ensemble mean - labels| and calibration against them.
        labels = y_test_sub1.cpu().numpy()

        # Keep the tensors of the best run (by test R2) across all seeds.
        # NOTE(review): the feature vectors are saved unfiltered (y_*_vector),
        # while targets/predictions are saved filtered (*1) -- confirm intended.
        if r2_test > r2_record:
            r2_record = r2_test
            torch.save(y_train_sub1.squeeze(), os.path.join(results_dir, 'y_train_sub_ale_epi_.pth'))
            torch.save(y_train_pre1.squeeze(), os.path.join(results_dir, 'y_train_pre_ale_epi_.pth'))
            torch.save(y_test_sub1.squeeze(), os.path.join(results_dir, 'y_test_sub_ale_epi_.pth'))
            torch.save(y_test_pre1.squeeze(), os.path.join(results_dir, 'y_test_pre_ale_epi_.pth'))
            torch.save(y_test_vector.squeeze(), os.path.join(results_dir, 'y_test_vector_ale_epi_.pth'))
            torch.save(y_train_vector.squeeze(), os.path.join(results_dir, 'y_train_vector_ale_epi_.pth'))

        result_r2_test.iloc[i_seed,i+1] = r2_test
        result_mae_test.iloc[i_seed,i+1] = MAE_test
        result_loss_test.iloc[i_seed,i+1] = loss_test.detach().cpu().numpy()
        result_r2_train.iloc[i_seed,i+1] = r2_train
        result_mae_train.iloc[i_seed,i+1] = MAE_train
        result_loss_train.iloc[i_seed,i+1] = loss_train.detach().cpu().numpy()

        i = i+1

        current_results = (f"Seed {seed} results: R2 test = {r2_test:.4f}, MAE test = {MAE_test:.4f}, "
                           f"Loss test = {loss_test:.4f}, R2 train = {r2_train:.4f}, "
                           f"MAE train = {MAE_train:.4f}, Loss train = {loss_train:.4f}")
        log_and_print(current_results, log_file_path)

    results.append(result_r2_test)
    results.append(result_mae_test)
    results.append(result_loss_test)
    results.append(result_r2_train)
    results.append(result_mae_train)
    results.append(result_loss_train)
    results_all.append(results)
torch.save(results_all, os.path.join(results_dir, 'results.pth'))


In [None]:
# Sanity check: shapes of the ensemble accumulators and the filtered test-set
# size, one value per output line.
print(
    sum_preds.shape,
    x_test_len,
    sum_ale_uncs.shape,
    sum_epi_uncs.shape,
    all_preds.shape,
    sep='\n',
)

In [None]:
# Ensemble mean of the per-seed predictions, as nested Python lists.
avg_preds = (sum_preds / seeds_len).tolist()

# Mean over seeds of the predicted variances.
avg_ale_uncs = (sum_ale_uncs / seeds_len).tolist()

# Variance of the per-seed predictions along the seed axis of all_preds.
avg_epi_uncs = np.var(all_preds, axis=2).tolist()

print(avg_preds, avg_ale_uncs, avg_epi_uncs, sep='\n')

In [None]:
# Sample positions and the `labels` array left behind by the training cell.
x = np.array(list(range(len(labels))))
y = np.array(labels)

# The averaged quantities are nested (n, 1) lists; flatten each to a 1-D array.
f = np.array(avg_preds).ravel()
ale_var = np.array(avg_ale_uncs).ravel()
epi_var = np.array(avg_epi_uncs).ravel()

# Standard deviations of the two components.
epi_std = epi_var ** 0.5
ale_std = ale_var ** 0.5
# Two ways of combining them: sum of standard deviations (total_std) and
# square root of the summed variances (total_std_2).
total_std = epi_std + ale_std
total_std_2 = (epi_var + ale_var) ** 0.5

abs_error = np.abs(f - y)
mae = abs_error.mean()
print("MAE:", mae)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import uncertainty_toolbox as uct
# Whether the recalibration cell below writes figures and CSV files.
savefig = True
pred_mean_list = [f]
pred_std_list = [epi_std, ale_std, total_std, total_std_2]

# Counts how many (mean, std) combinations have been evaluated.
idx_counter = 0
for i, pred_mean in enumerate(pred_mean_list):
    for j, pred_std in enumerate(pred_std_list):
        # Same three metrics, in the same order, for every std candidate.
        mace, rmsce, ma = [
            metric(pred_mean, pred_std, y)
            for metric in (
                uct.mean_absolute_calibration_error,
                uct.root_mean_squared_calibration_error,
                uct.miscalibration_area,
            )
        ]

        idx_counter += 1
        print(f"MACE: {mace}, RMSCE: {rmsce}, MA: {ma}")


In [None]:
# Directory for calibration figures / CSVs; also used by later cells.
# Note the trailing slash: file names are appended by plain string formatting.
output_dir = results_dir + '/corr_all/'
os.makedirs(output_dir, exist_ok=True)
# Overwrites the run timestamp with the time this cell is executed; it only
# tags the file names written below.
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

# For every uncertainty estimate: report calibration metrics and plot the
# calibration curve before and after recalibration.
for i, pred_mean in enumerate(pred_mean_list):
    for j, pred_std in enumerate(pred_std_list):
        # Names must stay in the same order as pred_std_list.
        std_name = ["epi_std", "ale_std", "total_std", "total_std_2"][j]
        # Before recalibration: metrics and proportions of the raw predictive std.
        exp_props, obs_props = uct.get_proportion_lists_vectorized(pred_mean, pred_std, y)
        mace = uct.mean_absolute_calibration_error(pred_mean, pred_std, y, recal_model=None)
        rmsce = uct.root_mean_squared_calibration_error(pred_mean, pred_std, y, recal_model=None)
        ma = uct.miscalibration_area(pred_mean, pred_std, y, recal_model=None)
        print("Before Recalibration:  ", end="")
        print("MACE: {:.5f}, RMSCE: {:.5f}, MA: {:.5f}".format(mace, rmsce, ma))

        fig, ax = plt.subplots(1, 1, figsize=(5, 5))
        uct.plot_calibration(pred_mean, pred_std, y, exp_props=exp_props, obs_props=obs_props, ax=ax)
        if savefig:
            csv_filename = f"{output_dir}before_recal_{std_name}_{timestamp}.csv"
            extract_coords_to_csv(ax, csv_filename)
            # presumably saves the current pyplot figure -- keep this right
            # after the plot call (confirm against uncertainty_toolbox docs).
            uct.viz.save_figure(f"{output_dir}before_recal_{std_name}_{timestamp}", "svg")

        # After recalibration: fit a recalibration model on the proportions
        # above and recompute the same metrics with it.
        # NOTE(review): the model is fitted and evaluated on the same
        # (pred_mean, pred_std, y), so the "after" numbers are in-sample.
        recal_model = uct.iso_recal(exp_props, obs_props)
        recal_exp_props, recal_obs_props = uct.get_proportion_lists_vectorized(pred_mean, pred_std, y, recal_model=recal_model)
        mace = uct.mean_absolute_calibration_error(pred_mean, pred_std, y, recal_model=recal_model)
        rmsce = uct.root_mean_squared_calibration_error(pred_mean, pred_std, y, recal_model=recal_model)
        ma = uct.miscalibration_area(pred_mean, pred_std, y, recal_model=recal_model)
        print("After Recalibration:  ", end="")
        print("MACE: {:.5f}, RMSCE: {:.5f}, MA: {:.5f}".format(mace, rmsce, ma))

        fig, ax = plt.subplots(1, 1, figsize=(5, 5))
        uct.plot_calibration(pred_mean, pred_std, y, exp_props=recal_exp_props, obs_props=recal_obs_props, ax=ax)
        if savefig:
            csv_filename = f"{output_dir}after_recal_{std_name}_{timestamp}.csv"
            extract_coords_to_csv(ax, csv_filename)
            uct.viz.save_figure(f"{output_dir}after_recal_{std_name}_{timestamp}", "svg")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def calculate_cumulative_mae(y, f, uncertainty):
    """MAE of the samples that remain after dropping the most uncertain ones.

    Samples are ordered by decreasing ``uncertainty``. Entry ``i`` of the
    result is the mean absolute error over the ordered samples from position
    ``i`` onward, i.e. after discarding the ``i`` most uncertain samples.

    Args:
        y: 1-D NumPy array of reference values.
        f: 1-D NumPy array of predictions, aligned with ``y``.
        uncertainty: 1-D NumPy array of per-sample uncertainties.

    Returns:
        ``(cumulative_mae, sorted_uncertainty)``: a list with ``len(y)`` MAE
        values, and ``uncertainty`` sorted in decreasing order.
    """
    order = np.argsort(uncertainty)[::-1]
    abs_residuals = np.abs(f[order] - y[order])
    cumulative_mae = [np.mean(abs_residuals[start:]) for start in range(len(y))]
    return cumulative_mae, uncertainty[order]

# Cumulative-MAE curve for each uncertainty estimate. Every call gets its own
# name for the sorted uncertainties so that no result overwrites another.
cumulative_mae_ale, sorted_ale_std = calculate_cumulative_mae(y, f, ale_std)
cumulative_mae_epi, sorted_epi = calculate_cumulative_mae(y, f, epi_std)
cumulative_mae_total, sorted_total_std = calculate_cumulative_mae(y, f, total_std)
cumulative_mae_total_2, sorted_total_std_2 = calculate_cumulative_mae(y, f, total_std_2)

# x-axis: 1/n, 2/n, ..., 1 -- one point per entry of the cumulative curves.
n = len(y)
confidence_percentiles = np.arange(1, n + 1) / n

plt.figure(figsize=(10, 6))
plt.plot(confidence_percentiles, cumulative_mae_ale, marker='o', linestyle='-', label='ale_std')
plt.plot(confidence_percentiles, cumulative_mae_epi, marker='s', linestyle='-', label='epi_std')
plt.plot(confidence_percentiles, cumulative_mae_total, marker='^', linestyle='-', label='total_std')
plt.plot(confidence_percentiles, cumulative_mae_total_2, marker='*', linestyle='-', label='total_std_2')

plt.xlabel('Confidence Percentile')
plt.ylabel('Cumulative MAE')
plt.title('Cumulative MAE vs. Confidence Percentile')
plt.grid(True)
plt.legend()
plt.show()

def save_coords_to_csv(confidence_percentiles, cumulative_mae, uncertainty_type):
    """Write one cumulative-MAE curve to a timestamped CSV file in ``output_dir``.

    Args:
        confidence_percentiles: x-coordinates, stored as 'Confidence Percentile'.
        cumulative_mae: y-coordinates, stored as 'Cumulative MAE'.
        uncertainty_type: Label embedded in the file name (e.g. 'ale_std').

    The target path is built from the notebook-level ``output_dir`` and the
    current time, and is printed once the file has been written.
    """
    columns = {
        'Confidence Percentile': confidence_percentiles,
        'Cumulative MAE': cumulative_mae,
    }
    df = pd.DataFrame(columns)
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    csv_filename = f"{output_dir}CumulativeMAE_vs_ConfidencePercentile_{uncertainty_type}_{timestamp}.csv"
    df.to_csv(csv_filename, index=False)
    print(f"Coordinates saved to {csv_filename}")

# Export each cumulative-MAE curve, in the same order as the plot legend.
for _curve, _tag in (
    (cumulative_mae_ale, 'ale_std'),
    (cumulative_mae_epi, 'epi_std'),
    (cumulative_mae_total, 'total_std'),
    (cumulative_mae_total_2, 'total_std_2'),
):
    save_coords_to_csv(confidence_percentiles, _curve, _tag)

In [None]:
from scipy.stats import spearmanr
# Samples ordered by decreasing epi_std (kept for interactive inspection; the
# correlations below use the unsorted arrays).
sorted_indices = np.argsort(epi_std)[::-1]
sorted_std, sorted_y, sorted_f = epi_std[sorted_indices], y[sorted_indices], f[sorted_indices]

# Spearman rank correlation between each uncertainty estimate and |y - f|.
# p_value is overwritten by every call; only the last one survives.
correlation, p_value = spearmanr(epi_std, np.abs(y - f))
correlation_ale, p_value = spearmanr(ale_std, np.abs(y - f))
correlation_total, p_value = spearmanr(total_std, np.abs(y - f))
correlation_total_2, p_value = spearmanr(total_std_2, np.abs(y - f))

for _label, _value in (
    ("Spearman correlation epi coefficient:", correlation),
    ("Spearman correlation_ale coefficient:", correlation_ale),
    ("Spearman correlation_total coefficient2:", correlation_total),
    ("Spearman correlation_total_2 coefficient2:", correlation_total_2),
):
    print(_label, _value)


In [None]:
from scipy.stats import pearsonr

# Pearson correlation between each uncertainty estimate and |y - f|.
# p_value is overwritten by every call; only the last one survives.
correlation, p_value = pearsonr(epi_std, np.abs(y - f))
correlation_ale, p_value = pearsonr(ale_std, np.abs(y - f))
correlation_total, p_value = pearsonr(total_std, np.abs(y - f))
correlation_total_2, p_value = pearsonr(total_std_2, np.abs(y - f))

# All four lines carry the same label; they are printed in the order
# epi_std, ale_std, total_std, total_std_2.
for _value in (correlation, correlation_ale, correlation_total, correlation_total_2):
    print("Pearson correlation coefficient:", _value)
