In [None]:
import os
import docplex
import pandas as pd
import tensorflow as tf
import numpy as np
import utility
import copy
import mlp_explainer
import mymetrics
import time
import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn import datasets
from sklearn.model_selection import train_test_split
from milp import codify_network
from teste import get_minimal_explanation
from sklearn.metrics import classification_report
from alibi.explainers import AnchorTabular
import joblib
import re
import ast

In [None]:
# Seed NumPy's global random generator so later np.random draws are repeatable.
np.random.seed(50)
# CSV-backed datasets: name -> (path, target handling, print scaled min/max?).
# Target handling:
#   'column' - use df['target'] unchanged
#   'binary' - pass the last column through utility.check_targets_0_1 and cast to int32
#   'remap'  - renumber the sorted distinct labels as 0..k-1, then use df['target']
_CSV_DATASETS = {
    'Banknote': ('./datasets/banknote_authentication.csv', 'column', False),
    'Blood_Transfusion': ('./datasets/blood_transfusion.csv', 'binary', False),
    'Climate': ('./datasets/climate_model_simulation_crashes.csv', 'column', False),
    'Column': ('./datasets/column_2C.dat', 'binary', False),
    'Glass': ('./datasets/glass.csv', 'remap', False),
    'Ionosphere': ('./datasets/Ionosphere.csv', 'binary', True),
    'Modeling': ('./datasets/User_Knowledge_Modeling.csv', 'remap', False),
    'Parkinson': ('./datasets/parkinsons.csv', 'binary', True),
    'Pima': ('./datasets/diabetes.csv', 'binary', False),
    'Sonar': ('./datasets/sonar.csv', 'column', False),
}

# The 'Column' file has no header row; these are the names assigned on read.
_COLUMN_DATASET_FIELDS = ('pelvic_incidence', 'pelvic_tilt', 'lumbar_lordosis_angle', 'sacral_slope', 'pelvic_radius', 'degree_spondylolisthesis', 'target')

# scikit-learn bundled datasets: name -> (loader attribute on sklearn.datasets,
# binarize target against its first entry?, print scaled min/max?).
_SKLEARN_DATASETS = {
    'Iris': ('load_iris', False, False),
    'Breast_Cancer': ('load_breast_cancer', True, False),
    'Wine': ('load_wine', False, True),
}


def _scale_and_split(features, feature_columns, make_targets, report_range, test_size, random_state):
    """Min-max scale ``features``, build the targets and return the stratified split."""
    scaler = MinMaxScaler()
    scaler.fit(features)
    scaled = scaler.transform(features)
    if report_range:
        print(scaled.min(), scaled.max())
    # Targets are produced only after scaling (and the optional range print),
    # which is the order the per-dataset code has always used.
    targets = make_targets()
    df_scaled = pd.DataFrame(scaled, columns=feature_columns)
    df_scaled['target'] = targets
    X_train, X_test, y_train, y_test = train_test_split(
        scaled, targets, test_size=test_size, random_state=random_state, stratify=targets)
    return X_train, X_test, y_train, y_test, df_scaled.columns, np.unique(targets)


def _load_csv_dataset(dataset_name, test_size, random_state):
    """Read one of the CSV-backed datasets and hand it to ``_scale_and_split``."""
    path, target_mode, report_range = _CSV_DATASETS[dataset_name]
    if dataset_name == 'Column':
        df = pd.read_csv(path, sep=" ", names=list(_COLUMN_DATASET_FIELDS))
        df['target'] = np.where(df['target'] == 'AB', 1, 0)
    else:
        df = pd.read_csv(path)

    if target_mode == 'remap':
        # Relabel before df.values is taken so the feature slice sees the mapped frame.
        unique_labels = sorted(df['target'].unique())
        label_map = {original: new for new, original in enumerate(unique_labels)}
        df['target'] = df['target'].map(label_map)

    def make_targets():
        if target_mode == 'binary':
            return (utility.check_targets_0_1(df.values[:, -1])).astype(np.int32)
        return df['target'].values

    # The last column is treated as the target; everything before it is a feature.
    return _scale_and_split(df.values[:, :-1], df.columns[:-1], make_targets,
                            report_range, test_size, random_state)


def _load_sklearn_dataset(dataset_name, test_size, random_state):
    """Load one of the scikit-learn bundled datasets and hand it to ``_scale_and_split``."""
    loader_name, binarize, report_range = _SKLEARN_DATASETS[dataset_name]
    dataset = getattr(datasets, loader_name)()

    def make_targets():
        if binarize:
            # Class of the first sample becomes 0, every other class becomes 1.
            relabeled = np.where(dataset.target == dataset.target[0], 0, 1)
            return (utility.check_targets_0_1(relabeled)).astype(np.int32)
        return dataset.target

    return _scale_and_split(dataset.data, dataset.feature_names, make_targets,
                            report_range, test_size, random_state)


def load_data(dataset_name, test_size=0.75, random_state=50):
    """Load a named dataset, min-max scale its features and split it.

    Args:
        dataset_name: One of 'Iris', 'Banknote', 'Blood_Transfusion',
            'Breast_Cancer', 'Climate', 'Column', 'Glass', 'Ionosphere',
            'Modeling', 'Parkinson', 'Pima', 'Sonar', 'Wine' (case sensitive).
        test_size: Forwarded to ``train_test_split``; defaults to 0.75.
        random_state: Forwarded to ``train_test_split``; defaults to 50.

    Returns:
        ``(X_train, X_test, y_train, y_test, columns, classes)`` where
        ``columns`` is the column index of the scaled frame *including* the
        trailing ``'target'`` column and ``classes`` is ``np.unique(targets)``.
        For an unrecognised name a message is printed and ``None`` is returned.

    The scaler is fitted on the full feature matrix before splitting, and the
    split is stratified on the targets. 'Ionosphere', 'Parkinson' and 'Wine'
    additionally print the global min and max of the scaled features.
    """
    if isinstance(dataset_name, str):
        if dataset_name in _SKLEARN_DATASETS:
            return _load_sklearn_dataset(dataset_name, test_size, random_state)
        if dataset_name in _CSV_DATASETS:
            return _load_csv_dataset(dataset_name, test_size, random_state)
    print("Incorrect dataset name")
    return None
        
# Decimal literal, optionally signed and/or in scientific notation.
_NUMBER_PATTERN = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'
# "low <|<= feature <|<= high" on a condition with spaces removed. "<=" is listed
# before "<" so an inclusive operator is never read as "<" plus a stray "=".
_RANGE_CONDITION_RE = re.compile(
    r'(' + _NUMBER_PATTERN + r')\s*(<=|<)\s*([^\s<>]+)\s*(<=|<)\s*(' + _NUMBER_PATTERN + r')'
)
# Any single comparison operator; two-character forms first.
_COMPARISON_RE = re.compile(r'<=|>=|<|>')


def _squash(text):
    """Return ``text`` with all whitespace removed."""
    return ''.join(text.split())


def _find_feature_index(candidate, haystack, names):
    """Return the index of the feature a condition refers to, or ``None``.

    An exact (whitespace-insensitive) match of ``candidate`` wins. Only when
    there is none does the search fall back to the longest name contained in
    ``haystack``, so a name that is a prefix of another cannot shadow it.
    """
    wanted = _squash(candidate)
    for idx, name in enumerate(names):
        if _squash(name) == wanted:
            return idx
    best_idx, best_len = None, 0
    for idx, name in enumerate(names):
        if name in haystack and len(name) > best_len:
            best_idx, best_len = idx, len(name)
    return best_idx


def _first_number(text):
    """Return the first whitespace/operator-delimited token of ``text`` that parses as float."""
    for token in _COMPARISON_RE.sub(' ', text).split():
        try:
            return float(token)
        except ValueError:
            continue
    return None


def parse_explanation(explanation, feature_names, epsilon=1e-6):
    """Turn textual anchor conditions into per-feature ``[lower, upper]`` bounds.

    Args:
        explanation: Iterable of condition strings, either a range
            (``"0.25 < feat <= 0.50"``) or a single comparison
            (``"feat > 0.50"``, with the feature on the left).
        feature_names: Feature names; row ``i`` of the result belongs to
            ``feature_names[i]``.
        epsilon: Offset applied to a strict bound so that the returned
            interval can be treated as closed.

    Returns:
        ``np.ndarray`` with one ``[lower, upper]`` row per feature. Features
        not mentioned by any condition keep the default ``[0, 1]``.

    A condition that names a known feature but has no parsable number after
    its operator is reported with a printed message and otherwise ignored.
    """
    names = [str(name) for name in feature_names]
    compact_names = [name.replace(' ', '') for name in names]
    bounds = [[0, 1] for _ in names]

    for condition in explanation:
        range_match = _RANGE_CONDITION_RE.match(condition.replace(' ', ''))
        if range_match:
            low_text, low_op, feature_token, high_op, high_text = range_match.groups()
            low, high = float(low_text), float(high_text)
            lower_bound = low if low_op == '<=' else low + epsilon
            upper_bound = high if high_op == '<=' else high - epsilon
            idx = _find_feature_index(feature_token, feature_token, compact_names)
            if idx is not None:
                bounds[idx] = [lower_bound, upper_bound]
            continue  # range conditions never fall through to the single-operator path

        # Single comparison: feature name on the left of the first operator,
        # numeric threshold somewhere to its right.
        op_match = _COMPARISON_RE.search(condition)
        if op_match is None:
            subject, operator, remainder = condition, None, ''
        else:
            subject = condition[:op_match.start()]
            operator = op_match.group()
            remainder = condition[op_match.end():]

        idx = _find_feature_index(subject, condition, names)
        if idx is None:
            continue

        value = None if operator is None else _first_number(remainder)
        if value is None:
            print(f"Could not extract numeric value from condition: '{condition}'")
            continue

        if operator == '>':
            bounds[idx] = [value + epsilon, 1]
        elif operator == '>=':
            bounds[idx] = [value, 1]
        elif operator == '<':
            bounds[idx] = [0, value - epsilon]
        else:  # '<='
            bounds[idx] = [0, value]

    return np.array(bounds)

def predict_fn(x):
    """Return the predictions of the module-level model ``clf`` for the batch ``x``.

    ``clf`` is looked up in the module namespace at call time, so it must be
    assigned before this function is invoked. ``verbose=0`` is passed to the
    model's ``predict`` call.
    """
    predictions = clf.predict(x, verbose=0)
    return predictions

def train_anchors(dataset_name):
    """Fit an ``AnchorTabular`` explainer for the saved model of ``dataset_name``.

    Args:
        dataset_name: Dataset name understood by ``load_data``; the model is
            read from ``new_models/<dataset_name>.h5``.

    Returns:
        ``(explainer, feature_names, class_names)`` where ``feature_names``
        no longer contains the ``'target'`` column.

    The explainer is given a predictor bound to the model loaded *here*.
    Previously the module-level ``predict_fn`` was passed, which reads a
    global ``clf``: the locally loaded model was never used, and the call
    failed or explained a different model when that global was missing or
    belonged to another dataset.
    """
    print(dataset_name)
    clf = tf.keras.models.load_model(f'new_models/{dataset_name}.h5', compile=False)
    print(f'Loaded model')
    X_train, X_test, y_train, y_test, feature_names, class_names = load_data(dataset_name)
    if 'target' in feature_names:
        feature_names = feature_names[feature_names != 'target']
    print(f'feature names:\n {feature_names}')
    print(f'class names:\n {class_names}')

    def _predict(x):
        # Closure over the local model so the explainer and the model always match.
        return clf.predict(x, verbose=0)

    explainer = AnchorTabular(_predict, feature_names)
    explainer.fit(X_train, disc_perc=(25, 50, 75))
    return explainer, feature_names, class_names

In [None]:
# Dataset analysed by the cells below. Names listed alongside the original setting:
# ['Iris', 'Wine', 'Column', 'Pima', 'Parkinson', 'Breast_Cancer', 'Blood_Transfusion', 'Ionosphere', 'Glass', 'Climate', 'Modeling', 'Banknote', 'Sonar']
dataset_name = 'Sonar'

# Result files read by the following cell.
path_anchors = './Anchors_results/' + dataset_name + '_results.csv'
path_twostep = f'{dataset_name}_results/results_0.25.csv'
path_twostep_brute = f'{dataset_name}_results/raw_metric_data_0.25.csv'

# Re-seed NumPy's global generator for this stage of the notebook.
np.random.seed(50)

In [None]:
# Read the three result tables in one pass: Anchors metrics, the two-step
# summary, and the two-step raw per-instance metrics.
anchors_brute_results, twostep_results, twostep_results_brute = (
    pd.read_csv(csv_path)
    for csv_path in (path_anchors, path_twostep, path_twostep_brute)
)

In [None]:
# Bare expression: as the last statement of a notebook cell it renders the
# two-step raw-metrics table (presumably for visual inspection).
twostep_results_brute

In [None]:
# Bare expression: as the last statement of a notebook cell it renders the
# Anchors metrics table (presumably for visual inspection).
anchors_brute_results

In [None]:
# Per-instance Anchors metrics, pulled out of the results table as arrays.
(times_anchors,
 coverage_anchors,
 errors_anchors,
 rsum_anchors,
 sizes_anchors) = (
    anchors_brute_results[column].values
    for column in ('time', 'coverage', 'errors', 'rsum', 'size')
)

# Matching per-instance metrics of the two-step method.
(times_twostep,
 coverage_twostep,
 rsum_twostep,
 feature_sizes_twostep) = (
    twostep_results_brute[column].values
    for column in ('times_twostep', 'coverage_twostep', 'rsum_twostep', 'sizes_twostep')
)

In [None]:
# Compute means and standard deviations
def compute_mean_std(arr):
    """Return ``(mean, std)`` of ``arr`` as computed by ``np.mean`` and ``np.std``."""
    mean_value = np.mean(arr)
    std_value = np.std(arr)
    return mean_value, std_value

# Compute relative percentage differences
def relative_percentage_diff(new, old):
    """Return ``(new - old) / old * 100``, element-wise.

    Args:
        new: Scalar or array-like of new values.
        old: Scalar or array-like of reference values (the denominator).

    Returns:
        The relative difference in percent. When ``old`` contains a zero, a
        warning line is printed and the result is an ``ndarray`` holding
        ``NaN`` at every position whose reference value is zero.

    The zero case is computed on float arrays under ``np.errstate`` because
    ``np.where`` evaluates both of its branches: without that, a Python
    ``float`` zero raised ``ZeroDivisionError`` and NumPy inputs emitted
    divide-by-zero runtime warnings despite the guard.
    """
    # Plain sequences do not support element-wise arithmetic; promote them.
    if isinstance(new, (list, tuple)):
        new = np.asarray(new, dtype=float)
    if isinstance(old, (list, tuple)):
        old = np.asarray(old, dtype=float)

    if np.any(old == 0):
        print('Warning: found possible division by zero')
        new_values = np.asarray(new, dtype=float)
        old_values = np.asarray(old, dtype=float)
        with np.errstate(divide='ignore', invalid='ignore'):
            percent = ((new_values - old_values) / old_values) * 100
        return np.where(old_values != 0, percent, np.nan)
    return ((new - old) / old) * 100



# Summary statistics comparing Anchors and the two-step method on the stored
# per-instance metrics; the resulting table is displayed and written to CSV.

# Ensure the inputs below are NumPy arrays. times_anchors, coverage_anchors and
# errors_anchors are not re-wrapped here and are used as they were extracted.
times_twostep = np.array(times_twostep)
coverage_twostep = np.array(coverage_twostep)
sizes_anchors = np.array(sizes_anchors)
feature_sizes_twostep = np.array(feature_sizes_twostep)
rsum_anchors = np.array(rsum_anchors)
rsum_twostep = np.array(rsum_twostep)



# Compute means and standard deviations
(time_mean_anchors, time_std_anchors) = compute_mean_std(times_anchors)
(time_mean_twostep, time_std_twostep) = compute_mean_std(times_twostep)

(coverage_mean_anchors, coverage_std_anchors) = compute_mean_std(coverage_anchors)
(coverage_mean_twostep, coverage_std_twostep) = compute_mean_std(coverage_twostep)

(sizes_mean_anchors, sizes_std_anchors) = compute_mean_std(sizes_anchors)
(sizes_mean_twostep, sizes_std_twostep) = compute_mean_std(feature_sizes_twostep)

(rsum_mean_anchors, rsum_std_anchors) = compute_mean_std(rsum_anchors)
(rsum_mean_twostep, rsum_std_twostep) = compute_mean_std(rsum_twostep)

# Compute relative percentage differences (Mean & Std)
# In every call the two-step value is `new` and the Anchors value is `old`,
# i.e. Anchors is the reference (denominator).
time_mean_diff = relative_percentage_diff(time_mean_twostep, time_mean_anchors)
coverage_mean_diff = relative_percentage_diff(coverage_mean_twostep, coverage_mean_anchors)

time_std_diff = relative_percentage_diff(time_std_twostep, time_std_anchors)
coverage_std_diff = relative_percentage_diff(coverage_std_twostep, coverage_std_anchors)

sizes_mean_diff = relative_percentage_diff(sizes_mean_twostep, sizes_mean_anchors)
sizes_std_diff = relative_percentage_diff(sizes_std_twostep, sizes_std_anchors)

rsum_mean_diff = relative_percentage_diff(rsum_mean_twostep, rsum_mean_anchors)
rsum_std_diff = relative_percentage_diff(rsum_std_twostep, rsum_std_anchors)

# Compute pointwise relative differences
# Element-wise, so the two arrays of each pair must line up instance by instance
# (assumes both result files list the instances in the same order — TODO confirm).
time_relative_pointwise = relative_percentage_diff(times_twostep, times_anchors)
coverage_relative_pointwise = relative_percentage_diff(coverage_twostep, coverage_anchors)

sizes_relative_pointwise = relative_percentage_diff(feature_sizes_twostep, sizes_anchors)
rsum_relative_pointwise = relative_percentage_diff(rsum_twostep, rsum_anchors)

# Compute pointwise means
# NOTE(review): relative_percentage_diff yields NaN where the Anchors value is 0;
# np.mean / np.std propagate NaN, so a single such entry makes the statistic NaN.
time_relative_mean = np.mean(time_relative_pointwise) 
coverage_relative_mean = np.mean(coverage_relative_pointwise)
sizes_relative_mean = np.mean(sizes_relative_pointwise)
rsum_relative_mean = np.mean(rsum_relative_pointwise)

# Compute pointwise standard deviations
time_relative_std = np.std(time_relative_pointwise) 
coverage_relative_std = np.std(coverage_relative_pointwise)
sizes_relative_std = np.std(sizes_relative_pointwise)
rsum_relative_std = np.std(rsum_relative_pointwise)

# Organize Data
# One row per metric, in the order given by 'Metric'; every list below follows
# that same order (Time, Size, Ranges_Sum, Coverage).
# NOTE(review): the chained `all_metrics_data = all_metrics_data = ...` is redundant.
all_metrics_data = all_metrics_data = {
    'Metric': ['Time', 'Size', 'Ranges_Sum', 'Coverage'],
    'ANCHORS_MEAN': [time_mean_anchors, sizes_mean_anchors, rsum_mean_anchors, coverage_mean_anchors],
    'ANCHORS_STD': [time_std_anchors, sizes_std_anchors, rsum_std_anchors, coverage_std_anchors],
    'TWOSTEP_MEAN': [time_mean_twostep, sizes_mean_twostep, rsum_mean_twostep, coverage_mean_twostep],
    'TWOSTEP_STD': [time_std_twostep, sizes_std_twostep, rsum_std_twostep, coverage_std_twostep],
    'MEAN_DIFF_%': [time_mean_diff, sizes_mean_diff, rsum_mean_diff, coverage_mean_diff],
    'STD_DIFF_%': [time_std_diff, sizes_std_diff, rsum_std_diff, coverage_std_diff],
    'POINTWISE_MEAN_%': [time_relative_mean, sizes_relative_mean, rsum_relative_mean, coverage_relative_mean],
    'POINTWISE_STD_%': [time_relative_std, sizes_relative_std, rsum_relative_std, coverage_relative_std],
    # Per-instance errors / coverage, in percent; filled in on the Coverage row only.
    # NOTE(review): an instance with coverage 0 would divide by zero here.
    'ANCHORS_ERROR_RATE_MEAN%': [None, None, None, np.mean(errors_anchors/coverage_anchors)*100],
    'ANCHORS_ERROR_RATE_STD%': [None, None, None, np.std(errors_anchors/coverage_anchors)*100],
    
}
# Display and save
all_metrics_df = pd.DataFrame(all_metrics_data)
# `display` is not imported in the visible code; presumably the IPython/Jupyter built-in.
display(all_metrics_df)
# Total number of Anchors errors over all instances.
print(errors_anchors.sum())
all_metrics_df.to_csv(f'Anchors_vs_Twostep_Results/Original_{dataset_name}_results.csv', index=False)

In [None]:
# Load Dataset
#df_artificial = pd.read_csv('datasets/artificial/'+f'{dataset_name}_artificial.csv')
# Directory holding the two-step outputs for the selected dataset.
result_path = f'{dataset_name}_results'

# Saved classifier for the selected dataset; predict_fn reads this global.
clf = tf.keras.models.load_model(f'new_models/{dataset_name}.h5', compile=False)

# Stored feature bounds: column 0 is used as the lower and column 1 as the
# upper limit (presumably one row per feature — confirm against the file).
loaded_bounds = np.load(f'bounds/{dataset_name}_data_bounds.npz')
original_bounds = loaded_bounds['original_bounds']
lower_bound = original_bounds[:, 0]
upper_bound = original_bounds[:, 1]

# Stored two-step explanations, indexed per test instance in the coverage loop.
twostep_exps = np.load(f'{result_path}/twostep_explanations0.25.npz')['twostep_explanations']

np.random.seed(50)

In [None]:
# Evaluation settings used by the coverage loop: `d` is the half-width of the
# sampling box around each test instance, `num_instances` the number of points
# drawn per instance, `epsilon` the strict-bound offset given to
# parse_explanation, and `p` tags the name of the raw-metrics CSV written later.
d, p, num_instances = 1, 0.25, 100
epsilon = 1e-6

# Only the column names and class labels are needed from load_data here; the
# train/test arrays it returns are bound to throwaway names.
_1, _2, _3, _4, feature_names, class_names = load_data(dataset_name)
if 'target' in feature_names:
    feature_names = feature_names[feature_names != 'target']

# Stored test instances for the selected dataset.
X_test = pd.read_csv(f'{dataset_name}_results/{dataset_name}_X_test.csv')

# Kept for reference: how the explainer was trained and saved.
# anchors_explainer, feature_names,class_names = train_anchors(dataset_name)
# anchors_explainer.save(f'Anchors_explainers/{dataset_name}_model')

# Stored Anchors explanations, one string per test instance.
anchors_explanations = pd.read_csv(f'./Anchors_results/{dataset_name}_explanations.csv')['Explanation']

In [None]:
# For every test instance: sample points around it, label them with the model,
# and count how many fall inside the two-step and the Anchors explanations.
twostep_coverages = []
anchors_coverages = []
anchors_errors = []
anchors_rsums = []
anchors_sizes = []
# The last column of X_test is dropped (presumably the label column — confirm).
for i, sample in enumerate(X_test.values[:,:-1]):
    # Sampling box: sample +/- d per feature, clipped to the stored data bounds.
    artificial_lower_bounds = sample-d
    artificial_lower_bounds = np.maximum(artificial_lower_bounds, lower_bound)
    artificial_upper_bounds = sample+d
    artificial_upper_bounds = np.minimum(artificial_upper_bounds, upper_bound)
    # Draws from NumPy's global RNG, so results depend on the seed and on the
    # order of draws across iterations.
    data = np.random.uniform(low=artificial_lower_bounds, high=artificial_upper_bounds, size=(num_instances, len(sample)))
    df_artificial = pd.DataFrame(data, columns=X_test.columns[:-1])
    # Append the original instance as the last row.
    df_artificial.loc[len(df_artificial)] = sample

    # Same object as df_artificial (no copy); the 'target' column added below
    # therefore appears on both names.
    test_dataset_df = df_artificial
    # Model prediction (arg-max over the outputs) for every row; the right-hand
    # side is evaluated before the 'target' column exists.
    test_dataset_df['target'] = np.argmax(clf.predict(df_artificial.values,verbose=0),axis=1)
    original_instance_df = pd.DataFrame(sample.reshape(1,-1), columns=X_test.columns[:-1])
    original_instance_df['target'] = test_dataset_df['target'].values[-1] #X_test.values[i,-1]
    # Predicted class of the original instance (the last row).
    prediction = test_dataset_df['target'].values[-1]
    # Two-step coverage: size of what the mymetrics helper returns for this
    # instance's explanation (presumably the rows that satisfy it).
    twostep_coverages.append(len(mymetrics.calculate_coverage_with_tolerance(test_dataset_df.drop(columns=['target']),twostep_exps[i])))

    #Anchors
    # The stored explanation is the text of a Python literal (a list of
    # condition strings is what parse_explanation iterates over).
    explanation = ast.literal_eval(anchors_explanations[i])
    bounds = parse_explanation(explanation, feature_names, epsilon)
    rsum = mymetrics.range_sum(bounds)
    # Size: number of (feature, condition) pairs in which the feature name is
    # exactly one of the pieces left after splitting on a spaced operator.
    size = 0
    for feature_name in feature_names:
        for anchor in explanation:
            if feature_name in re.split(r' <= | >= | < | > ', anchor):
                size += 1
    #df_artificial['target'] = X_test.values[i,-1]
    #display(test_dataset_df)
    # Unlike the two-step call above, the frame is passed with its 'target'
    # column, which the error count below reads from the result.
    coverage_df = mymetrics.calculate_coverage_with_tolerance(test_dataset_df, bounds)
    #print(bounds)
    #display(coverage_df)
    anchor_coverage = len(coverage_df)
    # Covered rows whose predicted class differs from the original instance's.
    error = len(coverage_df[coverage_df['target'] != prediction])

    # If the original instance itself is not matched by calculate_coverage, one
    # is added to the coverage (presumably to count the explained instance
    # regardless of parsing/rounding of the bounds — confirm).
    instance_coverage = len(mymetrics.calculate_coverage(original_instance_df, bounds))
    if instance_coverage == 0:
        anchor_coverage += 1

    # A zero coverage would later be a zero denominator in the relative metrics.
    if anchor_coverage == 0:
        print('WARNING')
    
    anchors_coverages.append(anchor_coverage)
    anchors_errors.append(error)
    # Collected per instance; not used by the visible code after the loop.
    anchors_rsums.append(rsum)
    anchors_sizes.append(size)

# Compute means and standard deviations
def compute_mean_std(arr):
    """Return ``(mean, std)`` of ``arr`` as computed by ``np.mean`` and ``np.std``."""
    mean_value = np.mean(arr)
    std_value = np.std(arr)
    return mean_value, std_value

# Compute relative percentage differences
def relative_percentage_diff(new, old):
    """Return ``(new - old) / old * 100``, element-wise.

    Args:
        new: Scalar or array-like of new values.
        old: Scalar or array-like of reference values (the denominator).

    Returns:
        The relative difference in percent. When ``old`` contains a zero, a
        warning line is printed and the result is an ``ndarray`` holding
        ``NaN`` at every position whose reference value is zero.

    The zero case is computed on float arrays under ``np.errstate`` because
    ``np.where`` evaluates both of its branches: without that, a Python
    ``float`` zero raised ``ZeroDivisionError`` and NumPy inputs emitted
    divide-by-zero runtime warnings despite the guard.
    """
    # Plain sequences do not support element-wise arithmetic; promote them.
    if isinstance(new, (list, tuple)):
        new = np.asarray(new, dtype=float)
    if isinstance(old, (list, tuple)):
        old = np.asarray(old, dtype=float)

    if np.any(old == 0):
        print('Warning: found possible division by zero')
        new_values = np.asarray(new, dtype=float)
        old_values = np.asarray(old, dtype=float)
        with np.errstate(divide='ignore', invalid='ignore'):
            percent = ((new_values - old_values) / old_values) * 100
        return np.where(old_values != 0, percent, np.nan)
    return ((new - old) / old) * 100

# Summarise the coverage counts collected by the loop above, then display and
# save both the summary row and the raw per-instance values.

# Ensure all lists are NumPy arrays
anchors_coverages = np.array(anchors_coverages)
twostep_coverages = np.array(twostep_coverages)
#Fixing floating point error due to npz
#twostep_coverages[twostep_coverages == 0] = 1
anchors_errors = np.array(anchors_errors)

(coverage_mean_anchors, coverage_std_anchors) = compute_mean_std(anchors_coverages)
(coverage_mean_twostep, coverage_std_twostep) = compute_mean_std(twostep_coverages)

# Two-step is `new` and Anchors is `old`, i.e. Anchors is the reference.
coverage_mean_diff = relative_percentage_diff(coverage_mean_twostep, coverage_mean_anchors)
coverage_std_diff = relative_percentage_diff(coverage_std_twostep, coverage_std_anchors)
# Element-wise per test instance; both arrays were appended to in the same loop.
coverage_relative_pointwise = relative_percentage_diff(twostep_coverages, anchors_coverages)
coverage_relative_mean = np.mean(coverage_relative_pointwise)
coverage_relative_std = np.std(coverage_relative_pointwise)

# Organize Data
# Single-row table (coverage only).
all_metrics_data = {
    'Metric': ['Coverage'],
    'ANCHORS_MEAN': [coverage_mean_anchors],
    'ANCHORS_STD': [coverage_std_anchors],
    'TWOSTEP_MEAN': [coverage_mean_twostep],
    'TWOSTEP_STD': [coverage_std_twostep],
    'MEAN_DIFF_%': [coverage_mean_diff],
    'STD_DIFF_%': [coverage_std_diff],
    'POINTWISE_MEAN_%': [coverage_relative_mean],
    'POINTWISE_STD_%': [coverage_relative_std],
    # Mean of per-instance errors / coverage, in percent. This is a scalar
    # rather than a one-element list like the other entries.
    'ANCHORS_ERROR_RATE%': np.mean(anchors_errors/anchors_coverages)*100
}
# Display and save
all_metrics_df = pd.DataFrame(all_metrics_data)
# `display` is not imported in the visible code; presumably the IPython/Jupyter built-in.
display(all_metrics_df)
all_metrics_df.to_csv(f'Anchors_vs_Twostep_Results/Artificial_{dataset_name}_results.csv', index=False)

#Save Raw Metric Data
raw_df = pd.DataFrame({
    "coverage_anchors": anchors_coverages, 
    "coverage_twostep": twostep_coverages,
    "coverage_relative_%": coverage_relative_pointwise
})

display(raw_df)
# The file name carries `p`, set in the settings cell.
raw_df.to_csv(f"{dataset_name}_results/Artificial_coverage_raw_metric_data_{p}.csv", index=False)