### *Module Loading*

In [None]:
import sys
from IPython.display import display as ip_display

### *External Module Loading*

In [None]:
# Folder holding the project's helper modules, one level above the working directory.
# Built with os.path so that the separator is correct on every OS
# (a hard-coded '..\\nn_likelihood_modules' only resolves on Windows).
import os
external_modules_path = os.path.join(os.pardir, 'nn_likelihood_modules')
# Re-running this cell must not pile up duplicate entries in sys.path
if external_modules_path not in sys.path:
    sys.path.append(external_modules_path)

In [None]:
from basic_network_structure import *
from common_imports import *
from common_use_functions import *
from constant import *
from defined_data_structure import *
from defined_network_structure import *
from distribution_calculation import *
from direct_inference_likelihood import *
from experim_neural_network import *
from experim_preparation import *
from generate_activation_level import *
from pytorch_model_predict import *
from vector_preprocessing import *
from ResNet import *
from experim_ResNet import *
from cifar_10_data_prep import *
from novelty_data_prep import *
from sensitivity_analysis import *

### *GPU verification*

In [None]:
# Count the visible GPUs and pick the compute device (CUDA when available, CPU otherwise)
nb_gpu = torch.cuda.device_count()
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Report the hardware found: name of the first GPU, or "CPU" when there is none
print(torch.cuda.get_device_name(0) if nb_gpu > 0 else "CPU")

### *Working directory*

In [None]:
# Absolute path of the directory the notebook is executed from
current_path = os.path.abspath(os.curdir)

### *Load configurations and data*

In [None]:
"""
All the parameters in this part should be configured
"""
# Experiment path (root folder for the models, attacks and outputs below)
experim_path = current_path

# File extensions
json_ext = '.json'
np_ext = '.npy'
csv_ext = '.csv'

# ResNet model prefix
model_name_prefix = 'cifar10'

# Image max pixel value (used to scale raw pixel values)
image_max_pix_val = 255

# Tested sets name
train_set_name = 'train'
test_set_name = 'test'
valid_set_name = 'valid'
input_extension = 'X'
label_extension = 'Y'

# Save paths
model_save_path = path_join(experim_path, 'experim_models_resnet')

# Adversarial attack path
adv_attack_path = path_join(experim_path, 'experim_resnet_attack')

"""
The following parameters should be configured according to your experiments
"""

# Trained model name
trained_resnet_name = 'cifar10_resnet18_9304' # You can select any model from the "experim_models_resnet" folder

# Indices filenames
train_indices_filename = 'train_indices_9304' # The train indices should be coherent with your chosen model
valid_indices_filename = 'valid_indices_9304' # The valid indices should be coherent with your chosen model

# ResNet related params
resnet_model_name = 'resnet18'# The model name should be coherent with your chosen model

# Cifar10-c datafolder path
# Every backslash is escaped: a lone '\C' is an invalid escape sequence that recent
# Python versions warn about (the resulting path value is unchanged).
cifar10_c_path = 'D:\\Doctorat\\research\\oms_detection_experim\\CIFAR-10-C\\CIFAR-10-C\\'

# Dataset general information
data_set_infos = {
    'nb_classes' : 10
}

# Distance decision filtering threshold
std_threshold_coeff = 2

# The method used to determine the significant neurons (mean or most_common)
sobol_filter_method = 'most_common'

# Rscript launch params
Rscript_path = 'C:\\Program Files\\R\\R-4.3.3\\bin\\Rscript.exe'

"""
"""

# The output folder
output_path = path_join(experim_path, 'output')

# Distribution file
# (this name is reassigned later, when the distributions are actually saved)
train_distribution_filename = path_join(output_path, 'distribution_' + train_set_name + '_set.csv')

# Train distance information registration path
train_dists_info_filename = 'train_dist_infos'
train_dists_info_path = path_join(output_path, train_dists_info_filename + json_ext)

# Build the class list: [0, 1, ..., nb_classes - 1]
class_list = list(range(data_set_infos['nb_classes']))

# Batch size for the dataloader creation
torch_batch_size = 128

In [None]:
# Create the output folder (output_path); the result files saved further down are written there.
# NOTE(review): presumably create_directory tolerates an already existing folder — confirm in the helper.
create_directory(output_path)

### *Experiment preparation*

In [None]:
# Get the CIFAR-10 train and test datasets through the project helper.
# NOTE(review): judging by the helper's name no transform is attached — confirm in its definition.
cifar10_train_dataset, cifar10_test_dataset = get_cifar10_dataset_without_transform()

In [None]:
# Column layout shared by the filtering-result dataframes built in this notebook
column_names_OOD_filtering = [
    'transformation',
    'nb_examples',
    'nb_OOD',
    'nb_InD',
    'total_acc',
    'OOD_acc',
    'InD_acc',
]

### *Load the trained ResNet*

In [None]:
# Load the trained ResNet identified by trained_resnet_name from the model folder (model_save_path)
trained_resnet = load_model_by_net_name(model_save_path, trained_resnet_name)

### *Cifar10 dataset preparation*

In [None]:
# Load the train / valid indices stored next to the trained model.
# The files are opened in `with` blocks so the handles are closed as soon as the
# content is read (a bare open() left them open until garbage collection).
# NOTE(review): assumes load_json reads the whole file object eagerly (json.load style) — confirm.
with open(path_join(model_save_path, train_indices_filename+json_ext)) as train_indices_file:
    train_indices = load_json(train_indices_file)
with open(path_join(model_save_path, valid_indices_filename+json_ext)) as valid_indices_file:
    valid_indices = load_json(valid_indices_file)

In [None]:
# Build the subsets: both are taken from the CIFAR-10 training dataset,
# one with the stored train indices and one with the stored valid indices
cifar10_real_train_dataset = Subset(cifar10_train_dataset, train_indices)
cifar10_valid_dataset = Subset(cifar10_train_dataset, valid_indices)

In [None]:
# Dataloader building
# One loader per split, all created with identical settings (no shuffling, num_workers=0)
train_loader, valid_loader, test_loader = (
    create_loader_from_torch_dataset(split_dataset, batch_size=torch_batch_size, shuffle=False, num_workers=0)
    for split_dataset in (cifar10_real_train_dataset, cifar10_valid_dataset, cifar10_test_dataset)
)

In [None]:
# Convert the training set to numpy arrays.
# The loader uses a batch size equal to the dataset length, so its first batch is the whole set.
no_divide_into_batch_train_loader = create_loader_from_torch_dataset(cifar10_real_train_dataset, batch_size=len(cifar10_real_train_dataset), shuffle=False, num_workers=0)
# Fetch that batch once and reuse it for inputs and labels: calling next(iter(...)) a second
# time would load and collate the complete dataset again just to read the labels.
whole_train_batch = next(iter(no_divide_into_batch_train_loader))
cifar10_train_X = whole_train_batch[0].numpy()
cifar10_train_y = whole_train_batch[1].numpy()

In [None]:
# Convert the test set to numpy arrays.
# The loader uses a batch size equal to the dataset length, so its first batch is the whole set.
no_divide_into_batch_test_loader = create_loader_from_torch_dataset(cifar10_test_dataset, batch_size=len(cifar10_test_dataset), shuffle=False, num_workers=0)
# Fetch that batch once and reuse it for inputs and labels: calling next(iter(...)) a second
# time would load and collate the complete dataset again just to read the labels.
whole_test_batch = next(iter(no_divide_into_batch_test_loader))
X_test = whole_test_batch[0].numpy()
y_test = whole_test_batch[1].numpy()

### *Distribution and Training set likelihood distance information generation*

In [None]:
# Move the model to the device selected in the "GPU verification" cell.
# On a CUDA machine this is the same as .cuda(); on a CPU-only machine the cell no longer
# raises, in line with the CPU fallback already chosen for `device`.
# NOTE(review): the project helpers used afterwards (e.g. predict_gpu) may still require CUDA — confirm.
trained_resnet.to(device)

In [None]:
# Get the predictions of the trained ResNet on the training loader, as a numpy array.
# train_loader was created with shuffle=False, so the order is expected to match cifar10_train_y
# (both come from cifar10_real_train_dataset).
cifar10_train_y_pred = np.array(predict_gpu(trained_resnet, train_loader))

In [None]:
# Get the correctly predicted entries
# Element-wise comparison: True where the prediction equals the true label
bool_correct_examples = cifar10_train_y==cifar10_train_y_pred
# Keep only the inputs and labels of the correctly predicted examples
cifar10_X_train_correct = cifar10_train_X[bool_correct_examples]
cifar10_y_train_correct = cifar10_train_y[bool_correct_examples]
# Data Loader over the correctly predicted examples only (shuffle disabled)
correct_train_loader = create_dataloader(cifar10_X_train_correct, cifar10_y_train_correct, torch_batch_size, shuffle=False) 

In [None]:
# Get the activation levels of the correctly predicted training examples.
# The result is indexed with the key 'actLevel' further down (per-layer activation levels).
train_actLevels = obtain_activation_levels(trained_resnet,
                                           correct_train_loader, train_set_name, with_predict_class=True, loss_type='cross_entropy')

In [None]:
## Distribution generation
# Generate the distribution from the activation levels of the correctly predicted training examples
train_distributions = generate_distributions(train_actLevels, train_set_name, trained_resnet_name)
# Save the distribution.
# The value returned by save_distributions replaces the train_distribution_filename set in the
# configuration cell; this is the name handed to the likelihood computations below.
train_distribution_filename = save_distributions(output_path, train_distributions, train_set_name)

In [None]:
# Likelihood information per class on the training set (correctly predicted examples)
# The last hidden layer is the one with the largest layer id among the recorded activation levels
last_hidden_layerId = max(train_actLevels['actLevel'])
# Likelihood computed on that layer only
train_last_hidden_whole_distances = layer_whole_likelihood_experim(
    train_actLevels,
    last_hidden_layerId,
    train_distribution_filename,
    class_list,
    use_absolute_module_path=True,
)

In [None]:
## Calculate the information and save it
# Information calculation: per-class information built from the training-set likelihood distances.
# class_dist_infos is reused below to normalize and to filter the distances of every evaluated set.
class_dist_infos = build_layer_train_set_infos(train_last_hidden_whole_distances, class_list)
# Register the calculated information as JSON (train_dists_info_path)
store_dict_as_json(train_dists_info_path, class_dist_infos)

### *Evaluate the likelihood on the whole training set*

In [None]:
# Get the activation levels of the whole training split (correctly and wrongly predicted examples alike).
# (We named this all_train_actLevels because we already have the activation levels for the correctly predicted examples)
all_train_actLevels = obtain_activation_levels(trained_resnet,
                                           train_loader, 'whole train', with_predict_class=True, loss_type='cross_entropy')

In [None]:
# Calculate the likelihood on the last hidden layer for the whole training split,
# against the distribution file saved from the correctly predicted training examples
all_train_whole_distances = layer_whole_likelihood_experim(all_train_actLevels, last_hidden_layerId,
                               train_distribution_filename, class_list, use_absolute_module_path=True)

In [None]:
# Normalize the whole distances with the per-class training information (class_dist_infos).
# The variable is rebound to the normalized result; the raw distances are not kept.
all_train_whole_distances = normalize_whole_distances(all_train_whole_distances, class_dist_infos)

In [None]:
# Create the mapped layer distances dataframe (built from the normalized distances);
# it is only used for the display of the filtered examples below
all_train_distances = map_to_predicted_class_distance(all_train_whole_distances, class_list)

In [None]:
# Find the decisions to be filtered, using the normalized distances, the per-class training
# information and the threshold coefficient std_threshold_coeff.
# The result is used as a row selector (.loc) on all_train_distances below.
all_train_filtered_index = filter_decision_based_on_train_infos_norm_ver(all_train_whole_distances, class_dist_infos, std_threshold_coeff)

In [None]:
# Show the training examples rejected by the filter, or a message when nothing was rejected
if len(all_train_filtered_index) == 0:
    print('All the example images are identified as good inputs by the likelihood distance.')
else:
    # Rows of the mapped distances that belong to the filtered examples
    ip_display(all_train_distances.loc[all_train_filtered_index,:])

### *Evaluate the likelihood on the test set*

In [None]:
# Get the activation levels of the CIFAR-10 test set
test_actLevels = obtain_activation_levels(trained_resnet,
                                           test_loader, 'test', with_predict_class=True, loss_type='cross_entropy')

In [None]:
# Calculate the likelihood of the test set on the last hidden layer,
# against the distribution file saved from the training set
test_whole_distances = layer_whole_likelihood_experim(test_actLevels, last_hidden_layerId,
                               train_distribution_filename, class_list, use_absolute_module_path=True)

In [None]:
# Normalize the test distances with the per-class training information (class_dist_infos).
# The variable is rebound to the normalized result; the raw distances are not kept.
test_whole_distances = normalize_whole_distances(test_whole_distances, class_dist_infos)

In [None]:
# Find the test decisions to be filtered (same training information and threshold coefficient
# as for the training set)
test_filtered_index = filter_decision_based_on_train_infos_norm_ver(test_whole_distances, class_dist_infos, std_threshold_coeff)

In [None]:
# Create the mapped layer distances dataframe for the test set (built from the normalized distances);
# it is only used for the display of the filtered examples below
test_distances = map_to_predicted_class_distance(test_whole_distances, class_list)

In [None]:
# Show the test examples rejected by the filter, or a message when nothing was rejected
if len(test_filtered_index) == 0:
    print('All the example images are identified as good inputs by the likelihood distance.')
else:
    # Rows of the mapped distances that belong to the filtered examples
    ip_display(test_distances.loc[test_filtered_index,:])

### *Sobol index evaluation*

In [None]:
# Get the correctly predicted index: boolean mask over the whole training split,
# True where the recorded class equals the predicted class
correctly_predicted_bools = all_train_actLevels['class'] == all_train_actLevels['predict_class']
# Get the corresponding data: last-hidden-layer activation levels of those examples.
# The mask is flattened with reshape(-1) before it is used as a row selector.
last_hidden_actLevels = all_train_actLevels['actLevel'][last_hidden_layerId][correctly_predicted_bools.reshape(-1)]

In [None]:
# Get the model parameters (requested as numpy through to_numpy=True)
model_params = get_model_parameters(trained_resnet, to_numpy=True)
# Parameters stored under the 'linear_out' entry; its 'weight' entry is rescaled below
final_linear_params = model_params['linear_out']

In [None]:
# Normalization of weight and input.
# Each neuron's activation column is divided by that neuron's maximum, and the matching weight
# column is multiplied by the same maximum, so every (activation * weight) product is unchanged.
# Copy the original activation levels and weights (the originals are left untouched)
normalized_last_hidden_actLevel = copy.deepcopy(last_hidden_actLevels)
upped_final_linear_params = copy.deepcopy(final_linear_params)
# Check the min and max values of each neuron (reduction along axis 0)
last_hidden_actLevel_max = np.max(last_hidden_actLevels, axis=0)
last_hidden_actLevel_min = np.min(last_hidden_actLevels, axis=0) # Not used, just for verification
# Iterate over the maximum values and normalize the input
for index, neuron_max in enumerate(last_hidden_actLevel_max):
    # A neuron whose maximum is zero is skipped: this avoids a division by zero
    if neuron_max != 0:
        normalized_last_hidden_actLevel[:,index] = normalized_last_hidden_actLevel[:,index] / neuron_max
        upped_final_linear_params['weight'][:,index] = upped_final_linear_params['weight'][:,index] * neuron_max

In [None]:
# Build the final linear parameters per class from the rescaled parameters
final_linear_param_per_class = build_per_class_linear_model(upped_final_linear_params)
# The data used for the Sobol analysis are the normalized last-hidden-layer activation levels
data = normalized_last_hidden_actLevel
# Number of variables (second dimension of the data, i.e. one variable per neuron)
nb_vars = data.shape[1]

In [None]:
## Inputs (X) of the Sobol index evaluation in R
# One column per last-hidden-layer neuron, named neuron_0, neuron_1, ...
neuron_names = [f'neuron_{index}' for index in range(last_hidden_actLevels.shape[1])]
# Dataframe of the normalized activation levels with those column names
R_X = pd.DataFrame(data, columns=neuron_names)

In [None]:
## Outputs (y) of the Sobol index evaluation in R
# For every class, evaluate that class's linear model on each row of R_X
R_y_values_per_class = {
    classId: [
        evaluate_y(final_linear_param_per_class[classId], one_x)
        for _, one_x in R_X.iterrows()
    ]
    for classId in final_linear_param_per_class
}
# Wrap each list of values in a single-column ('output') dataframe
R_y_per_class = {
    classId: pd.DataFrame(class_y_values, columns=['output'])
    for classId, class_y_values in R_y_values_per_class.items()
}

In [None]:
# Save the X and y for R script execution:
# R_X.csv holds the inputs, R_y_<classId>.csv holds the outputs of one class
save_df_to_csv(path_join(output_path, 'R_X.csv'),R_X)
for classId in R_y_per_class:
    save_df_to_csv(path_join(output_path, 'R_y_'+str(classId)+'.csv'),R_y_per_class[classId])

In [None]:
# Table of the per-class linear parameters: classId, weight_0 ... weight_k, bias
# (the number of weight columns is read from the entry of class 0)
R_network_params_columns = [
    'classId',
    *[f'weight_{index}' for index in range(len(final_linear_param_per_class[0]['weight']))],
    'bias',
]
# One row per class, in the iteration order of final_linear_param_per_class
R_network_params_data = [
    [classId, *final_linear_param_per_class[classId]['weight'], final_linear_param_per_class[classId]['bias']]
    for classId in final_linear_param_per_class
]
R_network_params = pd.DataFrame(R_network_params_data, columns=R_network_params_columns)
# Written to R_network_params.csv in the output folder
save_df_to_csv(path_join(output_path, 'R_network_params.csv'),R_network_params)

In [None]:
# Sample size: N is passed to the generation of each of the two samples (A and B) drawn from R_X below.
# NOTE(review): both samples are requested with replace=False and B excludes A's items, so the data
# presumably needs at least 2*N rows — confirm against the sampling helpers.
N = 22000

In [None]:
# Generate two random samples from R_X
# Row labels for sample A, then for sample B (A's items are passed to the helper to be excluded)
A_index = generate_sample_index(data, N, replace=False)
B_index = generate_sample_index_exclude_items(data, N, A_index, replace=False)
# Select the rows, copy them and renumber the index from 0
R_X_A = R_X.loc[A_index,:].copy(deep=True).reset_index(drop=True)
R_X_B = R_X.loc[B_index,:].copy(deep=True).reset_index(drop=True)
# Save the random samples from R_X (R_X_A.csv and R_X_B.csv in the output folder)
save_df_to_csv(path_join(output_path, 'R_X_A.csv'),R_X_A)
save_df_to_csv(path_join(output_path, 'R_X_B.csv'),R_X_B)

In [None]:
# You could choose sobolEff (first and total indices); it is the only method handled when the
# indices are loaded further down
R_sobol_method = 'sobolEff'
# The R script is looked up in experim_path under the name '<method>_eval.R'
R_sobol_script = path_join(experim_path, R_sobol_method+'_eval.R')

In [None]:
# Execute the R script. Arguments handed to the script: the folder holding the exchanged CSV
# files (output_path followed by a path separator) and the number of classes.
# - The command is given as a list and run without a shell: combining a list with shell=True
#   hands only the first item to the shell on POSIX, and no shell feature is needed here.
# - os.path.join(output_path, '') appends the separator of the current OS instead of a
#   hard-coded '\\' (the result is identical on Windows).
# - check=True raises CalledProcessError when Rscript exits with an error, instead of silently
#   continuing and reading missing or stale result files in the next cells.
# NOTE(review): assumes `run` is subprocess.run (brought in by the project's star imports) — confirm.
ouput_R = run([Rscript_path, '--vanilla', R_sobol_script, os.path.join(output_path, ''), str(data_set_infos['nb_classes'])], check=True)

In [None]:
## Get the important variables per class
# Reset the result first so that a failed run never leaves a value from a previous execution
important_variables_per_class = None
if R_sobol_method == 'sobolEff':
    # Load the sobol indices (first and total) written by the R script and convert them to dictionaries
    R_first_order_sobol_per_class_dict = {}
    R_total_order_sobol_per_class_dict = {}
    for classId in class_list:
        # File names: <method>_fs_<classId>.csv (first order) and <method>_tt_<classId>.csv (total order)
        R_current_class_first_order_sobol_indices = read_csv_to_pd_df(path_join(output_path, R_sobol_method+'_fs_'+str(classId)+csv_ext))
        R_current_class_total_order_sobol_indices = read_csv_to_pd_df(path_join(output_path, R_sobol_method+'_tt_'+str(classId)+csv_ext))
        # Only the 'S.original' column is kept, as {row label: value}
        R_first_order_sobol_per_class_dict[classId] = R_current_class_first_order_sobol_indices['S.original'].to_dict()
        R_total_order_sobol_per_class_dict[classId] = R_current_class_total_order_sobol_indices['S.original'].to_dict()
    # Get the important variables per class
    important_variables_per_class = important_variables_R_first_and_total_order_analysis(R_first_order_sobol_per_class_dict,
                                                                                         R_total_order_sobol_per_class_dict,
                                                                                         class_list, filter_method=sobol_filter_method)
else:
    # Fail here with a clear message instead of letting the following cells iterate over None
    raise ValueError("Unsupported R_sobol_method '" + str(R_sobol_method) + "': only 'sobolEff' is handled")

In [None]:
# One row per class: class id, total number of neurons, number of neurons kept as important
determined_nb_important_vars_per_class = [
    [classId, nb_vars, len(important_variables_per_class[classId].keys())]
    for classId in important_variables_per_class
]
determined_nb_important_vars_per_class_df = pd.DataFrame(
    determined_nb_important_vars_per_class,
    columns=['classId', 'nb_neurons', 'nb_important_neurons'],
)

In [None]:
# Save the important variables (JSON) and the per-class neuron counts (CSV);
# both file names are prefixed with the trained model name
store_dict_as_json(path_join(output_path, trained_resnet_name+'_important_neurons.json'), important_variables_per_class)
save_df_to_csv(path_join(output_path, trained_resnet_name+'_nb_important_neurons.csv'), determined_nb_important_vars_per_class_df)

### *Likelihood calculation based only on the important variables*

In [None]:
# Important variable (neuron) indices of each class, in ascending order
sorted_important_var_by_class = {
    classId: sorted(important_variables_per_class[classId])
    for classId in important_variables_per_class
}
# Per class, the mapping dictionary used to modify the neuron indices
important_var_map_dict_by_class = {
    classId: build_map_to_index_dict(sorted_indices)
    for classId, sorted_indices in sorted_important_var_by_class.items()
}

In [None]:
# Take the activation levels of the important variables for each class, on the last hidden layer,
# for the whole training split and for the test set
built_train_actLevels_by_class = build_actLevel_important_vars(all_train_actLevels, sorted_important_var_by_class, last_hidden_layerId)
built_test_actLevels_by_class = build_actLevel_important_vars(test_actLevels, sorted_important_var_by_class, last_hidden_layerId)

In [None]:
## Take the training set distribution only for the important neurons
# Read the training set distribution (space-separated file)
train_whole_distribution = pd.read_csv(train_distribution_filename, sep=' ')
# Take only the last hidden layer distribution (We calculate the likelihood only based on this).
# This selection does not depend on the class, so it is done once, before the loop, instead of
# filtering and deep-copying the same rows again for every class.
train_whole_last_hidden_distribution = train_whole_distribution[train_whole_distribution['layerId'] == last_hidden_layerId].copy(deep=True)
# Take the distribution for different classes
train_important_var_distrib_by_class = {}
for classId in sorted_important_var_by_class:
    # Get the important variables of the current class
    current_important_neuron_indices = sorted_important_var_by_class[classId]
    # Filter the distribution on the last hidden layer (rows whose nodeId is an important neuron)
    important_var_train_whole_last_hidden_distribution = train_whole_last_hidden_distribution[train_whole_last_hidden_distribution['nodeId'].isin(current_important_neuron_indices)]
    # Map the node Id with the class's mapping dictionary and renumber the rows from 0
    train_important_var_distrib_by_class[classId] = important_var_train_whole_last_hidden_distribution.replace({"nodeId": important_var_map_dict_by_class[classId]}).reset_index(drop=True)

In [None]:
# Save the temporarily generated distribution files by class
# (space-separated, named distribution_important_var_class_<classId>.csv in the output folder)
for classId in train_important_var_distrib_by_class:
    save_df_to_csv(path_join(output_path, 'distribution_important_var_class_'+str(classId)+csv_ext), train_important_var_distrib_by_class[classId], sep=' ')

In [None]:
# Calculate the likelihood based on only the important variables, for the whole training split
# and for the test set.
# NOTE(review): the helper receives output_path, presumably to read the per-class distribution
# files saved in the previous cell — confirm in its definition.
train_whole_likelihood_important_vars = calculate_likelihood_with_important_vars(output_path, built_train_actLevels_by_class,
                                             last_hidden_layerId)
test_whole_likelihood_important_vars = calculate_likelihood_with_important_vars(output_path, built_test_actLevels_by_class,
                                             last_hidden_layerId)

In [None]:
# Get only the correctly predicted examples (rows where 'classId' equals 'predicted_classId')
correct_train_whole_likelihood_important_var = train_whole_likelihood_important_vars[train_whole_likelihood_important_vars['classId'] == train_whole_likelihood_important_vars['predicted_classId']]
# Calculate the distance (with only important variables) information per class
class_dist_infos_important_var = build_layer_train_set_infos(correct_train_whole_likelihood_important_var, class_list)
# Register the calculated information (train_dist_infos_important_var.json in the output folder)
store_dict_as_json(path_join(output_path, 'train_dist_infos_important_var'+json_ext), class_dist_infos_important_var)

In [None]:
# Normalize the distances with the important-variable training information.
# Both variables are rebound to the normalized result; the raw distances are not kept.
train_whole_likelihood_important_vars = normalize_whole_distances(train_whole_likelihood_important_vars, class_dist_infos_important_var)
test_whole_likelihood_important_vars = normalize_whole_distances(test_whole_likelihood_important_vars, class_dist_infos_important_var)

In [None]:
# Find the decisions to be filtered (important variables only), with the same threshold
# coefficient as for the all-neuron filtering
train_filtered_index_important_var = filter_decision_based_on_train_infos_norm_ver(train_whole_likelihood_important_vars, class_dist_infos_important_var, std_threshold_coeff)
test_filtered_index_important_var = filter_decision_based_on_train_infos_norm_ver(test_whole_likelihood_important_vars, class_dist_infos_important_var, std_threshold_coeff)

In [None]:
# Original CIFAR-10 dataset filtering results (with only the important neurons)
# Accuracy evaluation on the filtering
built_cifar10_original_important_vars_filtering_result = []
print('Evaluation with only the important neurons:')
print('Training set:')
# Each row is the set name followed by the values returned by evaluate_filtering
built_cifar10_original_important_vars_filtering_result.append(['train',
                                                *evaluate_filtering(train_whole_likelihood_important_vars, train_filtered_index_important_var, 'train')])
print()
print('Test set:')
built_cifar10_original_important_vars_filtering_result.append(['test',
                                                *evaluate_filtering(test_whole_likelihood_important_vars, test_filtered_index_important_var, 'test')])
# Build the result dataframe (columns given by column_names_OOD_filtering)
cifar10_original_important_vars_OOD_df = pd.DataFrame(built_cifar10_original_important_vars_filtering_result, columns=column_names_OOD_filtering)
# Save the result dataframe
save_df_to_csv(path_join(output_path, trained_resnet_name+'_cifar10_important_vars_OOD_result.csv'), cifar10_original_important_vars_OOD_df)

In [None]:
# Original CIFAR-10 dataset filtering results (with all neurons)
# Accuracy evaluation on the filtering
built_cifar10_original_filtering_result = []
print('Evaluation with all the neurons:')
print('Training set:')
# Each row is the set name followed by the values returned by evaluate_filtering
built_cifar10_original_filtering_result.append(['train', *evaluate_filtering(all_train_whole_distances, all_train_filtered_index, 'train')])
print()
print('Test set:')
built_cifar10_original_filtering_result.append(['test', *evaluate_filtering(test_whole_distances, test_filtered_index, 'test')])
# Build the result dataframe (columns given by column_names_OOD_filtering)
cifar10_original_OOD_df = pd.DataFrame(built_cifar10_original_filtering_result, columns=column_names_OOD_filtering)
# Save the result dataframe
save_df_to_csv(path_join(output_path, trained_resnet_name+'_cifar10_OOD_result.csv'), cifar10_original_OOD_df)

In [None]:
# Save the train and test set whole likelihood distances (already normalized at this point):
# the '_sobol_' files hold the important-neuron version, the others the all-neuron version
save_df_to_csv(path_join(output_path, trained_resnet_name+'_train_sobol_whole_likelihood.csv'), train_whole_likelihood_important_vars)
save_df_to_csv(path_join(output_path, trained_resnet_name+'_test_sobol_whole_likelihood.csv'), test_whole_likelihood_important_vars)
save_df_to_csv(path_join(output_path, trained_resnet_name+'_train_whole_likelihood.csv'), all_train_whole_distances)
save_df_to_csv(path_join(output_path, trained_resnet_name+'_test_whole_likelihood.csv'), test_whole_distances)

### *Cifar10 novelty experiments*

In [None]:
# Get the svhn train and test datasets through the project helper (only the test part is used below)
svhn_train_dataset, svhn_test_dataset = get_svhn_dataset_without_transform()

In [None]:
# Dataloader building (only the svhn test loader is active; the train loader is left commented out)
# svhn_train_loader = create_loader_from_torch_dataset(svhn_train_dataset, batch_size=torch_batch_size, shuffle=False, num_workers=0)
svhn_test_loader = create_loader_from_torch_dataset(svhn_test_dataset, batch_size=torch_batch_size, shuffle=False, num_workers=0)

In [None]:
# Get the dtd train and test datasets through the project helper (only the test part is used below).
# NOTE(review): judging by the helper's name the images are resized — confirm the target size in its definition.
dtd_train_dataset, dtd_test_dataset = get_dtd_dataset_resized()

In [None]:
# # Convert the train set to numpy array
# no_divide_into_batch_dtd_train_loader = create_loader_from_torch_dataset(dtd_train_dataset, batch_size=len(dtd_train_dataset), shuffle=False, num_workers=0)
# X_train_dtd = next(iter(no_divide_into_batch_dtd_train_loader))[0].numpy()
# y_train_dtd = next(iter(no_divide_into_batch_dtd_train_loader))[1].numpy()

In [None]:
# Convert the dtd test set to numpy arrays.
# The loader uses a batch size equal to the dataset length, so its first batch is the whole set.
no_divide_into_batch_dtd_test_loader = create_loader_from_torch_dataset(dtd_test_dataset, batch_size=len(dtd_test_dataset), shuffle=False, num_workers=0)
# Fetch that batch once and reuse it for inputs and labels: calling next(iter(...)) a second
# time would load and collate the complete dataset again just to read the labels.
whole_dtd_test_batch = next(iter(no_divide_into_batch_dtd_test_loader))
X_test_dtd = whole_dtd_test_batch[0].numpy()
y_test_dtd = whole_dtd_test_batch[1].numpy()

In [None]:
# Build the dtd loaders (using random original labels (because they are not important))
# The labels are drawn uniformly in [0, nb_classes) with one label per dtd test example;
# no random seed is set here, so they differ from one run to the next.
# dtd_train_loader = create_dataloader(X_train_dtd, np.random.randint(0, data_set_infos['nb_classes'], y_train_dtd.shape[0]), 
#                                      batch_size=torch_batch_size, shuffle=False, type_conversion=True)
dtd_test_loader = create_dataloader(X_test_dtd, np.random.randint(0, data_set_infos['nb_classes'], y_test_dtd.shape[0]), 
                                     batch_size=torch_batch_size, shuffle=False, type_conversion=True)

In [None]:
# Get the places365 test dataset through the project helper.
# NOTE(review): judging by the helper's name the images are resized — confirm the target size in its definition.
places_test_dataset = get_places_test_dataset_resized()

In [None]:
# Convert the places365 test set to numpy arrays.
# The loader uses a batch size equal to the dataset length, so its first batch is the whole set.
no_divide_into_batch_places_test_loader = create_loader_from_torch_dataset(places_test_dataset, batch_size=len(places_test_dataset), shuffle=False, num_workers=0)
# Fetch that batch once and reuse it for inputs and labels: calling next(iter(...)) a second
# time would load and collate the complete dataset again just to read the labels.
whole_places_test_batch = next(iter(no_divide_into_batch_places_test_loader))
X_test_places = whole_places_test_batch[0].numpy()
y_test_places = whole_places_test_batch[1].numpy()

In [None]:
# Build the places365 loaders (using random original labels (because they are not important))
# The labels are drawn uniformly in [0, nb_classes) with one label per places365 test example;
# no random seed is set here, so they differ from one run to the next.
places_test_loader = create_dataloader(X_test_places, np.random.randint(0, data_set_infos['nb_classes'], y_test_places.shape[0]), 
                                     batch_size=torch_batch_size, shuffle=False, type_conversion=True)

In [None]:
# All the OOD test loaders, keyed by dataset name (the key order drives the loops below)
novelty_loaders = {
    'svhn': svhn_test_loader,
    'dtd': dtd_test_loader,
    'places': places_test_loader,
}

In [None]:
# Activation levels of every OMS dataset, keyed like novelty_loaders;
# the set name handed to the helper is '<dataset> test'
novelty_actLevels = {
    ood_type: obtain_activation_levels(trained_resnet,
                                       ood_loader, ood_type + ' test',
                                       with_predict_class=True, loss_type='cross_entropy')
    for ood_type, ood_loader in novelty_loaders.items()
}

In [None]:
## Whole likelihood distances experiment
# Likelihood of each OOD dataset on the last hidden layer, against the training distribution file
novelty_whole_distances = {
    ood_type: layer_whole_likelihood_experim(ood_actLevels, last_hidden_layerId,
                                             train_distribution_filename, class_list,
                                             use_absolute_module_path=True)
    for ood_type, ood_actLevels in novelty_actLevels.items()
}

In [None]:
# Normalize the distances of every OOD dataset with the per-class training information.
# Existing keys are reassigned while iterating; no key is added or removed, so the dictionary
# size does not change during the loop.
for ood_type in novelty_whole_distances:
    novelty_whole_distances[ood_type] = normalize_whole_distances(novelty_whole_distances[ood_type], class_dist_infos)

In [None]:
# Save the whole likelihood dataframes (normalized), one file per OOD dataset:
# <model name>_<dataset>_whole_likelihood.csv in the output folder
for ood_type in novelty_whole_distances:
    save_df_to_csv(path_join(output_path, trained_resnet_name+'_'+ood_type+'_whole_likelihood.csv'), novelty_whole_distances[ood_type])

In [None]:
# Likelihood filtering: for every OOD dataset, find the decisions to be filtered
# (same training information and threshold coefficient as for CIFAR-10)
novelty_filtered_index = {}
for ood_type in novelty_whole_distances:
    novelty_filtered_index[ood_type] = filter_decision_based_on_train_infos_norm_ver(novelty_whole_distances[ood_type], class_dist_infos, std_threshold_coeff)

In [None]:
# Accuracy evaluation on the filtering (all neurons), one result row per OOD dataset
built_novelty_filtering_result = []
for ood_type in novelty_filtered_index:
    print(ood_type, 'set:')
    novelty_filtering_result = evaluate_filtering(novelty_whole_distances[ood_type], novelty_filtered_index[ood_type], ood_type)
    print()
    # Row: dataset name followed by the values returned by evaluate_filtering
    built_novelty_filtering_result.append([ood_type, *novelty_filtering_result])
# Build the result dataframe (columns given by column_names_OOD_filtering)
novelty_OOD_df = pd.DataFrame(built_novelty_filtering_result, columns=column_names_OOD_filtering)
# Save the result dataframe
save_df_to_csv(path_join(output_path, trained_resnet_name+'_novelty_OOD_result.csv'), novelty_OOD_df)

In [None]:
# Take the activation levels of the important variables for each class, on the last hidden layer,
# for every OOD dataset
built_novelty_actLevels_by_class = {}
for ood_type in novelty_actLevels:
    built_novelty_actLevels_by_class[ood_type] = build_actLevel_important_vars(novelty_actLevels[ood_type], sorted_important_var_by_class, last_hidden_layerId)

In [None]:
# Generate the whole likelihood with the important variables, for every OOD dataset
# (same helper and arguments as for the CIFAR-10 train and test sets)
novelty_whole_likelihood_important_vars = {}
for ood_type in built_novelty_actLevels_by_class:
    novelty_whole_likelihood_important_vars[ood_type] = calculate_likelihood_with_important_vars(output_path, built_novelty_actLevels_by_class[ood_type],
                                                 last_hidden_layerId)

In [None]:
# Normalize the distances with the important-variable training information.
# Existing keys are reassigned while iterating; no key is added or removed.
for ood_type in novelty_whole_likelihood_important_vars:
    novelty_whole_likelihood_important_vars[ood_type] = normalize_whole_distances(novelty_whole_likelihood_important_vars[ood_type], class_dist_infos_important_var)

In [None]:
# Save the whole likelihood dataframes (important variables, normalized), one file per OOD dataset:
# <model name>_<dataset>_sobol_whole_likelihood.csv in the output folder
for ood_type in novelty_whole_likelihood_important_vars:
    save_df_to_csv(path_join(output_path, trained_resnet_name+'_'+ood_type+'_sobol_whole_likelihood.csv'), novelty_whole_likelihood_important_vars[ood_type])

In [None]:
# Likelihood filtering with the important variables: for every OOD dataset, find the decisions
# to be filtered using the important-variable training information
novelty_filtered_index_important_vars = {}
for ood_type in novelty_whole_likelihood_important_vars:
    novelty_filtered_index_important_vars[ood_type] = filter_decision_based_on_train_infos_norm_ver(novelty_whole_likelihood_important_vars[ood_type], class_dist_infos_important_var, std_threshold_coeff)

In [None]:
# Accuracy evaluation on the filtering (important variables only), one result row per OOD dataset
built_novelty_important_vars_filtering_result = []
for ood_type in novelty_filtered_index_important_vars:
    print(ood_type, 'set:')
    novelty_important_vars_filtering_result = evaluate_filtering(novelty_whole_likelihood_important_vars[ood_type], novelty_filtered_index_important_vars[ood_type], ood_type)
    print()
    # Row: dataset name followed by the values returned by evaluate_filtering
    built_novelty_important_vars_filtering_result.append([ood_type, *novelty_important_vars_filtering_result])
# Build the result dataframe (columns given by column_names_OOD_filtering)
novelty_important_var_OOD_df = pd.DataFrame(built_novelty_important_vars_filtering_result, columns=column_names_OOD_filtering)
# Save the result dataframe
save_df_to_csv(path_join(output_path, trained_resnet_name+'_novelty_important_vars_OOD_result.csv'), novelty_important_var_OOD_df)

### *Cifar10-c experiments*

In [None]:
## Read the cifar10-c dataset
# Get the content in the folder, keeping only the files whose name ENDS with the numpy extension
# (a substring test would also pick up names such as 'x.npy.bak' and then fail in np.load)
cifar10_c_data_files = [file for file in contents_of_folder(cifar10_c_path) if file.endswith(np_ext)]
# The number of images at each level
nb_image_by_level = 10000
# Load all the files; the result is keyed '<file type>_s<level>' with levels starting at 1
load_cifar10_c = {}
for file in cifar10_c_data_files:
    # We always use the transformation_type as variable name for the ease of coding (even if it could be just "labels")
    file_type = str_first_part_split_from_r(file)
    current_data = np.load(path_join(cifar10_c_path, file))
    # Number of complete level-sized batches in the file (integer division; a partial tail is ignored)
    nb_batchs = current_data.shape[0] // nb_image_by_level
    for index in range(nb_batchs):
        # Consecutive slice of nb_image_by_level entries for this level
        current_batch_data = current_data[index*nb_image_by_level:(index+1)*nb_image_by_level]
        load_cifar10_c[file_type+'_s'+str(index+1)] = current_batch_data

In [None]:
# Show a CIFAR10-c example: entry 3400 of the 'frost_s5' batch, then the entry with the same
# position in the 'labels_s5' batch (the keys follow the '<file type>_s<level>' naming used at load time)
plt.imshow(load_cifar10_c['frost_s5'][3400], interpolation='nearest')
plt.show()
print(load_cifar10_c['labels_s5'][3400])

In [None]:
# Build the dataloader and evaluate the activation levels for every transformed image batch
cifar10_c_actLevels = {}
for transformation_type in load_cifar10_c:
    # The label batches are not images: they are only used as targets below
    if 'labels' not in transformation_type:
        # Get the severe level.
        # NOTE(review): presumably the part after the last '_' (e.g. 's5'), since it is appended
        # to 'labels_' to pick the matching label batch — confirm in the helper.
        severity = str_second_part_split_from_r(transformation_type, delimiter='_')
        # Get the current image array
        transformed_image_array = load_cifar10_c[transformation_type]
        # Reorder the axes to satisfy pytorch model requirements: the last axis is moved to position 1
        # (presumably channels-last -> channels-first — confirm the stored layout)
        pytorch_transformed_image_array = transformed_image_array.transpose(0,3,1,2)
        # Normalize the pixel values by the maximum pixel value (image_max_pix_val)
        pytorch_transformed_image_array = pytorch_transformed_image_array / image_max_pix_val
        # Build the loader with the labels of the same severity level
        current_transformed_loader = create_dataloader(pytorch_transformed_image_array, load_cifar10_c['labels_'+severity],
                                                                   torch_batch_size, shuffle=False, type_conversion=True)
        # Evaluate the activation levels
        cifar10_c_actLevels[transformation_type] = obtain_activation_levels(trained_resnet,
                                           current_transformed_loader, transformation_type, with_predict_class=True, loss_type='cross_entropy')

In [None]:
## Whole likelihood distances experiment
# One likelihood result per corrupted set, computed on the last hidden layer
cifar10_c_whole_distances = {
    set_key: layer_whole_likelihood_experim(
        set_actLevels,
        last_hidden_layerId,
        train_distribution_filename,
        class_list,
        use_absolute_module_path=True,
    )
    for set_key, set_actLevels in cifar10_c_actLevels.items()
}

In [None]:
# Replace each set's distances by their normalized version (same keys, dictionary updated in place).
# NOTE: running this cell again feeds the already-normalized values back into normalize_whole_distances.
for set_key, raw_distances in cifar10_c_whole_distances.items():
    cifar10_c_whole_distances[set_key] = normalize_whole_distances(raw_distances, class_dist_infos)

In [None]:
# Write one whole-likelihood CSV per corrupted set into output_path
for set_key, set_distances in cifar10_c_whole_distances.items():
    csv_name = f"{trained_resnet_name}_{set_key}_whole_likelihood.csv"
    save_df_to_csv(path_join(output_path, csv_name), set_distances)

In [None]:
# Likelihood filtering: one filtering decision per corrupted set, based on the train-set
# distance information and the std threshold coefficient
cifar10_c_filtered_index = {
    set_key: filter_decision_based_on_train_infos_norm_ver(set_distances, class_dist_infos, std_threshold_coeff)
    for set_key, set_distances in cifar10_c_whole_distances.items()
}

In [None]:
# Evaluate the filtering on every corrupted set and collect one result row per set
built_cifar10_c_filtering_result = []
for set_key, set_filtered_index in cifar10_c_filtered_index.items():
    print(set_key, 'set:')
    cifar10_c_filtering_result = evaluate_filtering(cifar10_c_whole_distances[set_key], set_filtered_index, set_key)
    print()
    # Row layout: set name first, followed by the values returned by evaluate_filtering
    built_cifar10_c_filtering_result.append([set_key] + list(cifar10_c_filtering_result))
# Gather the rows in a dataframe and write it to output_path
cifar10_c_OOD_df = pd.DataFrame(built_cifar10_c_filtering_result, columns=column_names_OOD_filtering)
cifar10_c_OOD_csv_name = f"{trained_resnet_name}_cifar10_c_OOD_result.csv"
save_df_to_csv(path_join(output_path, cifar10_c_OOD_csv_name), cifar10_c_OOD_df)

In [None]:
# For every corrupted set, keep the activation levels of the important variables of each class
built_cifar10_c_actLevels_by_class = {
    set_key: build_actLevel_important_vars(set_actLevels, sorted_important_var_by_class, last_hidden_layerId)
    for set_key, set_actLevels in cifar10_c_actLevels.items()
}

In [None]:
# Whole likelihood restricted to the important variables, one entry per corrupted set
cifar10_c_whole_likelihood_important_vars = {
    set_key: calculate_likelihood_with_important_vars(output_path, set_actLevels_by_class, last_hidden_layerId)
    for set_key, set_actLevels_by_class in built_cifar10_c_actLevels_by_class.items()
}

In [None]:
# Replace each set's important-variable distances by their normalized version (same keys, updated in place).
# NOTE: running this cell again feeds the already-normalized values back into normalize_whole_distances.
for set_key, raw_distances in cifar10_c_whole_likelihood_important_vars.items():
    cifar10_c_whole_likelihood_important_vars[set_key] = normalize_whole_distances(raw_distances, class_dist_infos_important_var)

In [None]:
# Write one important-variable whole-likelihood CSV per corrupted set into output_path
for set_key, set_likelihood in cifar10_c_whole_likelihood_important_vars.items():
    csv_name = f"{trained_resnet_name}_{set_key}_sobol_whole_likelihood.csv"
    save_df_to_csv(path_join(output_path, csv_name), set_likelihood)

In [None]:
# Likelihood filtering on the important-variable likelihoods, one decision per corrupted set
cifar10_c_filtered_index_important_vars = {
    set_key: filter_decision_based_on_train_infos_norm_ver(set_likelihood, class_dist_infos_important_var, std_threshold_coeff)
    for set_key, set_likelihood in cifar10_c_whole_likelihood_important_vars.items()
}

In [None]:
# Evaluate the important-variable filtering on every corrupted set, one result row per set
built_cifar10_c_important_vars_filtering_result = []
for set_key, set_filtered_index in cifar10_c_filtered_index_important_vars.items():
    print(set_key, 'set:')
    cifar10_c_important_vars_filtering_result = evaluate_filtering(
        cifar10_c_whole_likelihood_important_vars[set_key], set_filtered_index, set_key)
    print()
    # Row layout: set name first, followed by the values returned by evaluate_filtering
    built_cifar10_c_important_vars_filtering_result.append([set_key] + list(cifar10_c_important_vars_filtering_result))
# Gather the rows in a dataframe and write it to output_path
cifar10_c_important_var_OOD_df = pd.DataFrame(built_cifar10_c_important_vars_filtering_result, columns=column_names_OOD_filtering)
cifar10_c_important_var_csv_name = f"{trained_resnet_name}_cifar10_c_important_vars_OOD_result.csv"
save_df_to_csv(path_join(output_path, cifar10_c_important_var_csv_name), cifar10_c_important_var_OOD_df)

### *Cifar10 adversarial attack experiments*

In [None]:
# Placeholders for the registered original dataset of the attacks (inputs and labels);
# both are filled when the "original" attack folder is read
original_X = original_y = None

In [None]:
## Read the adversarial attacks
# Find the attack folder of the loaded trained resnet: the first entry of adv_attack_path
# whose name contains trained_resnet_name
trained_resnet_attack_path = None
for attack_folder in contents_of_folder(adv_attack_path):
    if trained_resnet_name in attack_folder:
        trained_resnet_attack_path = path_join(adv_attack_path, attack_folder)
        break
# Stop here with an explicit message when no folder matches, instead of handing None
# to the folder helpers below
if trained_resnet_attack_path is None:
    raise FileNotFoundError("No attack folder containing '" + str(trained_resnet_name)
                            + "' was found in '" + str(adv_attack_path) + "'")
# Load all the attacks
loaded_attacks = {}
for attack_set in contents_of_folder(trained_resnet_attack_path):
    # The attack type is derived from the sub-folder name
    # (presumably its leading part - confirm against str_first_part_split_from_l)
    current_attack_type = str_first_part_split_from_l(attack_set)
    current_attack_set_path = path_join(trained_resnet_attack_path, attack_set)
    if current_attack_type == 'original':
        # The "original" folder holds the inputs and labels the attacks were built from
        X_file_path = path_join(current_attack_set_path, 'X.npy')
        y_file_path = path_join(current_attack_set_path, 'y.npy')
        original_X = np.load(X_file_path)
        original_y = np.load(y_file_path)
    else:
        # Any other folder is an attack: its first listed entry is loaded as the attacked inputs
        attack_file_path = path_join(current_attack_set_path, contents_of_folder(current_attack_set_path)[0])
        loaded_attacks[current_attack_type] = np.load(attack_file_path)

In [None]:
# One dataloader per attack; every attack is paired with the labels of the original set
attack_loaders = {
    attack_name: create_dataloader(attacked_inputs, original_y, torch_batch_size, shuffle=False, type_conversion=True)
    for attack_name, attacked_inputs in loaded_attacks.items()
}

In [None]:
# Evaluate the accuracy of the trained network on every attack loader
for attack_name, attack_loader in attack_loaders.items():
    accuracy_eval(trained_resnet, attack_loader, set_name=attack_name)

In [None]:
# Activation levels of the trained network on every attack set
attack_actLevels = {
    attack_name: obtain_activation_levels(
        trained_resnet,
        attack_loader,
        attack_name,
        with_predict_class=True,
        loss_type='cross_entropy',
    )
    for attack_name, attack_loader in attack_loaders.items()
}

In [None]:
## Whole likelihood distances experiment
# One likelihood result per attack, computed on the last hidden layer
attack_whole_distances = {
    attack_name: layer_whole_likelihood_experim(
        attack_set_actLevels,
        last_hidden_layerId,
        train_distribution_filename,
        class_list,
        use_absolute_module_path=True,
    )
    for attack_name, attack_set_actLevels in attack_actLevels.items()
}

In [None]:
# Replace each attack's distances by their normalized version (same keys, dictionary updated in place).
# NOTE: running this cell again feeds the already-normalized values back into normalize_whole_distances.
for attack_name, raw_distances in attack_whole_distances.items():
    attack_whole_distances[attack_name] = normalize_whole_distances(raw_distances, class_dist_infos)

In [None]:
# Write one whole-likelihood CSV per attack into output_path
for attack_name, attack_distances in attack_whole_distances.items():
    csv_name = f"{trained_resnet_name}_{attack_name}_whole_likelihood.csv"
    save_df_to_csv(path_join(output_path, csv_name), attack_distances)

In [None]:
# Likelihood filtering: one filtering decision per attack, based on the train-set
# distance information and the std threshold coefficient
attack_filtered_index = {
    attack_name: filter_decision_based_on_train_infos_norm_ver(attack_distances, class_dist_infos, std_threshold_coeff)
    for attack_name, attack_distances in attack_whole_distances.items()
}

In [None]:
# Evaluate the filtering on every attack and collect one result row per attack
built_attack_filtering_result = []
for attack_name, attack_set_filtered_index in attack_filtered_index.items():
    print(attack_name, 'set:')
    attack_filtering_result = evaluate_filtering(attack_whole_distances[attack_name], attack_set_filtered_index, attack_name)
    print()
    # Row layout: attack name first, followed by the values returned by evaluate_filtering
    built_attack_filtering_result.append([attack_name] + list(attack_filtering_result))
# Gather the rows in a dataframe and write it to output_path
attack_OOD_df = pd.DataFrame(built_attack_filtering_result, columns=column_names_OOD_filtering)
attack_OOD_csv_name = f"{trained_resnet_name}_attack_OOD_result.csv"
save_df_to_csv(path_join(output_path, attack_OOD_csv_name), attack_OOD_df)

In [None]:
# For every attack, keep the activation levels of the important variables of each class
built_attack_actLevels_by_class = {
    attack_name: build_actLevel_important_vars(attack_set_actLevels, sorted_important_var_by_class, last_hidden_layerId)
    for attack_name, attack_set_actLevels in attack_actLevels.items()
}

In [None]:
# Whole likelihood restricted to the important variables, one entry per attack
attack_whole_likelihood_important_vars = {
    attack_name: calculate_likelihood_with_important_vars(output_path, attack_actLevels_by_class, last_hidden_layerId)
    for attack_name, attack_actLevels_by_class in built_attack_actLevels_by_class.items()
}

In [None]:
# Replace each attack's important-variable distances by their normalized version (same keys, updated in place).
# NOTE: running this cell again feeds the already-normalized values back into normalize_whole_distances.
for attack_name, raw_distances in attack_whole_likelihood_important_vars.items():
    attack_whole_likelihood_important_vars[attack_name] = normalize_whole_distances(raw_distances, class_dist_infos_important_var)

In [None]:
# Write one important-variable whole-likelihood CSV per attack into output_path
for attack_name, attack_likelihood in attack_whole_likelihood_important_vars.items():
    csv_name = f"{trained_resnet_name}_{attack_name}_sobol_whole_likelihood.csv"
    save_df_to_csv(path_join(output_path, csv_name), attack_likelihood)

In [None]:
# Likelihood filtering on the important-variable likelihoods, one decision per attack
attack_filtered_index_important_vars = {
    attack_name: filter_decision_based_on_train_infos_norm_ver(attack_likelihood, class_dist_infos_important_var, std_threshold_coeff)
    for attack_name, attack_likelihood in attack_whole_likelihood_important_vars.items()
}

In [None]:
# Evaluate the important-variable filtering on every attack, one result row per attack
built_attack_important_vars_filtering_result = []
for attack_name, attack_set_filtered_index in attack_filtered_index_important_vars.items():
    print(attack_name, 'set:')
    attack_important_vars_filtering_result = evaluate_filtering(
        attack_whole_likelihood_important_vars[attack_name], attack_set_filtered_index, attack_name)
    print()
    # Row layout: attack name first, followed by the values returned by evaluate_filtering
    built_attack_important_vars_filtering_result.append([attack_name] + list(attack_important_vars_filtering_result))
# Gather the rows in a dataframe and write it to output_path
attack_important_var_OOD_df = pd.DataFrame(built_attack_important_vars_filtering_result, columns=column_names_OOD_filtering)
attack_important_var_csv_name = f"{trained_resnet_name}_attack_important_vars_OOD_result.csv"
save_df_to_csv(path_join(output_path, attack_important_var_csv_name), attack_important_var_OOD_df)