# Test Parallel ExIFFI

Host capri Davide

    Hostname capri.dei.unipd.it
    User p1026u27

In [7]:
import sys
import numpy as np
import pandas as pd
from tqdm import trange
from append_dir import append_dirname
append_dirname('ExIFFI')
from utils.utils import partition_data
from utils.feature_selection import *
#from plot import *
#from simulation_setup import *
from models import *
from models.Extended_IF import *
from models.Extended_DIFFI_parallel import *
from models.Extended_DIFFI_original import *
import math
import seaborn as sns
sns.set()

from sklearn.preprocessing import StandardScaler
import time

import os
import pickle 
from scipy.io import loadmat
from glob import glob

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

## Set up file paths

In [15]:
path = os.getcwd()
path = os.path.dirname(path)
path_real = os.path.join(path, "data", "real")
mat_files_real = glob(os.path.join(path_real, "*.mat"))
mat_file_names_real = {os.path.basename(x).split(".")[0]: x for x in mat_files_real}
csv_files_real = glob(os.path.join(path_real, "*.csv"))
csv_file_names_real = {os.path.basename(x).split(".")[0]: x for x in csv_files_real}
dataset_names = list(mat_file_names_real.keys()) + list(csv_file_names_real.keys())
mat_file_names_real.update(csv_file_names_real)
dataset_paths = mat_file_names_real.copy()

## Utility Functions

Drop Duplicates from the loaded dataset 

In [16]:
def drop_duplicates(X, y):
    S = np.c_[X, y]
    S = pd.DataFrame(S).drop_duplicates().to_numpy()
    X, y = S[:, :-1], S[:, -1]
    return X, y

Load dataset coming from a `.mat` file 

In [17]:
def load_data(path):
    data = loadmat(path)
    X, y = data["X"], data["y"]
    y = np.hstack(y)
    X, y = drop_duplicates(X, y)
    return X, y

Load dataset coming from a `.csv` file

In [18]:
def load_data_csv(path):
    data = pd.read_csv(path, index_col=0)
    if "Unnamed: 0" in data.columns:
        data = data.drop(columns=["Unnamed: 0"])

    X = data[data.columns[data.columns != "Target"]]
    y = data["Target"]

    X, y = drop_duplicates(X, y)

    return X, y

Load the data (with `load_data` or with `load_data_csv`), scale the data and split it into train and test set obtaining `X_train`, `X_test` that will be passed to `compute_imps`. 

In [19]:
def pre_process(path):
    extension = os.path.splitext(path)[1]

    if extension == ".csv":
        X, y = load_data_csv(path)
    elif extension == ".mat":
        X, y = load_data(path)
    else:
        raise ValueError("Extension not supported")

    X_train, X_test = partition_data(X, y)
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    X_test = np.r_[X_train, X_test]

    return X_train, X_test

Compute the Global Importance of a given dataset `n_runs` times. At the end a matrix with shape `(n_runs, n_features)` is returned. Each row contains the global importance of the features for a given run.

In [20]:
def compute_imps(model, X_train, X_test, n_runs):

    X_test=np.r_[X_train,X_test]

    imps = np.zeros(shape=(n_runs, X_train.shape[1]))
    for i in tqdm(range(n_runs)):
        model.fit(X_train)
        imps[i, :] = model.Global_importance(
            X_test, calculate=True, overwrite=False, depth_based=False
        )

    return imps

### `test_exiffi`

This is the function called in the `main` of `test_parallel.py` used to do the experiments on the CAPRI HPC server. For a given set of datasets it computes the global importance `n_runs` times using `Extended_DIFFI_parallel` or `Extended_DIFFI_original`and saves the importances matrices, the time stats obtained and the test arguments in a `.npz` file.

#### `test_exiffi` Parameters

- `X_train`: the train set
- `X_test`: the test set
- `savedir`: directory where to save the results in `.npz` format
- `n_runs`: number of runs to do
- `seed`: random seed to obtain reproducibile results and compare the importances matrices obtaind from the parallel and the serial version of the algorithm (they must be the same to certify the correctness of the parallel version)
- `parallel`: Boolean variable used to choose between the parallel and the serial version of the algorithm
- `n_cores`: Number of threads to use in the parallel version of the algorithm. This coincides with the number of cores set with the `--cpus-per-task` options in the `.job` file
- `num_trees`: Number of trees used by ExIFFI. The higher the more complex and more computationally expensive the algorithm is
- `name`: Name of the dataset

In [8]:
def test_exiffi(
    X_train,
    X_test,
    savedir,
    n_runs=10,
    seed=None,
    parallel=False,
    n_cores=2,
    num_trees=300,
    name="",
):
    args_to_avoid = ["X_train", "X_test", "savedir", "args_to_avoid", "args"]
    args = dict()
    for k, v in locals().items():
        if k in args_to_avoid:
            continue
        args[k] = v

    ex_time = []
    ex_imps = {}

    for i in trange(n_runs):
        seed = None if seed is None else seed + i

        if parallel:
            EDIFFI = Extended_DIFFI_parallel(
                n_trees=num_trees, max_depth=100, subsample_size=256, plus=1, seed=seed
            )
            EDIFFI.set_num_processes(n_cores, n_cores)
        else:
            EDIFFI = Extended_DIFFI_original(
                n_trees=num_trees, max_depth=100, subsample_size=256, plus=1, seed=seed
            )

        start = time.time()
        imps = compute_imps(EDIFFI, X_train, X_test, 10)
        ex_imps["Execution " + str(i)] = imps
        end = time.time()
        ex_time.append(end - start)

    # print(ex_imps)
    time_stat = {"mean": np.mean(ex_time), "std": np.std(ex_time)}
    filename = "test_stat_parallel.npz" if parallel else "test_stat_serial.npz"
    t = time.localtime()
    current_time = time.strftime("%d-%m-%Y_%H-%M-%S", t)
    filename = current_time + "_" + name + "_" + filename

    # if dir does not exist, create it
    if not os.path.exists(savedir):
        os.makedirs(savedir)
    filepath = os.path.join(savedir, filename)

    np.savez(
        filepath,
        execution_time_stat=time_stat,
        importances_matrix=ex_imps,
        arguments=args,
    )

## Load Data

## Wine Dataset

In [9]:
name='wine'
X,y=load_data(dataset_paths[name])
X_train,X_test=partition_data(X,y)
X.shape,y.shape

((129, 13), (129,))

### Serial ExIFFI

In [13]:
test_exiffi(
    X_train=X_train,
    X_test=X_test,
    savedir='../results/npz',
    n_runs=1,
    seed=120,
    parallel=False,
    n_cores=12,
    num_trees=10,
    name=name,
)

100%|██████████| 10/10 [00:01<00:00,  5.02it/s]
100%|██████████| 1/1 [00:01<00:00,  2.00s/it]


### Parallel ExIFFI

In [None]:
test_exiffi(
    X_train=X_train,
    X_test=X_test,
    savedir='../results/npz',
    n_runs=1,
    seed=120,
    parallel=True,
    n_cores=12,
    num_trees=200,
    name=name,
)

## Ionosphere Dataset

In [12]:
name='ionosphere'
X,y=load_data(dataset_paths[name])
X_train,X_test=partition_data(X,y)
X.shape,y.shape

((350, 33), (350,))

### Serial ExIFFI

In [None]:
test_exiffi(
    X_train=X_train,
    X_test=X_test,
    savedir='../results/npz',
    n_runs=1,
    seed=120,
    parallel=False,
    n_cores=12,
    num_trees=10,
    name=name,
)

### Parallel ExIFFI

In [None]:
test_exiffi(
    X_train=X_train,
    X_test=X_test,
    savedir='../results/npz',
    n_runs=1,
    seed=120,
    parallel=True,
    n_cores=12,
    num_trees=200,
    name=name,
)

## Moodify Dataset

In [26]:
name='moodify'
X,y=load_data_csv(dataset_paths[name])
X_train,X_test=partition_data(X,y)
X.shape,y.shape

((276260, 11), (276260,))

# Test Results 

## Parallel

In [9]:
stats=np.load('26-01-2024_17-42-35_test_stat_parallel_7000.npz',allow_pickle=True)
data_parallel=stats['importances_matrix'].tolist()
time_data_parallel=stats['execution_time_stat']
arguments_parallel=stats['arguments'].tolist()

In [12]:
print(arguments_parallel.keys())

args_to_avoid = ["X_train", "X_test", "X"]
for key in arguments_parallel.keys():
    if key not in args_to_avoid:
        print(key,arguments_parallel[key])

dict_keys(['X_train', 'X_test', 'X', 'n_runs', 'seed', 'parallel', 'n_cores'])
n_runs 2
seed None
parallel True
n_cores 8


In [33]:
time_data_parallel

array({'mean': 3.36362202167511, 'std': 0.23129910849918273}, dtype=object)

In [34]:
data_parallel.keys()

dict_keys(['Execution 0', 'Execution 1', 'Execution 2', 'Execution 3', 'Execution 4', 'Execution 5', 'Execution 6', 'Execution 7', 'Execution 8', 'Execution 9'])

## Serial

In [3]:
stats=np.load('test_stat_serial.npz',allow_pickle=True)
data_serial=stats['importances_matrix'].tolist()
time_data_serial=stats['execution_time_stat']

FileNotFoundError: [Errno 2] No such file or directory: 'test_stat_serial.npz'

In [30]:
time_data_serial

array({'mean': 3.5636572360992433, 'std': 0.5714168745774968},
      dtype=object)

In [31]:
data_serial.keys()

dict_keys(['Execution 0', 'Execution 1', 'Execution 2', 'Execution 3', 'Execution 4', 'Execution 5', 'Execution 6', 'Execution 7', 'Execution 8', 'Execution 9'])

Check if `data_parallel` and `data_serial` are equal


In [36]:
for k in data_serial.keys():
    print(np.sum(data_serial[k]-data_parallel[k]))

-5.995204332975845e-14
1.532107773982716e-13
-3.4638958368304884e-13
7.327471962526033e-14
5.306866057708248e-13
-3.774758283725532e-13
-3.197442310920451e-13
1.3522516439934407e-12
-1.2434497875801753e-13
-1.0769163338864018e-12


## Results Thyroid

### Parallel

In [9]:
path_to_load = (
    "../capri_code/results/npz/28-01-2024_17-45-18_annthyroid_test_stat_parallel.npz"
)

stats = np.load(path_to_load, allow_pickle=True)

display(stats['execution_time_stat'])
display(stats['arguments'].tolist())

array({'mean': 89.01771640777588, 'std': 0.0}, dtype=object)

{'n_runs': 1,
 'seed': 120,
 'parallel': True,
 'n_cores': 2,
 'num_trees': 10,
 'name': 'annthyroid',
 'args_to_avoid': ['X_train', 'X_test', 'savedir'],
 'args': {...}}

Execution Time

In [6]:
wine_stats['execution_time_stat']

array({'mean': 22.09242186546326, 'std': 2.2678216673392386}, dtype=object)

In [9]:
wine_stats['arguments']

array({'n_runs': 10, 'seed': 120, 'parallel': True, 'n_cores': 12, 'num_trees': 300, 'name': 'wine', 'args_to_avoid': ['X_train', 'X_test', 'savedir'], 'args': {...}},
      dtype=object)

In [13]:
imp_mat_wine=wine_stats['importances_matrix'].tolist()
imp_mat_wine['Execution 2']

array([[1.51150254, 0.94012808, 0.77765704, 0.85334869, 0.92756969,
        1.01631053, 1.27235724, 1.37869805, 0.93163375, 1.2656863 ,
        1.18378956, 1.39478296, 1.38035484],
       [1.51150254, 0.94012808, 0.77765704, 0.85334869, 0.92756969,
        1.01631053, 1.27235724, 1.37869805, 0.93163375, 1.2656863 ,
        1.18378956, 1.39478296, 1.38035484],
       [1.51150254, 0.94012808, 0.77765704, 0.85334869, 0.92756969,
        1.01631053, 1.27235724, 1.37869805, 0.93163375, 1.2656863 ,
        1.18378956, 1.39478296, 1.38035484],
       [1.51150254, 0.94012808, 0.77765704, 0.85334869, 0.92756969,
        1.01631053, 1.27235724, 1.37869805, 0.93163375, 1.2656863 ,
        1.18378956, 1.39478296, 1.38035484],
       [1.51150254, 0.94012808, 0.77765704, 0.85334869, 0.92756969,
        1.01631053, 1.27235724, 1.37869805, 0.93163375, 1.2656863 ,
        1.18378956, 1.39478296, 1.38035484],
       [1.51150254, 0.94012808, 0.77765704, 0.85334869, 0.92756969,
        1.01631053, 1.27235

# All results

We use the script `process_results.py` to read the stats of the experiments from the `.npz` files and display them on a `pd.DataFrame` that can be saved as a `.csv` file.

In [20]:
for data in stats:
    print()
    break

breastw


In [2]:
from append_dir import append_dirname
append_dirname('ExIFFI')

from capri_code.process_results import load_stats, display_stats, compute_cpu_efficiency


results_dirpath = "../../container/job4/results/"

stats = load_stats(results_dirpath, use_pkl=True)

for i, row in stats.iterrows():
    n_cores = max([row["n_cores_fit"], row["n_cores_importance"], row["n_cores_anomaly"]])
    stats.loc[i, "cpu_efficiency"] = compute_cpu_efficiency(row["real_time"], row["user_time"], n_cores)
    

# display_stats(stats)
display_stats(stats.groupby("parallel").get_group(True))
display_stats(stats.groupby("parallel").get_group(False))

Unnamed: 0_level_0,n_cores_fit,n_cores_importance,n_cores_anomaly,n_runs,seed,parallel,n_trees,name,n_runs_imps,mean_time,std_time,mean_MB,std_MB,max_MB,real_time,user_time,sys_time,cpu_efficiency
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2024-01-30 15:47:36.088014,1,1,4,5,120,True,300,wine,5,65.135395,1.78364,421.945016,3.925508,428.290048,327.901,340.537,9.254,25.963401
2024-01-30 15:35:08.255912,4,1,1,5,120,True,300,wine,5,36.076435,0.999008,443.505377,11.205296,466.96448,182.849,347.223,10.888,47.474009
2024-01-30 15:57:14.231006,1,1,4,5,120,True,300,glass,5,115.173798,1.689243,489.580954,6.211631,500.228096,578.169,602.999,12.623,26.073648
2024-01-30 15:40:41.691636,4,1,1,5,120,True,300,glass,5,65.843605,0.978784,421.421711,24.829751,494.87872,331.75,615.674,14.859,46.395931
2024-01-30 16:50:32.736658,4,1,1,5,120,True,300,cardio,5,380.160016,1.962829,736.371671,33.649745,762.687488,1908.615,2271.178,24.292,29.749033


Unnamed: 0_level_0,n_cores_fit,n_cores_importance,n_cores_anomaly,n_runs,seed,parallel,n_trees,name,n_runs_imps,mean_time,std_time,mean_MB,std_MB,max_MB,real_time,user_time,sys_time,cpu_efficiency
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2024-01-30 17:07:52.333205,1,1,1,5,120,False,300,cardio,5,437.642479,18.793363,591.404073,9.619989,608.17408,2190.869,2184.656,5.743,99.716414
2024-01-30 16:22:03.105891,1,1,1,5,120,False,300,wine,5,65.463616,4.144099,404.238664,2.465636,407.486464,329.567,327.989,1.48,99.52119
2024-01-30 16:31:21.686496,1,1,1,5,120,False,300,glass,5,111.261842,7.624729,470.430024,8.476952,480.997376,558.595,555.479,2.952,99.442172
2024-01-30 15:49:56.673645,1,1,1,5,120,False,100,cardio,5,142.643763,8.523221,393.009725,1.83851,395.968512,716.112,712.554,1.992,99.50315
2024-01-30 15:34:38.531939,1,1,1,5,120,False,100,wine,5,22.670529,1.310402,326.859817,7.030727,333.770752,120.767,113.376,0.583,93.879951
2024-01-30 15:37:59.648176,1,1,1,5,120,False,100,glass,5,38.201773,1.994781,258.565734,48.463303,355.602432,199.292,192.154,1.794,96.418321


In [14]:
names = [
    "Bimodal",
    "Xaxis",
    "Yaxis",
    "Bisect",
    "Bisec3D",
    "Bisec6D",
    "Annthyroid",
    "Breastw",
    "Cardio",
    "Glass",
    "Ionosphere",
    "Pendigits",
    "Pima",
    "Shuttle",
    "Wine",
    "Diabetes",
    "Moodify",
]

sample_n = [
    400,
    1100,
    1100,
    1100,
    1100,
    1100,
    7200,
    683,
    1831,
    213,
    351,
    6870,
    768,
    49097,
    129,
    85916,
    276260,
]

feat_n = [
    2,
    6,
    6,
    6,
    6,
    6,
    6,
    9,
    21,
    9,
    33,
    16,
    8,
    9,
    13,
    4,
    11,
]

size = [(n, s*f) for n, s, f in zip(names, sample_n, feat_n)]

# order size with the second element
size.sort(key=lambda x: x[1], reverse=True)

for n, s in size:
    print(n, s)

Moodify 3038860
Shuttle 441873
Diabetes 343664
Pendigits 109920
Annthyroid 43200
Cardio 38451
Ionosphere 11583
Xaxis 6600
Yaxis 6600
Bisect 6600
Bisec3D 6600
Bisec6D 6600
Breastw 6147
Pima 6144
Glass 1917
Wine 1677
Bimodal 800


In [21]:
import numpy as np
from append_dir import append_dirname
append_dirname('ExIFFI')

from capri_code.process_results import load_stats, display_stats


results_dirpath = "../capri_code/results/npz/new/new"

stats = load_stats(results_dirpath)

# display_stats(stats)
display(stats)

imps_mat = np.array(stats.loc[0, "importances_matrix"])

print("imps_mat.shape", imps_mat.shape)


imp_mat_ex_0 = imps_mat[0]
print("imp_mat_ex_0.shape", imp_mat_ex_0.shape)

# for i in range(len(imp_mat_ex_0)-1):
#     print(imp_mat_ex_0[i] - imp_mat_ex_0[i+1])

imp_mat_ex_1 = imps_mat[1]

# for i in range(len(imp_mat_ex_1)):
#     print(imp_mat_ex_1[i] - imp_mat_ex_0[i])

# print(imps_mat[0] - imps_mat[1])
# print(imps_mat[1] - imps_mat[2])

KeyError: 'importances_matrix'

In [18]:
import numpy as np

def func(x):
    return x+10

a = np.ones((3))

# add newaxis to a
a = a[:, np.newaxis]

print(a)
print(a.shape)

output = np.apply_along_axis(func, 1, a)

print("output\n", output)

[[1.]
 [1.]
 [1.]]
(3, 1)
output
 [[11.]
 [11.]
 [11.]]


In [24]:
import time

# current datetime
import datetime

# datetime compatible with pandas dataframe rows
import pandas as pd
datetime_row_pandas = pd.Timestamp.now()

datetime_row_pandas

Timestamp('2024-01-29 14:52:25.715026')

### Test Subprocess

In [2]:
os.chdir('../../container/job2/')
os.getcwd()

'/home/davidefrizzo/Desktop/PHD/PHD COURSES/Parallel Computing HPC/HPC-Project-AD/container/job2'

In [8]:
! ./exec_parallel.sh

Executing Parallel python script:
############################################################
TESTING PARALLEL ExIFFI
############################################################
TEST PARAMETERS:
Number of runs: 1
Number of trees: 100
Number of cores: fit 8, importance 8, anomaly 8
Seed: 120
Parallel: True
############################################################
dataset_names ['wine']
############################################################
DATASET: wine
############################################################
Experiment:   0%|                                         | 0/1 [00:00<?, ?it/s]Execution 1
Set up Extended_DIFFI_parallel
Finished setting up Extended_DIFFI_parallel
Call compute_imps
shape of X_train: (119, 13)
shape of X_test: (129, 13)

Fit & Importances:   0%|                                 | 0/10 [00:00<?, ?it/s][AStart fit
End fit
Start Global Importance
Start computing Anomaly Score
End computing Anomaly Score
Start computing Importances Score
self.num_proc

In [10]:
import subprocess

# Your Python script code here

# Execute the time command and capture the output
time_output = subprocess.check_output(["./exec_parallel.sh"], tsderr=subprocess.STDOUT, text=True)

# Save the time output to a file
with open("time_output_err.txt", "w") as file:
    file.write(time_output)


AttributeError: module 'subprocess' has no attribute 'STDERR'

In [23]:
[i.split('\t') for i in time_output.split("\n")[-4:-1]]

[['real', '0m13.487s'], ['user', '0m52.816s'], ['sys', '0m7.766s']]

In [4]:
os.chdir('../capri_code/')
os.getcwd()

'/home/davidefrizzo/Desktop/PHD/PHD COURSES/Parallel Computing HPC/HPC-Project-AD/ExIFFI/capri_code'

In [10]:
import subprocess

# Your shell command as a string
shell_command = 'time python test_parallel.py --n_runs 1 --savedir ./results/npz/new  --n_trees 300  --dataset_names wine --n_cores 12 --seed 123 --n_runs_imps 1'

# Execute the command and capture the output
output = subprocess.check_output(shell_command, shell=True, text=True)

# Print or use the captured output as needed
print(output)


Experiment:   0%|          | 0/1 [00:00<?, ?it/s]
Fit & Importances:   0%|          | 0/1 [00:00<?, ?it/s][A
Fit & Importances: 100%|██████████| 1/1 [00:03<00:00,  3.36s/it][A
Experiment: 100%|██████████| 1/1 [00:03<00:00,  3.36s/it]


############################################################
TESTING PARALLEL ExIFFI
############################################################
TEST PARAMETERS:
Number of runs: 1
Number of trees: 300
Number of cores: fit 12, importance 12, anomaly 12
Seed: 123
Parallel: True
############################################################
dataset_names ['wine']
############################################################
DATASET: wine
############################################################
Execution 1
Set up Extended_DIFFI_parallel
Finished setting up Extended_DIFFI_parallel
Call compute_imps
shape of X_train: (119, 13)
shape of X_test: (129, 13)
Start fit
End fit
Start Global Importance
Start computing Anomaly Score
End computing Anomaly Score
Start computing Importances Score
self.num_processes_importances: 12
segment_size: 25
Segments shapes: [(25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,)]
Stop computing Importances Score
End Global Importance

18.24user 2.70system 0:05.03elapsed 416%CPU (0avgtext+0avgdata 369980maxresident)k
0inputs+16outputs (2major+321682minor)pagefaults 0swaps


In [11]:
output

"############################################################\nTESTING PARALLEL ExIFFI\n############################################################\nTEST PARAMETERS:\nNumber of runs: 1\nNumber of trees: 300\nNumber of cores: fit 12, importance 12, anomaly 12\nSeed: 123\nParallel: True\n############################################################\ndataset_names ['wine']\n############################################################\nDATASET: wine\n############################################################\nExecution 1\nSet up Extended_DIFFI_parallel\nFinished setting up Extended_DIFFI_parallel\nCall compute_imps\nshape of X_train: (119, 13)\nshape of X_test: (129, 13)\nStart fit\nEnd fit\nStart Global Importance\nStart computing Anomaly Score\nEnd computing Anomaly Score\nStart computing Importances Score\nself.num_processes_importances: 12\nsegment_size: 25\nSegments shapes: [(25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,), (25,)]\nStop computing Importanc

### Test glob

In [2]:
import os 
from glob import glob
os.chdir('../capri_code/results/npz/new/')
os.getcwd()

'/home/davidefrizzo/Desktop/PHD/PHD COURSES/Parallel Computing HPC/HPC-Project-AD/ExIFFI/capri_code/results/npz/new'

In [5]:
search_word = "29-01-2024"
pattern = f"*{search_word}*"

matching_files = glob(pattern)
print(matching_files)

['29-01-2024_14-40-58_annthyroid_test_stat_parallel.npz', '29-01-2024_16-59-18_wine_test_stat_parallel.npz', '29-01-2024_14-12-35_wine_test_stat_parallel.npz', '29-01-2024_17-02-05_wine_test_stat_parallel.npz', '29-01-2024_12-51-07_cardio_test_stat_parallel.npz', '29-01-2024_19-17-38_test_stat_parallel_wine.npz', '29-01-2024_14-14-24_wine_test_stat_parallel.npz', '29-01-2024_16-59-47_wine_test_stat_parallel.npz', '29-01-2024_18-21-27_wine_test_stat_parallel.npz', '29-01-2024_18-22-40_wine_test_stat_parallel.npz', '29-01-2024_14-56-08_wine_test_stat_parallel.npz', '29-01-2024_18-10-56_wine_test_stat_parallel.npz', '29-01-2024_19-14-07_wine_test_stat_parallel.npz', '29-01-2024_14-13-43_wine_test_stat_parallel.npz', '29-01-2024_12-47-38_wine_test_stat_parallel.npz', '29-01-2024_19-15-44_test_stat_parallel_wine.npz', '29-01-2024_18-16-09_wine_test_stat_parallel.npz', '29-01-2024_14-07-57_wine_test_stat_parallel.npz', '29-01-2024_14-31-29_wine_test_stat_parallel.npz', '29-01-2024_16-49-37_w

In [8]:
stats=np.load('30-01-2024_10-07-13_test_stat_parallel_wine.npz',allow_pickle=True)

In [11]:
stats['execution_time_stat']

KeyError: 'execution_time_stat is not a file in the archive'