In [1]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
from numba import njit, prange

In [2]:
cwt_path = '/Users/alfioleanza/progetto_tesi/dataset-eeg/miltiadous_deriv_uV_d1.0s_o0.0s/cwt'
labels_path = '/Users/alfioleanza/progetto_tesi/dataset-eeg/inference_20250327_171717/true_pred.csv'
output_file = '/Users/alfioleanza/progetto_tesi/dataset-eeg/miltiadous_deriv_uV_d1.0s_o0.0s/selected_sub_processed_test.csv'

In [3]:
df_labels = pd.read_csv(labels_path)
df_labels_test = df_labels[df_labels['dataset'] == 'test']
df_labels_test.shape

(14654, 9)

In [4]:
df_labels_test['original_rec'].unique()

array(['sub-064', 'sub-060', 'sub-061', 'sub-062', 'sub-084', 'sub-086',
       'sub-087', 'sub-029', 'sub-030', 'sub-031', 'sub-032', 'sub-034',
       'sub-035', 'sub-036', 'sub-063', 'sub-065', 'sub-085', 'sub-088',
       'sub-033'], dtype=object)

In [5]:
df_labels_test

Unnamed: 0,crop_file,activation_values,dataset,softmax_values,pred_label,true_label,original_rec,crop_start_sample,crop_end_sample
40474,00000000.npy,[ 2.4675148 -1.8711491 -2.2181666],test,[0.97820514 0.01276945 0.00902536],0,0,sub-064,0,499.0
40475,00000001.npy,[ 2.5092738 -2.3497164 -2.0147054],test,[0.98173577 0.00761661 0.01064769],0,0,sub-064,500,999.0
40476,00000002.npy,[ 1.9644415 -2.305623 -1.555663 ],test,[0.9582425 0.01339708 0.02836047],0,0,sub-064,1000,1499.0
40477,00000003.npy,[ 2.028386 -2.100873 -1.6919187],test,[0.9612414 0.01547099 0.02328759],0,0,sub-064,1500,1999.0
40478,00000004.npy,[ 2.0630686 -2.256164 -1.8919498],test,[0.9685526 0.01289152 0.01855583],0,0,sub-064,2000,2499.0
...,...,...,...,...,...,...,...,...,...
55123,00069789.npy,[-1.175102 -0.75652933 -0.2110595 ],test,[0.19447434 0.29556033 0.50996536],2,2,sub-033,350000,350499.0
55124,00069790.npy,[-0.19823408 -0.73456573 -0.7339342 ],test,[0.4607978 0.26951596 0.26968622],0,2,sub-033,350500,350999.0
55125,00069791.npy,[-1.221509 -0.7617841 -0.43042636],test,[0.20879349 0.33065337 0.4605532 ],2,2,sub-033,351000,351499.0
55126,00069792.npy,[-0.6501466 0.16503572 -0.51529855],test,[0.22706886 0.51308197 0.25984916],1,2,sub-033,351500,351999.0


In [None]:
#train 
#df_selected = df_labels_test[df_labels_test['original_rec'].isin(['sub-064', 'sub-060', 'sub-061', 'sub-062', 'sub-084', 'sub-086', 'sub-087', 'sub-029'])]
#test 
#df_selected = df_labels_test[df_labels_test['original_rec'].isin(['sub-087', 'sub-029'])]

In [7]:
df_selected.shape

(5865, 9)

In [8]:
labels_dict = dict(zip(df_selected['crop_file'], (df_selected['true_label'] == df_selected['pred_label']).astype(int)))

In [9]:
selected_files = set(df_selected['crop_file'])

# Cwt's csv linearized

In [9]:
cwt_matrices = []
file_labels = []

for file_name in tqdm(os.listdir(cwt_path)):
    if file_name.endswith(".npy") and file_name in labels_dict and file_name in selected_files:
        file_path = os.path.join(cwt_path, file_name)
        
        cwt_matrix = np.load(file_path).astype(np.float32)
        cwt_matrices.append(cwt_matrix)
        file_labels.append(labels_dict[file_name])

100%|██████████| 69794/69794 [00:06<00:00, 10764.56it/s]


In [10]:
@njit(parallel=True)
def linearizza_cwt(cwt_list):
    n = len(cwt_list)
    flattened_size = cwt_list[0].size 
    output_array = np.empty((n, flattened_size), dtype=np.float32)
    
    for i in prange(n):
        output_array[i, :] = cwt_list[i].ravel()
    
    return output_array
if cwt_matrices:
    cwt_flattened = linearizza_cwt(cwt_matrices)

    df_filtered = pd.DataFrame(cwt_flattened, dtype=np.float32)
    df_filtered['label'] = file_labels
    df_filtered.columns = df_filtered.columns.astype(str)

In [11]:
df_filtered

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,379991,379992,379993,379994,379995,379996,379997,379998,379999,label
0,1.161970,0.993835,0.792561,0.284450,0.819059,1.454924,1.171187,2.067214,1.373706,1.629615,...,15.772595,17.406263,16.316538,18.099115,14.292380,14.569317,14.545494,14.039417,13.977609,0
1,0.899270,0.926762,0.781408,0.850744,0.623189,0.536037,0.246469,0.225489,0.189672,0.475282,...,17.378490,14.812168,18.014044,16.883804,20.118614,20.112221,16.963453,15.775242,19.000305,0
2,0.483543,1.650610,0.750537,2.658968,0.195012,1.056220,0.806774,1.663154,1.093502,2.022532,...,4.896389,7.948851,7.271011,7.849089,9.264806,10.244619,6.415658,6.290147,9.360826,0
3,0.737548,0.723768,0.547696,0.531218,0.350944,0.680453,0.051028,0.484314,0.134288,0.294838,...,2.360500,0.534906,2.813836,0.372017,3.450114,3.050830,1.720482,1.351689,3.060199,0
4,0.605189,0.275645,0.218986,1.279843,0.189251,0.877868,0.257313,0.677829,0.066004,0.009324,...,5.081779,6.712727,7.658372,5.898130,8.324481,5.489568,2.788492,2.747128,4.359486,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1333,2.245009,0.435461,3.501943,0.337071,2.277444,0.606849,1.483163,1.055564,1.871007,0.982200,...,10.181327,8.754773,5.731411,12.939702,3.343282,8.401784,5.580685,5.154557,3.679551,1
1334,0.723465,1.256215,0.558411,1.147775,0.188893,1.135221,0.350391,0.557577,0.498972,0.473000,...,5.103721,4.101523,3.940006,3.857345,3.527399,6.515562,6.347604,5.622764,4.564192,0
1335,2.492759,0.051256,0.432780,1.791795,0.740719,0.879437,0.428781,1.123468,0.051083,0.192416,...,4.281641,4.785915,6.112915,6.877346,5.746759,7.356068,6.262271,6.104834,7.131477,1
1336,1.057061,0.862756,0.808433,0.712948,0.398827,0.844386,0.723499,0.386358,0.675497,0.171817,...,2.133239,3.689166,2.183885,2.422354,3.937940,3.413527,3.373698,4.835664,3.572187,0


# Cwt's folder linearized

In [None]:
import numpy as np
import os
from tqdm import tqdm

cwt_path = "/Users/alfioleanza/progetto_tesi/dataset-eeg/miltiadous_deriv_uV_d1.0s_o0.0s/cwt"

output_folder = "/Users/alfioleanza/progetto_tesi/dataset-eeg/test_svm"
os.makedirs(output_folder, exist_ok=True)

selected_files = set(df_selected["crop_file"].tolist())

print(f"Numero di file selezionati: {len(selected_files)}")


Numero di file selezionati: 5865


In [None]:
for file_name in tqdm(os.listdir(cwt_path)):
    if file_name.endswith(".npy"):
        if file_name in selected_files:
            file_path = os.path.join(cwt_path, file_name)
            
            cwt_matrix = np.load(file_path)
            cwt_linearized = cwt_matrix.flatten()
            
            save_path = os.path.join(output_folder, file_name)
            np.save(save_path, cwt_linearized)

print(f"Numero di file linearizzati: {len(os.listdir(output_folder))}")

100%|██████████| 69794/69794 [00:39<00:00, 1774.89it/s]

Numero di file linearizzati: 5865



