In [3]:
import pandas as pd
import numpy as np
import re
import os
import glob

def parse_indices(indices_str):
    """Turn one spreadsheet cell into a list of integer indices.

    Parameters
    ----------
    indices_str : str, int, float or None
        Cell content. Normally a comma-separated string such as
        ``"378,172,173"``. A cell that holds a single index is typically
        read by pandas as a number rather than a string, so integral
        numeric scalars are accepted as well.

    Returns
    -------
    list of int
        The parsed indices in the order they appear. An empty list is
        returned for empty/blank cells, NaN/None, non-integral numbers,
        booleans, and strings containing a non-integer token.
    """
    # bool is a subclass of int; a True/False cell is never an index.
    if isinstance(indices_str, (bool, np.bool_)):
        return []

    # A lone index stored as a number instead of text.
    if isinstance(indices_str, (int, np.integer)):
        return [int(indices_str)]
    if isinstance(indices_str, (float, np.floating)):
        # NaN marks an empty cell; only whole-number floats (e.g. 172.0) count.
        as_float = float(indices_str)
        if np.isfinite(as_float) and as_float.is_integer():
            return [int(as_float)]
        return []

    if not isinstance(indices_str, str):
        return []

    # Skip blank tokens so a trailing or doubled comma does not discard
    # every other index in the cell.
    tokens = [token.strip() for token in indices_str.split(',')]
    try:
        return [int(token) for token in tokens if token]
    except ValueError:
        # Best-effort: a cell with any non-integer token yields no indices.
        return []

def extract_subject_id(file_path):
    """Return the first ``sub-<digits>`` token in ``file_path``, or None if absent."""
    found = re.search(r'sub-\d+', file_path)
    return found.group(0) if found else None

# Root of the dataset; every input path below is derived from it.
BASE_DIR = '/home/gabridele/Desktop/irbio_folder/spreading_dynamics_clinical'

csv_files = glob.glob(f'{BASE_DIR}/derivatives/sub-*/dwi/association_matrix_sub-*_*seeds.csv', recursive=True)  # Change this to your CSV file name
excel_file = f'{BASE_DIR}/index_nan.xlsx'

# The spreadsheet is identical for every subject, so it is read at most once.
# It is loaded lazily so nothing is read when no subject gets processed.
df = None

# For each association-matrix CSV: load it and the subject's correlation
# matrix (.npy), look up the indices listed for that subject in the
# spreadsheet, drop those rows/columns from both, save the results to the
# current working directory, and check that the two shapes still agree.
#
# NOTE(review): output names depend only on the subject ID, so when several
# CSVs match the same subject each iteration overwrites the previous files.
for csv_file in csv_files:
    subject_id = extract_subject_id(csv_file)

    npy_file = f'{BASE_DIR}/derivatives/{subject_id}/func/{subject_id}_rs_correlation_matrix.npy'

    if not os.path.exists(npy_file):
        print(f"NPY file not found: {npy_file}. Skipping...")
        continue

    if df is None:
        df = pd.read_excel(excel_file)

    # Specify the subject ID to test
    test_subject_id = subject_id  # Replace with the actual subject ID you want to test

    # Load the CSV file (no header row: labels are 0..n-1 on both axes)
    df_csv = pd.read_csv(csv_file, header=None)
    print(f"Original CSV shape: {df_csv.shape}")

    # Load the NPY file
    data_npy = np.load(npy_file)
    print(f"Original NPY shape: {data_npy.shape}")

    # Start from the unmodified data. Without this, a subject with nothing to
    # remove (or one whose update raised) would make the final shape check
    # fail with a NameError or compare results left over from the previous
    # subject.
    df_csv_modified = df_csv
    data_npy_modified = data_npy

    # Initialize sets for indices to remove
    indices_to_remove_csv = set()
    indices_to_remove_npy = set()

    # Filter the DataFrame for the specified subject (ID is in the first column)
    subject_df = df[df.iloc[:, 0] == subject_id]

    # Process the rows for the specified subject
    for idx, row in subject_df.iterrows():
        # Positional access: integer keys on a labelled Series are deprecated
        # as positions, so use .iloc explicitly.
        indices_str_y = row.iloc[2]  # Indices for CSV (third column)
        indices_str_z = row.iloc[4]  # Indices for NPY (fifth column)

        rmv3 = parse_indices(indices_str_y)
        rmv5 = parse_indices(indices_str_z)

        # CSV: only the fifth-column indices that are not also in the third.
        indices_to_remove_csv.update(set(rmv5) - set(rmv3))
        # NPY: the union of both columns.
        indices_to_remove_npy.update(rmv3 + rmv5)
        print('indices_to_remove_csv:', indices_to_remove_csv)
        print('indices_to_remove_npy:', indices_to_remove_npy)

    # Update CSV file
    if indices_to_remove_csv:
        try:
            csv_drop = sorted(indices_to_remove_csv)

            # Remove rows (labels missing from the index are ignored)
            df_csv_modified_rows = df_csv.drop(index=csv_drop, errors='ignore')

            # Remove columns, keeping only positions that exist in the frame
            indices_to_remove_csv_cols = [col for col in csv_drop if col < len(df_csv.columns)]
            if indices_to_remove_csv_cols:
                df_csv_modified = df_csv_modified_rows.drop(columns=df_csv.columns[indices_to_remove_csv_cols], errors='ignore')
            else:
                df_csv_modified = df_csv_modified_rows

            df_csv_modified.to_csv(f'file_y_modified_{subject_id}.csv', index=False)
            print(f"Modified CSV shape: {df_csv_modified.shape}")
            print(f"Modified CSV file saved as file_y_modified_{subject_id}.csv")
        except Exception as e:
            print(f"Error updating CSV file: {e}")

    # Update NPY file
    if indices_to_remove_npy:
        try:
            indices_to_remove_npy = sorted(indices_to_remove_npy)  # Convert to list for np.delete

            if data_npy.ndim == 2:  # Check if the NPY file is 2D
                # Remove rows, then the same positions as columns
                data_npy_modified_rows = np.delete(data_npy, indices_to_remove_npy, axis=0)
                data_npy_modified = np.delete(data_npy_modified_rows, indices_to_remove_npy, axis=1)
            else:
                # Handle non-2D case (e.g., 1D array)
                data_npy_modified = np.delete(data_npy, indices_to_remove_npy, axis=0)

            np.save(f'file_z_modified_{subject_id}.npy', data_npy_modified)
            print(f"Modified NPY shape: {data_npy_modified.shape}")

            # Save NPY data as CSV for manual inspection
            df_npy = pd.DataFrame(data_npy_modified)
            df_npy.to_csv(f'file_z_modified_{subject_id}.csv', index=False)
            print(f"Modified NPY data saved as CSV file_z_modified_{subject_id}.csv")
        except Exception as e:
            print(f"Error updating NPY file: {e}")

    # Both matrices are expected to end up with the same shape.
    if data_npy_modified.shape == df_csv_modified.shape:
        print(f"Processing complete for subject {subject_id}.")
    else:
        print('something\'s wrong')

Original CSV shape: (454, 454)
Original NPY shape: (454, 454)
indices_to_remove_csv: {378, 172, 173}
indices_to_remove_npy: {378, 172, 173}
Modified CSV shape: (451, 451)
Modified CSV file saved as file_y_modified_sub-60030.csv
Modified NPY shape: (451, 451)
Modified NPY data saved as CSV file_z_modified_sub-60030.csv
Processing complete for subject sub-60030.
Original CSV shape: (454, 454)
Original NPY shape: (454, 454)
indices_to_remove_csv: {378, 172, 173}
indices_to_remove_npy: {378, 172, 173}
Modified CSV shape: (451, 451)
Modified CSV file saved as file_y_modified_sub-60030.csv
Modified NPY shape: (451, 451)
Modified NPY data saved as CSV file_z_modified_sub-60030.csv
Processing complete for subject sub-60030.
Original CSV shape: (454, 454)
Original NPY shape: (454, 454)
indices_to_remove_csv: {378, 172, 173}
indices_to_remove_npy: {378, 172, 173}
Modified CSV shape: (451, 451)
Modified CSV file saved as file_y_modified_sub-60030.csv
Modified NPY shape: (451, 451)
Modified NPY da

KeyboardInterrupt: 

In [34]:
# Shape of whatever `df_csv_modified` currently holds in the kernel.
# NOTE(review): the recorded result (451, 453) is not square, unlike the
# (451, 451) shapes printed by the processing loop, so this presumably ran
# against different kernel state — confirm after Restart & Run All.
df_csv_modified.shape

(451, 453)

In [35]:
# Display the frame for visual inspection; relies on `df_csv_modified`
# still being defined in the kernel by an earlier cell.
df_csv_modified

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,443,444,445,446,447,448,449,450,451,452
0,0.0000,0.7017,0.3577,0.4372,0.6649,0.5865,0.4349,0.4050,0.3578,0.3682,...,0.0481,0.0922,0.0662,0.0276,0.0367,0.0198,0.0349,0.0095,0.0163,0.0169
1,0.7017,0.0000,0.2984,0.3491,0.6867,0.6433,0.5098,0.3623,0.4003,0.3164,...,0.0374,0.1323,0.0982,0.0434,0.0584,0.0332,0.0502,0.0158,0.0279,0.0276
2,0.3577,0.2984,0.0000,0.6624,0.1829,0.1735,0.1258,0.7634,0.4802,0.7543,...,0.1136,0.0140,0.0090,0.0054,0.0070,0.0058,0.0225,0.0127,0.0078,0.0137
3,0.4372,0.3491,0.6624,0.0000,0.2541,0.2486,0.1746,0.6676,0.3707,0.6948,...,0.0656,0.0251,0.0181,0.0096,0.0118,0.0097,0.0241,0.0078,0.0092,0.0150
4,0.6649,0.6867,0.1829,0.2541,0.0000,0.8215,0.6104,0.2275,0.2414,0.2342,...,0.0207,0.1621,0.1187,0.0556,0.0717,0.0387,0.0492,0.0138,0.0298,0.0225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
448,0.0198,0.0332,0.0058,0.0097,0.0387,0.0667,0.1552,0.0091,0.0105,0.0090,...,0.0068,0.5741,0.4313,0.7640,0.7583,0.0000,0.5813,0.3290,0.7952,0.5498
449,0.0349,0.0502,0.0225,0.0241,0.0492,0.0710,0.1406,0.0262,0.0277,0.0237,...,0.0362,0.3996,0.4194,0.4071,0.6513,0.5813,0.0000,0.6268,0.7298,0.6374
450,0.0095,0.0158,0.0127,0.0078,0.0138,0.0225,0.0592,0.0132,0.0200,0.0106,...,0.1001,0.1999,0.2868,0.1974,0.4068,0.3290,0.6268,0.0000,0.4432,0.4575
451,0.0163,0.0279,0.0078,0.0092,0.0298,0.0485,0.1176,0.0095,0.0108,0.0086,...,0.0149,0.4473,0.4243,0.5685,0.7040,0.7952,0.7298,0.4432,0.0000,0.6916
