# Seperating tsv into left and right sided patients, ignoring bilateral patients

# Left = ipsi, Right = cont #

In [1]:
import pandas as pd

# Read the input TSV file
df = pd.read_csv('../resources/merged_tabular_withconn.tsv',sep='\t')

# Filter the DataFrame
filtered_df = df[(df['lat_final_left'] == "1") & (df['lat_final_bilateral'] == 0)]

# Write the output TSV file
filtered_df.to_csv('../resources/tabular/tabular_withconn_left.tsv', sep='\t', index=False)

In [2]:
# Read the input TSV file
df = pd.read_csv('../resources/merged_tabular_withconn.tsv',sep='\t')

# Filter the DataFrame
filtered_df = df[(df['lat_final_right'] == "1") & (df['lat_final_bilateral'] == 0)]

# Write the output TSV file
filtered_df.to_csv('../resources/tabular/tabular_withconn_right.tsv', sep='\t', index=False)

In [4]:
df = pd.read_csv('../resources/tabular/tabular_withconn_left.tsv',sep='\t')

# Rename columns
df.columns = df.columns.str.replace('_L_', '_ipsi_', regex=False).str.replace('_R_', '_contra_', regex=False)

# Write the output TSV file
df.to_csv('../resources/tabular/tabular_withconn_left_renamed.tsv', sep='\t', index=False)

In [5]:
df = pd.read_csv('../resources/tabular/tabular_withconn_right.tsv',sep='\t')

# Rename columns
df.columns = df.columns.str.replace('_R_', '_ipsi_', regex=False).str.replace('_L_', '_contra_', regex=False)

# Write the output TSV file
df.to_csv('../resources/tabular/tabular_withconn_right_renamed.tsv', sep='\t', index=False)

In [7]:
# making a new tsv with just hcp so I can rename the columns

# Read the input TSV file
df = pd.read_csv('../resources/merged_tabular_withconn.tsv',sep='\t')

# Filter the DataFrame
filtered_df = df[df['dataset'] == 'HCP']

# Write the output TSV file
filtered_df.to_csv('../resources/tabular/tabular_withconn_hcp.tsv', sep='\t', index=False)

In [8]:
# rename HCP so that left = ipsi and right = contra

df = pd.read_csv('../resources/tabular/tabular_withconn_hcp.tsv',sep='\t')

# Rename columns
df.columns = df.columns.str.replace('_L_', '_ipsi_', regex=False).str.replace('_R_', '_contra_', regex=False)

# Write the output TSV file
df.to_csv('../resources/tabular/tabular_withconn_hcp_renamed.tsv', sep='\t', index=False)

In [19]:
# averaging left and right parcels to average across HCP

df = pd.read_csv('../resources/tabular/tabular_withconn_hcp_renamed.tsv',sep='\t')

# Loop over all columns to find pairs of 'ipsi' and 'contra' columns
for col in df.columns:
    # Split the column name into parts
    parts = col.split('_')
    
    # Ensure the column name has the expected format of 'prefix_side_suffix'
    if len(parts) == 3:
        prefix = parts[0]  # e.g., 'RSFC'
        side = parts[1]    # 'ipsi' or 'contra'
        suffix = parts[2]  # e.g., 'A8m'
        
        # Create a base name (e.g., 'RSFC_A8m')
        base_name = f"{prefix}_{suffix}"
        
        # Check if the opposite side column exists
        opposite_side = 'ipsi' if side == 'contra' else 'contra'
        opposite_col = f"{prefix}_{opposite_side}_{suffix}"
        
        # If both 'ipsi' and 'contra' columns exist, average the values and overwrite them
        if opposite_col in df.columns:
            # Average the values of both columns
            df[col] = (df[col] + df[opposite_col]) / 2
            df[opposite_col] = df[col]  # Also set the opposite side column to the same value

# Now, df contains the averaged values in place of the original columns
df.head()  # Display the first few rows of the updated DataFrame


# Optionally, save the updated DataFrame back to a TSV file
df.to_csv('../resources/tabular/tabular_withconn_hcp_averaged.tsv',sep='\t', index=False)


In [20]:
# put together hcp, left and right renamed

# Read the input TSV files
df1 = pd.read_csv('../resources/tabular/tabular_withconn_hcp_averaged.tsv', sep='\t')
df2 = pd.read_csv('../resources/tabular/tabular_withconn_left_renamed.tsv', sep='\t')
df3 = pd.read_csv('../resources/tabular/tabular_withconn_right_renamed.tsv', sep='\t')

# Concatenate the DataFrames
combined_df = pd.concat([df1, df2, df3], ignore_index=True)


In [21]:
combined_df

Unnamed: 0,participant_id,dataset,age,sex,age_seizure_onset,age_ep_diagnosis,ep_duration_first_scan,seizure_duration_first_scan,pnes,benign_rolandic,...,NFC_SomMot,NFC_Limbic,NFC_Vis,NSC_SalVentAttn,NSC_Cont,NSC_Default,NSC_DorsAttn,NSC_SomMot,NSC_Limbic,NSC_Vis
0,sub-HCD0001305,HCP,11.916667,M,,,,,,,...,0.228100,0.217443,0.110125,14.295435,15.703174,13.890784,17.820553,12.525241,7.464836,7.686001
1,sub-HCD0021614,HCP,9.166667,F,,,,,,,...,0.189836,0.027044,0.071886,14.956169,14.597272,13.982220,17.997952,11.131801,8.344783,8.136690
2,sub-HCD0026119,HCP,15.166667,F,,,,,,,...,0.193837,0.108192,0.117147,15.377986,16.290547,14.635713,19.319002,11.602176,8.445383,8.885017
3,sub-HCD0041822,HCP,17.416667,M,,,,,,,...,0.041392,0.024048,0.044534,15.107883,16.766070,14.348261,19.078719,11.899607,9.120181,7.977700
4,sub-HCD0042420,HCP,18.333333,M,,,,,,,...,0.109603,0.100180,0.181082,15.181964,16.899608,13.760974,20.733716,11.585073,7.933532,8.330949
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,sub-020,LOBE,15.000000,F,8.0,10.0,5.0,7.0,0.0,0.0,...,0.104976,0.077505,0.026792,14.755174,14.477456,12.691517,17.992928,11.456241,7.214316,7.499911
354,sub-025,LOBE,18.000000,F,8.0,17.0,1.0,10.0,0.0,0.0,...,0.121006,0.189346,0.226567,14.417348,16.963010,13.409346,19.820208,12.434146,9.593172,8.365793
355,sub-029,LOBE,16.000000,M,15.0,15.0,1.0,1.0,0.0,0.0,...,0.483952,0.425712,0.369441,14.052392,14.163463,11.906738,16.939946,11.221658,8.524584,7.956035
356,sub-035,LOBE,7.000000,M,2.0,4.0,3.0,5.0,0.0,0.0,...,0.025123,0.038279,0.034718,15.559057,15.762304,14.148990,16.890526,11.495121,7.699943,7.522167


In [22]:
# putting lobe into numerical order

# Read the merged TSV file (or the DataFrame you have)
df = combined_df

# Step 1: Filter the DataFrame for rows with dataset = 'LOBE'
lobe_df = df[df['dataset'] == 'LOBE']

# Step 2: Sort the filtered DataFrame by participant_id
lobe_df_sorted = lobe_df.sort_values(by='participant_id')

# Step 3: Combine the sorted DataFrame with the rest of the data
# (keep rows that do not have dataset = 'LOBE')
other_df = df[df['dataset'] != 'LOBE']
final_df = pd.concat([lobe_df_sorted, other_df], ignore_index=True)

# Write the final output TSV file
final_df.to_csv('../resources/merged_tabular_withconn_flipped.tsv', sep='\t', index=False)

In [6]:
pwd

'/home/ROBARTS/mtaylor/graham/projects/ctb-akhanf-ab/cfmm-bids/Khan/LOBE/analysis/LOBE_analysis/notebooks'

## Brainnetome ##

In [112]:
import pandas as pd

# Step 1: Load the brainnetome labels file (File 1)
file_path = '../resources/brainnetome/brainnetome_labels_nonet.txt'
df1 = pd.read_csv(file_path, header=None, names=['Label', 'Number', 'R', 'G', 'B', 'Alpha'], sep=" ", comment="#")

# Step 2: Load the network mapping file (File 2)
network_mapping = pd.read_csv('../resources/brainnetome/subregion_func_network_Yeo_updated.csv')

# Step 3: Inspect the columns of the network_mapping dataframe
print(network_mapping.columns)  # Check the column names to ensure correct usage

# Step 4: Clean the 'Number' column in df1 (handle NaN or invalid values)
df1 = df1.dropna(subset=['Number'])  # Drop rows where 'Number' is NaN

# Ensure 'Number' is converted to integers
df1['Number'] = df1['Number'].astype(int)

# Step 5: Merge the dataframes on 'Number' to get the 'Yeo_7network' values
# We need to confirm the exact column name for network in network_mapping
# Let's use 'Yeo_7network' if that's the column that contains the network values
df1 = pd.merge(df1, network_mapping[['Yeo_7network']], how='left', left_on='Number', right_on='Yeo_7network')

# Step 6: Modify the labels
# Now, append the Yeo_7network number to the label for every row starting with "L" or "R"
for index, row in df1.iterrows():
    if row['Label'].startswith(('L', 'R')):  # Check if the label starts with 'L' or 'R'
        # Append the Yeo_7network number to the label
        new_label = f"{row['Label']}_{row['Yeo_7network']}"
        df1.at[index, 'Label'] = new_label

# Step 7: Save the updated DataFrame with new labels back to a new file (or overwrite the original)
df1.to_csv('../resources/brainnetome/updated_brainnetome_labels.txt', index=False, header=False, sep=" ")

# Optional: Check the first few rows to verify
print(df1.head())


Index(['Label', 'subregion_name', 'region', 'Yeo_7network', 'Yeo_17network',
       'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8', 'Unnamed: 9',
       'Unnamed: 10', 'Unnamed: 11'],
      dtype='object')
  Label  Number      R    G      B  Alpha  Yeo_7network
0     1       0  255.0  0.0  255.0    NaN           0.0
1     1       0  255.0  0.0  255.0    NaN           0.0
2     1       0  255.0  0.0  255.0    NaN           0.0
3     1       0  255.0  0.0  255.0    NaN           0.0
4     1       0  255.0  0.0  255.0    NaN           0.0


In [98]:
import pandas as pd

# Step 1: Load the CSV file containing the data
file_path = '../resources/brainnetome/subregion_func_network_Yeo_updated.csv'
network_mapping = pd.read_csv(file_path)

# Step 2: Inspect the columns of the network_mapping dataframe
print("Columns in network_mapping:", network_mapping.columns)

# Step 3: Create the new names based on the format L_A8m_6 or R_A8m_6
# We'll assume the columns are named 'region', 'subregion_name', and 'Yeo_17network'
# Modify this if the column names are different.

# Step 4: Combine the columns into the desired format
network_mapping['new_name'] = network_mapping['region'] + '_' + network_mapping['subregion_name'] + '_' + network_mapping['Yeo_7network'].astype(str)

# Step 5: Save the new names to a .txt file
output_file_path = '../resources/brainnetome/updated_brainnetome_labels.txt'

# Writing the 'new_name' column to a file, each name on a new line
network_mapping['new_name'].to_csv(output_file_path, index=False, header=False)

print(f"New labels have been written to {output_file_path}")


Columns in network_mapping: Index(['Label', 'subregion_name', 'region', 'Yeo_7network'], dtype='object')
New labels have been written to ../resources/brainnetome/updated_brainnetome_labels.txt


In [99]:
import pandas as pd

# Step 1: Load the CSV file containing the data
file_path = '../resources/brainnetome/subregion_func_network_Yeo_updated.csv'
network_mapping = pd.read_csv(file_path)

# Step 2: Combine the columns into the desired format (L_A8m_6, for example)
network_mapping['new_name'] = network_mapping['region'] + '_' + network_mapping['subregion_name'] + '_' + network_mapping['Yeo_7network'].astype(str)

# Step 3: Modify the name according to your specifications
# The names are in the format 'SFG_L_7_1_A8m_6', and we want to get 'L_A8m_6'

def modify_name(name):
    # Split the name by underscores
    parts = name.split('_')
    
    # We're interested in the region (second part), subregion name (last part), and network number (second-to-last part)
    return f"{parts[1]}_{parts[-2]}_{parts[-1]}"  # Take the region (L/R), subregion name, and network number

# Apply the modify_name function to each new_name
network_mapping['modified_name'] = network_mapping['new_name'].apply(modify_name)

# Step 4: Save the modified names to a new .txt file
output_file_path = '../resources/brainnetome/modified_brainnetome_labels.txt'

# Writing the modified names to the file, each name on a new line
network_mapping['modified_name'].to_csv(output_file_path, index=False, header=False)

print(f"Modified labels have been written to {output_file_path}")


Modified labels have been written to ../resources/brainnetome/modified_brainnetome_labels.txt


In [110]:
# File paths
labels_file_path = '../resources/brainnetome/brainnetome_labels.txt'
modified_labels_file_path = '../resources/brainnetome/modified_brainnetome_labels.txt'
output_file_path = '../resources/brainnetome/brainnetome_labels_net.txt'

# Step 1: Read the modified names from the modified_brainnetome_labels.txt
with open(modified_labels_file_path, 'r') as f:
    modified_names = f.read().splitlines()

# Step 2: Create a dictionary to map the base names (e.g., L_A8m) to modified names (e.g., L_A8m_6)
modified_names_dict = {}
for modified_name in modified_names:
    parts = modified_name.split('_')
    if len(parts) == 3:  # Only process names that have a '_number' format
        base_name = '_'.join(parts[:2])  # Extract base name (e.g., L_A8m)
        modified_names_dict[base_name] = modified_name  # Map base name to the full modified name (e.g., L_A8m_6)

# Debug: Print out the dictionary to confirm it's being created correctly
print("Modified Names Dictionary:", modified_names_dict)

# Step 3: Read the original labels file and modify the names
updated_lines = []
name_line = True  # Flag to indicate whether we are processing a name line or a data line

with open(labels_file_path, 'r') as f:
    original_lines = f.readlines()

# Step 4: Loop through the original lines, replace the names, and build the new lines
for i in range(0, len(original_lines), 2):  # We step by 2, assuming the name is followed by data on the next line
    name_line = original_lines[i].strip()  # Get the name (e.g., L_A8m)
    data_line = original_lines[i+1].strip()  # Get the associated data line (e.g., 1 0 255 0 255)

    # Debug: Print out each line being processed
    print("\nProcessing name line:", repr(name_line))
    print("Processing data line:", repr(data_line))

    # Check if it's a name line (i.e., doesn't contain numeric data)
    parts = name_line.split()  # Split the name line by whitespace

    if len(parts) == 1:  # This means the line is just a name (e.g., L_A8m)
        original_name = parts[0]  # Extract the base name (e.g., L_A8m)
        
        # Check if the original name exists in the dictionary
        if original_name in modified_names_dict:
            new_name = modified_names_dict[original_name]  # e.g., L_A8m_6
            print(f"Replacing {original_name} with {new_name}")
            updated_lines.append(new_name + '\n')  # Append the updated name line, followed by a newline
            updated_lines.append(data_line + '\n')  # Append the original data line with a newline
        else:
            print(f"No replacement found for {original_name}")
            updated_lines.append(name_line + '\n')  # If no modification, append the original name line
            updated_lines.append(data_line + '\n')  # Append the data line without modification
    else:
        # This should not happen in this loop, but we handle it for safety
        updated_lines.append(name_line + '\n')  # If no modification, append the original name line
        updated_lines.append(data_line + '\n')  # Append the data line without modification

# Step 5: Write the updated content to the new file (modified_brainnetome_labels_net.txt)
with open(output_file_path, 'w') as f:
    f.writelines(updated_lines)

print(f"\nUpdated labels have been written to {output_file_path}")


Modified Names Dictionary: {'L_A8m': 'L_A8m_6', 'R_A8m': 'R_A8m_4', 'L_A8dl': 'L_A8dl_7', 'R_A8dl': 'R_A8dl_6', 'L_A9l': 'L_A9l_7', 'R_A9l': 'R_A9l_7', 'L_A6dl': 'L_A6dl_3', 'R_A6dl': 'R_A6dl_3', 'L_A6m': 'L_A6m_2', 'R_A6m': 'R_A6m_2', 'L_A9m': 'L_A9m_7', 'R_A9m': 'R_A9m_6', 'L_A10m': 'L_A10m_7', 'R_A10m': 'R_A10m_7', 'L_A9/46d': 'L_A9/46d_4', 'R_A9/46d': 'R_A9/46d_6', 'L_IFJ': 'L_IFJ_6', 'R_IFJ': 'R_IFJ_6', 'L_A46': 'L_A46_6', 'R_A46': 'R_A46_6', 'L_A9/46v': 'L_A9/46v_6', 'R_A9/46v': 'R_A9/46v_6', 'L_A8vl': 'L_A8vl_7', 'R_A8vl': 'R_A8vl_6', 'L_A6vl': 'L_A6vl_3', 'R_A6vl': 'R_A6vl_3', 'L_A10l': 'L_A10l_5', 'R_A10l': 'R_A10l_6', 'L_A44d': 'L_A44d_6', 'R_A44d': 'R_A44d_3', 'L_IFS': 'L_IFS_6', 'R_IFS': 'R_IFS_6', 'L_A45c': 'L_A45c_7', 'R_A45c': 'R_A45c_7', 'L_A45r': 'L_A45r_7', 'R_A45r': 'R_A45r_6', 'L_A44op': 'L_A44op_4', 'R_A44op': 'R_A44op_4', 'L_A44v': 'L_A44v_4', 'R_A44v': 'R_A44v_4', 'L_A14m': 'L_A14m_7', 'R_A14m': 'R_A14m_7', 'L_A12/47o': 'L_A12/47o_7', 'R_A12/47o': 'R_A12/47o_7', 

# Updating names from numbered net to name of net #

In [114]:
# Define the number to network mapping
network_mapping = {
    1: 'Vis',
    2: 'SomMot',
    3: 'DorsAttn',
    4: 'SalVentAttn',
    5: 'Limbic',
    6: 'Cont',
    7: 'Default'
}

# Input and output file paths
input_file_path = '../resources/brainnetome/brainnetome_labels_numbernet.txt'
output_file_path = '../resources/brainnetome/brainnetome_labels.txt'

# Step 1: Read the content from the input file
with open(input_file_path, 'r') as f:
    lines = f.readlines()

# Step 2: Process each line
updated_lines = []
for i in range(0, len(lines), 2):  # Iterate by 2 (name, then data)
    name_line = lines[i].strip()  # Name (e.g., L_A8m_6)
    data_line = lines[i + 1].strip()  # Associated data (e.g., 1 0 255 0 255)
    
    # Step 3: Extract the number from the name
    parts = name_line.split('_')
    if len(parts) == 3:  # Expected format is <region>_<subregion>_<number>
        number = int(parts[2])  # The last part is the number (e.g., 6 from L_A8m_6)
        
        # Step 4: Replace the number with the corresponding network name
        if number in network_mapping:
            network_name = network_mapping[number]
            new_name = f"{parts[0]}_{parts[1]}_{network_name}"  # Create the new name
            updated_lines.append(new_name + '\n')  # Add the modified name
            updated_lines.append(data_line + '\n')  # Add the original data line
        else:
            # If the number doesn't exist in the mapping, keep the original name
            updated_lines.append(name_line + '\n')
            updated_lines.append(data_line + '\n')
    else:
        # In case the line format is unexpected, just add the original name and data
        updated_lines.append(name_line + '\n')
        updated_lines.append(data_line + '\n')

# Step 5: Write the updated lines to the output file
with open(output_file_path, 'w') as f:
    f.writelines(updated_lines)

print(f"Updated labels have been written to {output_file_path}")


Updated labels have been written to ../resources/brainnetome/brainnetome_labels.txt


## Adding network names to labels txt ##

In [120]:
# Open the file and read lines
file_path = "../resources/brainnetome/brainnetome_labels.txt"

with open(file_path, "r") as f:
    lines = f.readlines()

# Initialize an empty list to store the names
name_list = []

# Loop through odd lines (indexing starts from 0, so odd lines are at indices 0, 2, 4, etc.)
for i in range(0, len(lines), 2):  # step 2 to select odd lines (0-indexed)
    name = lines[i].split()[0]  # The name is the first part of the line
    name_after_underscore = name.split('_')[2]  # Extract the part after the underscore
    name_list.append(name_after_underscore)

# Define the output file path
output_file_path = "../resources/brainnetome/extracted_net_names.txt"

# Open the output file and write the names to it
with open(output_file_path, "w") as out_file:
    for name in name_list:
        out_file.write(name + "\n")  # Write each name on a new line

print(f"Names have been saved to {output_file_path}")


Names have been saved to ../resources/brainnetome/extracted_net_names.txt
