In [11]:
import csv
import pandas as pd
import re

# Paths of the input text files and of the CSV files written from them,
# one input/output pair each for the predrug and the postdrug data.
# All four paths are relative, so they are resolved against the current
# working directory when the script runs.
input_file_predrug = "correlation_files/predrug_sub-Linus_epi_CHARM_in_NMT_v2.1_sym_05mm_003.txt"
output_file_predrug = "correlation_files/csv_tables/correlation_matrix_long_format_predrug_003.csv"
input_file_postdrug = "correlation_files/postdrug_sub-Linus_epi_CHARM_in_NMT_v2.1_sym_05mm_003.txt"
output_file_postdrug = "correlation_files/csv_tables/correlation_matrix_long_format_postdrug_003.csv"

# Function to extract number of ROIs
def extract_number_from_first_line(input_file):
    """Return the first integer that appears on the first line of a file.

    Only the opening line of ``input_file`` is read; the first run of
    decimal digits found on it is converted to ``int`` and returned.

    Raises:
        ValueError: If the first line contains no digits at all.
    """
    with open(input_file, 'r') as handle:
        opening_line = handle.readline()

    digits = re.search(r'\d+', opening_line)
    if digits is None:
        raise ValueError("No number was found in the first line.")
    return int(digits.group())

# Function to process a file and save the correlation matrix in long format
def process_correlation_file(input_file, output_file, header_line, start_line):
    """Convert a square correlation matrix in a text file to a long-format CSV.

    The ROI count N is taken from the first line of ``input_file``. The
    labels are read from line ``header_line`` and the matrix from the N
    lines beginning at ``start_line`` (both line numbers are 1-based). Every
    matrix cell becomes one CSV row with the columns ``Variable1`` (label of
    the matrix row), ``Variable2`` (label of the matrix column) and
    ``Correlation`` (the cell value as a float).

    The shape of the matrix is checked before anything is written, so a
    truncated or misaligned input raises instead of silently producing an
    incomplete table.

    Args:
        input_file: Path of the whitespace-delimited text file to read.
        output_file: Path of the CSV file to write (written without index).
        header_line: 1-based number of the line holding the labels.
        start_line: 1-based number of the first matrix row.

    Raises:
        ValueError: If the first line holds no ROI count, if fewer than N
            matrix rows are available, if a row does not hold exactly N
            values, or if a value cannot be parsed as a float.
        IndexError: If ``header_line`` lies past the end of the file or the
            label line holds fewer than N labels.
    """
    number = extract_number_from_first_line(input_file)
    print(f"This number of ROIs was extracted from the file: {number}")
    end_line = start_line + number - 1  # 1-based number of the last matrix row

    # Read everything up front so the file is closed before any processing.
    with open(input_file, 'r') as file:
        lines = file.readlines()

    header = lines[header_line - 1].split()
    if len(header) < number:
        raise IndexError(
            f"Line {header_line} of {input_file} holds {len(header)} labels, "
            f"but {number} are needed."
        )

    matrix_data = [line.split() for line in lines[start_line - 1:end_line]]

    # Slicing past the end of the file is silent, so check the row count.
    if len(matrix_data) != number:
        raise ValueError(
            f"Expected {number} matrix rows starting at line {start_line} of "
            f"{input_file}, but found {len(matrix_data)}."
        )
    for offset, row in enumerate(matrix_data):
        if len(row) != number:
            raise ValueError(
                f"Line {start_line + offset} of {input_file} holds "
                f"{len(row)} values, but {number} are expected."
            )

    # One record per matrix cell: (row label, column label, value).
    long_format_data = [
        {
            'Variable1': header[i],
            'Variable2': header[j],
            'Correlation': float(value),
        }
        for i, row in enumerate(matrix_data)
        for j, value in enumerate(row)
    ]

    # Naming the columns keeps the CSV header row even when there are no records.
    long_format_df = pd.DataFrame(
        long_format_data, columns=['Variable1', 'Variable2', 'Correlation']
    )
    long_format_df.to_csv(output_file, index=False)
    print(f"The correlation matrix in long format has been saved to {output_file}.")

# Process predrug file
# Convert the predrug file first, then the postdrug file; both use the same
# layout (labels on line 4, matrix starting on line 7).
for source_path, target_path in (
    (input_file_predrug, output_file_predrug),
    (input_file_postdrug, output_file_postdrug),
):
    process_correlation_file(source_path, target_path, header_line=4, start_line=7)


This number of ROIs was extracted from the file: 108
The correlation matrix in long format has been saved to correlation_files/csv_tables/correlation_matrix_long_format_predrug_003.csv.
This number of ROIs was extracted from the file: 108
The correlation matrix in long format has been saved to correlation_files/csv_tables/correlation_matrix_long_format_postdrug_003.csv.
