# This script is designed to process data from a TSV file and categorize its column names. It utilizes the pandas library for data handling and the json library to save the output in a structured format. The primary function of the script is to create a dictionary where each main category (e.g., ContA) acts as a key, and its value is a list of all related label pairs (e.g., ['ContA:ContB', 'ContA:ContC']).

# In[ ]:
import pandas as pd
import json

def load_data(file_path):
    """
    Read a tab-separated file into a pandas DataFrame.

    Args:
        file_path: Path (or file-like object) handed straight to pandas.

    Returns:
        The DataFrame produced by pandas' parser with a tab delimiter.
    """
    frame = pd.read_table(file_path, sep="\t")
    return frame

def group_labels_by_category(df):
    """
    Build a mapping from each leading column-name segment to its label pairs.

    Every column name in ``df.columns`` is split on '-'. Names containing at
    least one '-' contribute a label of the form 'first:second', built from
    their first two segments, to the list stored under the first segment.
    Names without a '-' are ignored, and any segments after the second are
    not used.

    Args:
        df: Object exposing a ``columns`` iterable of string column names.

    Returns:
        A dict whose keys are first segments (in order of first appearance)
        and whose values are lists of 'first:second' labels in column order.
        Repeated labels are kept, not de-duplicated.
    """
    grouped = {}
    for column_name in df.columns:
        first, dash, remainder = column_name.partition('-')
        if not dash:
            # No separator at all: there is no second segment to pair with.
            continue
        # Only the segment right after the first '-' takes part in the label.
        second = remainder.split('-', 1)[0]
        grouped.setdefault(first, []).append(f"{first}:{second}")
    return grouped

# In[ ]:
if __name__ == "__main__":
    # Input table whose column headers get grouped.
    file_path = 'yashoda_datasets_code/gc_network_wide_with_depression_filtered.tsv'

    try:
        # Read the TSV, then derive the category -> label-pair mapping
        # from its column names.
        data_df = load_data(file_path)
        grouped_labels = group_labels_by_category(data_df)

        # Show the mapping on stdout as indented JSON.
        print("Grouped labels by category:")
        print(json.dumps(grouped_labels, indent=4))

        # Persist the same JSON text next to the script for later reuse.
        output_file_path = 'grouped_labels.json'
        with open(output_file_path, 'w') as f:
            f.write(json.dumps(grouped_labels, indent=4))

        print(f"\nLabels saved to '{output_file_path}'")

    except FileNotFoundError:
        # Reported against the input path.
        print(f"Error: File not found at the specified path: {file_path}")
    except Exception as e:
        # Any other failure is reported rather than raised.
        print(f"An error occurred: {e}")