# This script processes data from a TSV file and organizes it into a structured JSON file. It uses the pandas library for data handling and the json library for saving the final output. The main goal is to create a dictionary where each key is a label built from the first two hyphen-separated parts of a column name (e.g., a column named ContA-ContB-... gets the key ContA:ContB), and the corresponding value is the list of all values in that column of the original TSV file.

# In [None]:
import json
import warnings

import pandas as pd

def load_data(file_path):
    """Read the tab-separated file at ``file_path`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(file_path, sep="\t")
    return frame

def create_labeled_data_dictionary(df):
    """
    Build a mapping from 'Prefix1:Prefix2' labels to lists of column values.

    Each column name is split on '-'. A column whose name has at least two
    hyphen-separated parts contributes one entry: the key is the first two
    parts joined by ':' (any further parts are ignored) and the value is the
    whole column converted to a plain Python list. Columns without a hyphen
    are skipped, as are columns whose label is not a string (for example the
    integer labels pandas assigns to a frame without a header row), which
    previously raised AttributeError.

    If several columns reduce to the same label, the last such column wins,
    exactly as before; a single UserWarning naming the affected labels is
    emitted so the discarded columns do not go unnoticed.

    Args:
        df: pandas DataFrame whose column names encode the label pair.

    Returns:
        dict mapping each 'Prefix1:Prefix2' label to the list of values of
        the last column that produced it. Keys appear in the order their
        label was first encountered.
    """
    labeled_data = {}
    overwritten = []  # labels produced by more than one column, in first-seen order
    for col in df.columns:
        # Non-string labels cannot follow the 'Prefix1-Prefix2' convention.
        if not isinstance(col, str):
            continue

        parts = col.split('-')
        if len(parts) < 2:
            continue

        label = f"{parts[0]}:{parts[1]}"
        if label in labeled_data and label not in overwritten:
            overwritten.append(label)

        # Later columns replace earlier ones that share the same label.
        labeled_data[label] = df[col].tolist()

    if overwritten:
        # One aggregated warning instead of one per collision keeps output readable.
        warnings.warn(
            "Multiple columns map to the same label; only the last column "
            f"is kept for: {', '.join(overwritten)}",
            stacklevel=2,
        )

    return labeled_data

# In [None]:
if __name__ == "__main__":
    file_path = "yashoda_datasets_code/gc_network_wide_with_depression_filtered.tsv"

    try:
        # Read the TSV, then regroup its columns under 'Prefix1:Prefix2' labels.
        data_df = load_data(file_path)
        labeled_data = create_labeled_data_dictionary(data_df)

        # Write the label -> values mapping as 4-space-indented JSON,
        # relative to the current working directory.
        output_file_path = "labeled_data.json"
        with open(output_file_path, mode="w") as json_out:
            json.dump(labeled_data, json_out, indent=4)

        print(f"Labeled data has been saved to '{output_file_path}'")

    except FileNotFoundError:
        # The message always names the input path.
        print(f"Error: The file was not found at the specified path: {file_path}")
    except Exception as err:
        # Any other failure is printed rather than propagated as a traceback.
        print(f"An error occurred: {err}")