# This script reads a TSV file with pandas and derives a list of unique labels from its column names. Each column name is split on hyphens, and the first two parts are joined as 'CategoryA:CategoryB'; duplicates are dropped while the order of first appearance is preserved. The resulting list is printed and saved to a text file, one label per line.



# In [None]:
import pandas as pd

def load_data(file_path):
    """Read the tab-separated file at ``file_path`` and return it as a pandas DataFrame."""
    frame = pd.read_csv(file_path, sep='\t')
    return frame

def create_label_array(df):
    """
    Build the list of unique 'CategoryA:CategoryB' labels from the
    DataFrame's column names.

    Each column name is split on the hyphen '-'. When the split yields at
    least two parts, the first two are joined with ':' to form a label;
    column names without a hyphen contribute nothing. Column labels that
    are not strings (e.g. integers) cannot follow the hyphenated format
    and are skipped instead of raising AttributeError.

    Args:
        df: A DataFrame (or any object exposing an iterable ``columns``
            attribute) whose column labels are inspected.

    Returns:
        A list of label strings with duplicates removed, in order of
        first appearance. Empty if no column name contains a hyphen.
    """
    labels = []
    for col in df.columns:
        # Only string labels have a hyphenated structure to split.
        if not isinstance(col, str):
            continue

        parts = col.split('-')

        # A label needs both a first and a second part.
        if len(parts) >= 2:
            labels.append(f"{parts[0]}:{parts[1]}")

    # dict keys keep insertion order, so this de-duplicates while
    # preserving the order in which labels were first seen.
    return list(dict.fromkeys(labels))

# In [None]:
if __name__ == "__main__":
    # Script entry point: load the TSV, derive the unique labels from its
    # column names, print them, and write them to 'output_labels.txt'.
    file_path = 'yashoda_datasets_code/gc_network_wide_with_depression_filtered.tsv'

    try:
        # Load the data
        data_df = load_data(file_path)

        # Create the array of labels
        label_array = create_label_array(data_df)

        # Print the resulting array and its size to the console
        print("Array of labels created:")
        print(label_array)
        print(f"\nTotal number of unique labels: {len(label_array)}")

        # Save one label per line. The encoding is set explicitly so the
        # output does not depend on the platform's default locale encoding
        # (which may fail on non-ASCII labels).
        with open('output_labels.txt', 'w', encoding='utf-8') as f:
            f.writelines(f"{item}\n" for item in label_array)

        print("\nArray of labels saved to output_labels.txt")

    except FileNotFoundError:
        print(f"Error: File not found at the path: {file_path}")
    except Exception as e:
        # Top-level boundary of the script: report any other failure
        # instead of showing a raw traceback.
        print(f"An error occurred: {e}")