# In [None]:
import os
import pandas as pd

def calculate_consensus_grn(adj_directory, results_foldername, dataset_id, edge_threshold=4, seeds=('111', '222', '333', '444', '555')):
    """
    Calculate a consensus gene regulatory network (GRN) across multiple runs.

    Every tab-separated adjacency file in ``adj_directory`` whose name contains
    one of ``seeds`` is read. Rows are grouped by ``(TF, target)``; an edge is
    kept when it appears in at least ``edge_threshold`` rows overall, and its
    consensus importance is the mean importance over those rows. The result is
    written to ``<results_foldername>/consensus_adj/<dataset_id>_consensus_adj.tsv``.

    Parameters:
        adj_directory (str): Directory containing adjacency files. Each file
            must provide 'TF', 'target' and 'importance' columns.
        results_foldername (str): Directory to save the consensus GRN in
            (a 'consensus_adj' sub-directory is created if needed).
        dataset_id (str): Identifier used to name the output file.
        edge_threshold (int): Minimum number of appearances for an edge to be
            included in the consensus.
        seeds (sequence of str): Seeds corresponding to different runs; a file
            is selected when its name contains any of them.

    Returns:
        pandas.DataFrame: Consensus edges with the 'TF' and 'target' columns
        first, followed by the averaged 'importance'.

    Raises:
        FileNotFoundError: If no regular file in ``adj_directory`` matches any
            of the seeds (or the directory itself does not exist).
    """
    # Select each file at most once, even if its name matches several seeds;
    # otherwise the same run would be counted multiple times. Sorting makes the
    # processing order independent of the filesystem's listing order.
    matching_names = sorted(
        name for name in os.listdir(adj_directory)
        if any(seed in name for seed in seeds)
    )
    candidate_paths = [os.path.join(adj_directory, name) for name in matching_names]
    file_paths = [path for path in candidate_paths if os.path.isfile(path)]

    if not file_paths:
        raise FileNotFoundError(
            f'No adjacency files matching seeds {list(seeds)} found in {adj_directory}'
        )

    if len(file_paths) == len(seeds):
        print(f'Inferring consensus GRN for {adj_directory}')
    else:
        print(
            f'Warning: expected {len(seeds)} adjacency files in {adj_directory}, '
            f'found {len(file_paths)}'
        )

    # Read every run, tagging each row with count=1 so that a single grouped
    # sum yields both the total importance and the number of appearances.
    runs = []
    for path in file_paths:
        run_df = pd.read_csv(path, sep='\t')
        run_df['count'] = 1
        runs.append(run_df)

    combined = pd.concat(runs, ignore_index=True)
    consensus_df = combined.groupby(['TF', 'target'], as_index=False).sum()

    # Filter out edges below threshold (copy so later assignment is not made
    # on a view of the grouped frame).
    consensus_df = consensus_df[consensus_df['count'] >= edge_threshold].copy()

    # Average importance over the rows in which the edge appeared
    consensus_df['importance'] = consensus_df['importance'] / consensus_df['count']
    consensus_df = consensus_df.drop(columns='count')

    # Save consensus network
    consensus_adj_path = os.path.join(results_foldername, 'consensus_adj')
    os.makedirs(consensus_adj_path, exist_ok=True)
    c_adj_fname = os.path.join(consensus_adj_path, '{}_consensus_adj.tsv'.format(dataset_id))
    consensus_df.to_csv(c_adj_fname, sep='\t', index=False)
    print(f"Consensus GRN saved to {c_adj_fname}")
    return consensus_df

# Example usage: build the consensus network for one dataset from the
# per-seed adjacency files and show the resulting table.
ADJ_DIRECTORY = '/sc/arion/projects/CommonMind/collin/PsychAD/no_var_pilot/DISEASE_AD_CONTROL_HVG/Control/adj'
RESULTS_FOLDERNAME = '/sc/arion/projects/CommonMind/collin/PsychAD/no_var_pilot/DISEASE_AD_CONTROL_HVG/Control'
DATASET_ID = 'DISEASE_AD_CONTROL_HVG_Control'
C_GRN = calculate_consensus_grn(
    adj_directory=ADJ_DIRECTORY,
    results_foldername=RESULTS_FOLDERNAME,
    dataset_id=DATASET_ID,
)
C_GRN
