# In [1]:
import pandas as pd

def read_pool_file(file_path):
    """
    Loads a tab-separated pool sheet into a DataFrame.

    Failures are reported on stdout rather than raised, so callers must
    check the result for None.

    Parameters:
        file_path (str): Location of the tab-separated file.

    Returns:
        DataFrame or None: The parsed table, or None when the file is
        missing or cannot be read.
    """
    try:
        pool_table = pd.read_csv(file_path, sep='\t')
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    except Exception as e:
        # Best-effort reader: any other read/parse problem is reported, not raised.
        print(f"An error occurred while reading the file: {str(e)}")
    else:
        return pool_table
    return None

def process_pool_data(df):
    """
    Splits the pool table into one PlateInfo DataFrame per group.

    Rows are grouped by their (Pool, Organism, Method) values. Rows where
    any of those three values is missing are left out, so every returned
    DataFrame has at least one row.

    Parameters:
        df (DataFrame or None): Input table. It must provide the columns
            'Pool', 'Organism', 'Method', 'Sample name' and 'Unnamed: 12'.

    Returns:
        dict or None: None when `df` is None. Otherwise a dictionary keyed
        by "<Pool>_<Organism>_<Method>", in order of first appearance,
        whose values are DataFrames with the columns Pool, Organism,
        Method, plate_id, multiplex_group and primer_name.
    """
    if df is None:
        return None

    group_columns = ['Pool', 'Organism', 'Method']
    dfs = {}
    # groupby skips rows whose key contains NaN. Filtering with `==` against a
    # NaN key matches nothing and used to yield empty frames that broke the
    # `.iloc[0]` lookups in the output step. sort=False keeps the groups in
    # order of first appearance; observed=True avoids phantom groups should a
    # key column ever be categorical.
    grouped = df.groupby(group_columns, sort=False, observed=True)
    for (pool_value, organism_value, method_value), group_rows in grouped:
        key = f"{pool_value}_{organism_value}_{method_value}"
        dfs[key] = pd.DataFrame({
            # The scalar group values are broadcast down every row.
            'Pool': pool_value,
            'Organism': organism_value,
            'Method': method_value,
            'plate_id': group_rows['Sample name'].str.strip(),
            'multiplex_group': 1,
            # The primer column has no header in the input, so pandas names
            # it by position.
            'primer_name': group_rows['Unnamed: 12']
        })
    return dfs

def generate_plateinfo_output(dfs, n_random_index, input_plate_size, email, mapping_file):
    """
    Writes one "Plateinfo-<key>.txt" file per entry of `dfs`.

    Each file holds a [CriticalInfo] section, an empty [LibraryInfo]
    section and a [PlateInfo] section containing the entry's table as
    tab-separated text. Entries that are empty, or for which no
    configuration is found in `mapping_file`, are reported on stdout and
    skipped.

    Parameters:
        dfs (dict): DataFrames keyed by group name. Each must provide the
            columns 'Pool', 'Organism' and 'Method'.
        n_random_index (int): Value written as `n_random_index`.
        input_plate_size (int): Value written as `input_plate_size`.
        email (str): Value written as `email`.
        mapping_file (str): Path to the tab-separated organism/method to
            configuration mapping, passed to `retrieve_config`.

    Returns:
        None
    """
    if not dfs:
        print("Error: No dataframes found.")
        return

    for key, new_df in dfs.items():
        # Without rows there is nothing to read the group values from, and
        # `.iloc[0]` would raise IndexError and abort the remaining groups.
        if new_df.empty:
            print(f"Warning: Skipping '{key}' because it contains no rows.")
            continue

        # Every row of a group carries the same values, so the first row suffices.
        pool_value = new_df['Pool'].iloc[0]
        organism_value = new_df['Organism'].iloc[0]
        method_value = new_df['Method'].iloc[0]

        # The configuration is only looked up to validate and report it; its
        # path is not written to the output file.
        config_path = retrieve_config(organism_value, method_value, mapping_file)
        if config_path is None:
            print(f"Error: Configuration not found for organism '{organism_value}' and method '{method_value}'.")
            continue

        print(f"The mapping config path is {config_path}")

        plateinfo_file_name = f"Plateinfo-{key}.txt"

        header = "".join([
            "[CriticalInfo]\n",
            f"n_random_index={n_random_index}\n",
            f"input_plate_size={input_plate_size}\n",
            f"pool_id={pool_value}\n",  # The pool value doubles as pool_id ...
            f"tube_label={pool_value}\n",  # ... and as tube_label.
            f"email={email}\n",
            "\n[LibraryInfo]\n",
            "\n[PlateInfo]\n",
        ])

        with open(plateinfo_file_name, 'w') as plateinfo_file:
            plateinfo_file.write(header)
            # Only Pool and Organism are removed; all remaining columns
            # (including Method) are written to the file.
            # NOTE(review): confirm that keeping Method in [PlateInfo] is intended.
            new_df.drop(columns=['Pool', 'Organism']).to_csv(plateinfo_file, sep='\t', index=False)

def retrieve_config(organism, modality, filename):
    """
    Look up the configuration file path for an organism/modality pair.

    The mapping file is scanned line by line. Each line is stripped of
    surrounding whitespace and split on tabs; lines that do not yield
    exactly three fields (organism, modality, config path) are ignored.
    Organism and modality are compared case-insensitively, and the config
    path of the first matching line is returned.

    Args:
        organism (str): Organism to look up.
        modality (str): Modality to look up.
        filename (str): Path to the tab-separated mapping file.

    Returns:
        str or None: The matching configuration path, or None when no line
        matches.

    Example:
        Given a mapping file containing the tab-separated line
        ``mouse<TAB>snm3C<TAB>/path/to/mouse_snm3C_config.ini``,
        ``retrieve_config("mouse", "snm3C", "mapping_configs.tsv")``
        returns ``"/path/to/mouse_snm3C_config.ini"``.
    """
    with open(filename, 'r') as mapping:
        for raw_line in mapping:
            columns = raw_line.strip().split('\t')
            if len(columns) != 3:
                # Blank or malformed line: nothing to compare against.
                continue
            if columns[0].lower() != organism.lower():
                continue
            if columns[1].lower() == modality.lower():
                return columns[2]
    return None

def main():
    """
    Builds PlateInfo files from the hard-coded pool sheet and mapping table.

    Reads 'SALK041.txt', groups its rows and writes one PlateInfo file per
    group using 'mapping_configs.tsv'. Stops early when the pool sheet
    cannot be read.
    """
    pool_table = read_pool_file('SALK041.txt')
    if pool_table is None:
        # read_pool_file has already reported the failure.
        return

    plate_frames = process_pool_data(pool_table)
    generate_plateinfo_output(
        plate_frames,
        n_random_index=384,
        input_plate_size=384,
        email="",
        mapping_file='mapping_configs.tsv',
    )

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


# Output:
# The mapping config path is /gale/netapp/home/seq/mapping_config/snm3C-seq-V2.hg38.mapping_config.ini
