In [1]:
import pandas as pd
import os
import subprocess

# Width of None lets pandas detect the terminal width instead of wrapping at a fixed column.
pd.options.display.width = None
# Max column width of None disables truncation of long cell values in DataFrame reprs.
pd.options.display.max_colwidth = None

def read_csv_file(file_path):
    """
    Load a CSV into a pandas DataFrame.

    ``file_path`` is handed straight to ``pd.read_csv`` with default parser
    settings; the resulting DataFrame is returned unchanged.
    """
    frame = pd.read_csv(file_path)
    return frame

def merge_library_pool_components_dfs(df1, df2):
    """
    Stack two library pool component tables and give them one shared pool name.

    The shared name is built from the first unique 'library_pool_local_name'
    value of each input, sorted and joined with an underscore. Any further
    distinct names in either input are not used. Every row of the stacked
    table has its 'library_pool_local_name' overwritten with the shared name.

    Returns a tuple ``(stacked_dataframe, shared_name)``.
    """
    name_column = 'library_pool_local_name'
    stacked = pd.concat([df1, df2], ignore_index=True)

    # Only the first distinct name from each side contributes to the new name.
    name_pair = [df1[name_column].unique()[0], df2[name_column].unique()[0]]
    name_pair.sort()
    shared_name = '_'.join(name_pair)

    stacked[name_column] = shared_name
    return stacked, shared_name

def write_csv_file(data, output_file_path):
    """
    Write a DataFrame to a CSV file, creating the parent directory if needed.

    The file is written without the index column, and a confirmation line
    naming the destination is printed afterwards.

    Args:
        data: DataFrame to serialise.
        output_file_path: Destination path of the CSV file. May be a bare
            file name, in which case the file lands in the current working
            directory and no directory is created.
    """
    parent_directory = os.path.dirname(output_file_path)
    # dirname() is '' for a bare file name and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    if parent_directory:
        os.makedirs(parent_directory, exist_ok=True)
    data.to_csv(output_file_path, index=False)
    print(f"Data written to {output_file_path}")  # Controlled print statement

def check_and_convert_date_format(data, column_name, original_format, target_format):
    """
    Reformat the date strings of one DataFrame column in place.

    Values in ``data[column_name]`` are parsed with ``original_format`` and
    written back as strings in ``target_format``. Values that cannot be
    parsed are coerced to missing rather than raising. The DataFrame is
    modified directly and nothing is returned.
    """
    parsed_dates = pd.to_datetime(
        data[column_name],
        format=original_format,
        errors='coerce',
    )
    data[column_name] = parsed_dates.dt.strftime(target_format)

def combine_pools(input_directory1, input_directory2, new_barcode):
    """
    Merge the 'library_pool_component.csv' tables of two pool directories.

    The merged table is written to a new directory that sits next to
    ``input_directory1`` and is named after the concatenated pool name.

    Args:
        input_directory1: Directory of the first pool; its parent directory
            receives the output directory.
        input_directory2: Directory of the second pool.
        new_barcode: Accepted for interface compatibility; not used by this
            function. 'library_pool.csv' is not processed here.

    Returns:
        Path of the output directory that now holds the merged CSV.
    """
    component_file_name = 'library_pool_component.csv'

    df1 = read_csv_file(os.path.join(input_directory1, component_file_name))
    df2 = read_csv_file(os.path.join(input_directory2, component_file_name))
    merged_df, concatenated_name = merge_library_pool_components_dfs(df1, df2)

    # Normalise first: for a path with a trailing separator, dirname() would
    # return the input directory itself and the output would be nested inside
    # input 1 instead of being created beside it.
    parent_directory = os.path.dirname(os.path.normpath(input_directory1))
    output_directory = os.path.join(parent_directory, concatenated_name)

    write_csv_file(merged_df, os.path.join(output_directory, component_file_name))

    return output_directory

def process_library_pool(input_directory, output_directory, concatenated_name, new_barcode):
    """
    Load 'library_pool.csv' from ``input_directory`` and relabel it for the combined pool.

    The preparation date column is reformatted from '%y/%m/%d' to '%m/%d/%Y',
    the pool's local name and local tube id are both set to
    ``concatenated_name``, and the tube barcode is set to ``new_barcode``.
    ``output_directory`` is accepted but not used; nothing is written to disk.

    Returns the updated DataFrame.
    """
    data = read_csv_file(os.path.join(input_directory, 'library_pool.csv'))

    check_and_convert_date_format(data, 'library_pool_preparation_date', '%y/%m/%d', '%m/%d/%Y')

    # Column -> value to stamp on every row; applied in this order.
    overrides = {
        'library_pool_local_name': concatenated_name,
        'library_pool_local_tube_id': concatenated_name,
        'library_pool_tube_barcode': new_barcode,
    }
    for column, value in overrides.items():
        data[column] = value

    return data

def concatenate_other_tables(input_directory1, input_directory2, output_directory):
    """
    Concatenate the remaining per-pool tables from two directories.

    For each of 'barcoded_cell_sample_component.csv',
    'barcoded_cell_sample.csv' and 'library.csv', the rows of the file in
    ``input_directory1`` are followed by the rows of the file in
    ``input_directory2`` (with a fresh index), and the result is written
    under the same file name in ``output_directory`` without the index column.
    Nothing is printed.

    Args:
        input_directory1: Directory containing the first set of CSV files.
        input_directory2: Directory containing the second set of CSV files.
        output_directory: Directory to write the combined CSV files to;
            created if it does not exist yet.

    Returns:
        Tuple ``(components, samples, libraries)`` of the combined DataFrames.
    """
    table_file_names = (
        'barcoded_cell_sample_component.csv',
        'barcoded_cell_sample.csv',
        'library.csv',
    )

    # Read and combine every table before writing anything, so a missing or
    # unreadable input does not leave a partially written output directory.
    combined_tables = [
        pd.concat(
            [
                pd.read_csv(os.path.join(input_directory1, file_name)),
                pd.read_csv(os.path.join(input_directory2, file_name)),
            ],
            ignore_index=True,
        )
        for file_name in table_file_names
    ]

    # to_csv does not create missing directories; an empty string means the
    # current working directory, which needs no creation.
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    for file_name, table in zip(table_file_names, combined_tables):
        table.to_csv(os.path.join(output_directory, file_name), index=False)

    return tuple(combined_tables)

# Directories of the two pools whose CSV tables are combined below.
input_path1 = '/gale/netapp/seq11/illumina_runs/seq_lib_portal/SALK062'
input_path2 = '/gale/netapp/seq11/illumina_runs/seq_lib_portal/SALK063'
# Passed to combine_pools as new_barcode.
barcode = 'SM-NQUJ9'

# Merge the 'library_pool_component.csv' tables; the return value is the
# directory the merged file was written to.
output_directory_path = combine_pools(input_path1, input_path2, barcode)
# Concatenate the remaining tables into that same directory. The call is a
# bare final expression, so when run as a notebook cell its returned tuple of
# DataFrames is presumably what gets echoed as the cell output.
concatenate_other_tables(input_path1, input_path2, output_directory_path)


Data written to /gale/netapp/seq11/illumina_runs/seq_lib_portal/SALK062_SALK063/library_pool_component.csv


(      barcoded_cell_sample_local_name barcoded_cell_sample_source_type  \
 0      UWA7648_CX2324_SubTH_1_P1-1-I3             enriched_cell_sample   
 1      UWA7648_CX2324_SubTH_1_P2-1-E5             enriched_cell_sample   
 2      UWA7648_CX2324_SubTH_1_P3-1-O5             enriched_cell_sample   
 3      UWA7648_CX2324_SubTH_1_P4-1-C7             enriched_cell_sample   
 4      UWA7648_CX2324_SubTH_1_P5-1-K9             enriched_cell_sample   
 5      UWA7648_CX2324_SubTH_1_P6-1-M9             enriched_cell_sample   
 6     UWA7648_CX2324_SubTH_1_P7-1-A11             enriched_cell_sample   
 7     UWA7648_CX2324_SubTH_1_P8-1-I15             enriched_cell_sample   
 8     UWA7648_CX2324_SubTH_1_P9-1-M14             enriched_cell_sample   
 9    UWA7648_CX2324_SubTH_1_P10-1-K18             enriched_cell_sample   
 10   UWA7648_CX2324_SubTH_1_P11-1-K20             enriched_cell_sample   
 11       UWA7648_BS02_MGM3_1_P1-1-E22             enriched_cell_sample   
 12       UWA7648_BS02_MG