In [7]:
import pandas as pd
import os

# Input CSVs: the template table and the PLAID table that is split into batches.
template_file = "input/OASIS_JUMP-Target2_ECP_source.csv"
plaid_file = "output/OASIS_JUMP-Target2_ECP_PLAID.csv"

# Load the PLAID table first, then the template it will be matched against.
df = pd.read_csv(plaid_file)
template_df = pd.read_csv(template_file)

# Deal the distinct plate IDs round-robin into four batches:
# batch k receives plates k, k+4, k+8, ... of `unique_plates`.
unique_plates = df['plateID'].unique()
batches = [unique_plates[start::4] for start in range(4)]

def _number_replicates_per_plate(frame, mask):
    """Append ``_rep<N>`` to ``frame['cmpdnum']`` for the rows selected by *mask*.

    Numbering is done per (plateID, cmpdnum) group and restarts at 1 on every
    plate, so the suffixes on each plate line up with a template that is
    numbered once.  Plates on which *mask* selects at most one row are left
    untouched.  *frame* is modified in place.
    """
    if not mask.any():
        return
    # Count selected rows per plate; cast to int so the sum is a real count.
    per_plate = mask.astype(int).groupby(frame['plateID']).transform('sum')
    mask = mask & (per_plate > 1)
    if mask.any():
        counter = frame[mask].groupby(['plateID', 'cmpdnum']).cumcount() + 1
        frame.loc[mask, 'cmpdnum'] += '_rep' + counter.astype(str)


# Number the template's DMSO rows (DMSO_rep1, DMSO_rep2, ...) so that each one
# can be paired with a distinct DMSO well.  This does not depend on the batch,
# so it is done once here instead of mutating template_df inside the loop.
dmso_mask = template_df['Compound'] == 'DMSO'
if dmso_mask.sum() > 1:
    counter = template_df[dmso_mask].groupby('Compound').cumcount() + 1
    template_df.loc[dmso_mask, 'Compound'] += '_rep' + counter.astype(str)
template_compounds = set(template_df['Compound'])

for batch_index, batch_plates in enumerate(batches, start=1):
    batch_df = df[df['plateID'].isin(batch_plates)].copy()
    batch_file = f'output/OASIS_DILI_ECP_PLAID_Batch{batch_index}.csv'
    # NOTE(review): the batch CSV is written before the renaming below, so it
    # keeps the original compound names -- confirm this is intended.
    batch_df.to_csv(batch_file, index=False)
    print(f"Batch {batch_index} saved to {batch_file}")

    # Update compound names without dose information:
    #   'comp' -> 'Cpd', and 'ctrl<digits>' (plus an optional ')') -> 'JUMP<digits>'.
    batch_df['cmpdname'] = batch_df['cmpdname'].str.replace('comp', 'Cpd', regex=False)
    batch_df['cmpdname'] = batch_df['cmpdname'].str.replace(r'ctrl(\d+)\)?', r'JUMP\1', regex=True)
    batch_df['CONCuM'] = 1
    # cmpdnum starts as a copy of the cleaned name and then receives suffixes.
    batch_df['cmpdnum'] = batch_df['cmpdname']

    # Give repeated DMSO and JUMP wells unique, per-plate replicate suffixes.
    dmso_mask = batch_df['cmpdnum'] == 'DMSO'
    _number_replicates_per_plate(batch_df, dmso_mask)

    # na=False keeps the mask boolean even if a compound name is missing.
    jump_mask = batch_df['cmpdnum'].str.startswith('JUMP', na=False)
    _number_replicates_per_plate(batch_df, jump_mask)

    # Report template compounds that have no counterpart anywhere in this batch.
    unmatched = template_compounds - set(batch_df['cmpdnum'])
    if unmatched:
        print(f"Batch {batch_index}: Unmatched compounds: {unmatched}")
    else:
        print(f"Batch {batch_index}: All compounds are matched.")

    plates_in_batch = batch_df['plateID'].unique()
    for plate in plates_in_batch:
        plate_data = batch_df[batch_df['plateID'] == plate].copy()

        dup_mask = plate_data['cmpdnum'].duplicated()
        if dup_mask.any():
            print(f"Warning: Duplicate cmpdnum values found in plate {plate}. Resolving...")
            # Suffix only the repeats; the first occurrence keeps its name so
            # it can still be matched against the template.
            dup_index = plate_data.groupby('cmpdnum').cumcount()
            plate_data.loc[dup_mask, 'cmpdnum'] += '_dup' + dup_index[dup_mask].astype(str)

        # Look up each template compound's well on this plate.
        well_by_compound = dict(zip(plate_data['cmpdnum'], plate_data['well']))
        merged = template_df.copy()
        merged['Dest Well'] = merged['Compound'].map(well_by_compound)
        # Strip the helper suffix again so the output shows plain 'DMSO'.
        merged['Compound'] = merged['Compound'].str.replace(r'^(DMSO)_rep\d+$', r'\1', regex=True)

        if len(plates_in_batch) > 1:
            # Several plates share this batch: include the plate ID so that
            # one plate's file does not overwrite another's.
            output_file = f'output/OASIS_JUMP-Target2_ECP_source_Batch{batch_index}_{plate}.csv'
        else:
            output_file = f'output/OASIS_JUMP-Target2_ECP_source_Batch{batch_index}.csv'
        merged.to_csv(output_file, index=False)
        print(f"Saved {output_file}")


Batch 1 saved to output/OASIS_DILI_ECP_PLAID_Batch1.csv
Batch 1: All compounds are matched.
Saved output/OASIS_JUMP-Target2_ECP_source_Batch1.csv
Batch 2 saved to output/OASIS_DILI_ECP_PLAID_Batch2.csv
Batch 2: All compounds are matched.
Saved output/OASIS_JUMP-Target2_ECP_source_Batch2.csv
Batch 3 saved to output/OASIS_DILI_ECP_PLAID_Batch3.csv
Batch 3: All compounds are matched.
Saved output/OASIS_JUMP-Target2_ECP_source_Batch3.csv
Batch 4 saved to output/OASIS_DILI_ECP_PLAID_Batch4.csv
Batch 4: All compounds are matched.
Saved output/OASIS_JUMP-Target2_ECP_source_Batch4.csv
