In [1]:
import pandas as pd
import zipfile
import json
import os

In [2]:
root = "."
pathogen_code = "mtuberculosis"

# Only the first MAX_ASSAYS rows of assays.csv are processed.
# NOTE(review): presumably parameter files exist only for these rows — confirm.
MAX_ASSAYS = 450

# Directory holding the inputs and outputs of this pathogen
OUTPUT_DIR = os.path.join(root, "..", "output", pathogen_code)


def _unit_to_filename_token(unit):
    """Return the form of ``unit`` that is embedded in parameter file names.

    Anything that is not a string (presumably the float NaN pandas yields for
    an empty cell) becomes the literal ``'nan'``. For strings, ``'/'`` is
    replaced by ``'FwdS'`` and spaces by ``'__'``.
    """
    if not isinstance(unit, str):
        return "nan"
    return unit.replace("/", "FwdS").replace(" ", "__")


def _join_values(values):
    """Render a JSON field as a single ``', '``-separated string.

    ``None`` (JSON null) gives ``''``; a bare string is returned unchanged,
    because joining it would split it into single characters; any other
    iterable has its items converted to ``str`` and joined.
    """
    if values is None:
        return ""
    if isinstance(values, str):
        return values
    return ", ".join(str(value) for value in values)


# Get assay data
ASSAYS = pd.read_csv(os.path.join(OUTPUT_DIR, "assays.csv"), low_memory=False)[:MAX_ASSAYS]

# Set path to parameters
PATH_TO_PARAMETERS = os.path.join(OUTPUT_DIR, "parameters")

# For each assay, read its JSON parameter file and collect one row
rows = []
for assay_id, assay_type, act_type, unit in zip(ASSAYS["assay_id"], ASSAYS["assay_type"], ASSAYS["activity_type"], ASSAYS["unit"]):

    # Change unit format to the one used in the file names
    unit = _unit_to_filename_token(unit)

    # Load JSON file; a missing file raises FileNotFoundError on purpose so
    # that gaps in the parameters directory are not silently ignored
    filepath = os.path.join(PATH_TO_PARAMETERS, "_".join([assay_id, act_type, unit]) + "_parameters.json")
    with open(filepath, "r", encoding="utf-8") as f:
        js = json.load(f)

    # Collect values, falling back to the assays.csv values (or an empty
    # string) when a key is absent from the JSON file
    rows.append([
        js.get("assay_id", assay_id),
        js.get("assay_type", assay_type),
        js.get("activity_type", act_type),
        js.get("unit", unit),
        js.get("organism", ""),
        js.get("strain", ""),
        _join_values(js.get("mutations", [])),
        _join_values(js.get("known_drug_resistances", [])),
        js.get("media", ""),
    ])

# Build the DataFrame once from the collected rows
PARAMETERS = pd.DataFrame(rows, columns=["Assay ID", "Assay type", "Activity Type", "Unit", "Organism", "Strain", "Mutations", "Known drug resistances", "Media"])

# Save to TSV
PARAMETERS.to_csv(os.path.join(OUTPUT_DIR, "assays_parameters.tsv"), sep='\t', index=False)

In [5]:
# Display only the rows whose "Mutations" value is not the empty string
PARAMETERS.loc[PARAMETERS["Mutations"].ne("")]

Unnamed: 0,Assay ID,Assay type,Activity Type,Unit,Organism,Strain,Mutations,Known drug resistances,Media
80,CHEMBL3387309,F,INHIBITION,%,Mycobacterium tuberculosis,H37Rv,Rv3161c,,
86,CHEMBL4425486,F,MIC90,umol.L-1,Mycobacterium tuberculosis,mc2 6220,panCD/lysA deletion,,
122,CHEMBL4425484,F,MIC90,umol.L-1,Mycobacterium tuberculosis,mc2 6220,"panCD deletion, lysA deletion",,
184,CHEMBL4425485,F,MIC90,umol.L-1,Mycobacterium tuberculosis,mc2 6220,panCD/lysA deletion mutant,,
191,CHEMBL5623473,F,MIC99,umol.L-1,Mycobacterium tuberculosis,CNCTC My 331/88 (H37Rv),dprE1 (Rv3790),,Sulas semisynthetic medium
196,CHEMBL3387308,F,MIC99,,Mycobacterium tuberculosis,H37Rv,Rv3161c,,
232,CHEMBL4308568,F,MIC,umol.L-1,Mycobacterium tuberculosis,H37Rv,mmpL3,,
254,CHEMBL4189875,B,KI,umol.L-1,Mycobacterium tuberculosis,,IMPDH2deltaCBS,,
264,CHEMBL4395372,F,MIC90,umol.L-1,Mycobacterium tuberculosis,H37Rv SRMV2.6,GuaB2 Tyr487Cys,,
287,CHEMBL4059067,B,MIC=<90,umol.L-1,Mycobacterium tuberculosis H37Rv,,single nucleotide polymorphism in pks13,,
