# Modeling ESKAPE pathogens inhibition

In [2]:
import pandas as pd
import os

# Directory holding the CO-ADD export (relative to the notebook's working
# directory); the per-strain CSV files are written to the same place.
DATAPATH = "../data/coadd"

# Load the full inhibition table. low_memory=False makes pandas parse the file
# in one pass instead of in chunks, which avoids mixed-dtype column inference.
df = pd.read_csv(
    os.path.join(DATAPATH, "CO-ADD_InhibitionData_r03_01-02-2020_CSV.csv"),
    low_memory=False,
)

In [2]:
import collections

# Group the measurements by assay condition:
#   (ORGANISM, STRAIN, CONC) -> list of (SMILES, INHIB_AVE) pairs,
# in the order the rows appear in `df`.
data_conc = collections.defaultdict(list)

for organism, strain, conc, smiles, inhib_ave in df[
    ["ORGANISM", "STRAIN", "CONC", "SMILES", "INHIB_AVE"]
].values:
    data_conc[(organism, strain, conc)].append((smiles, inhib_ave))

In [3]:
# (organism, strain) pairs to keep; used for membership tests against the
# first two fields of each grouped-record key.
selected_strains = {
    ('Escherichia coli', 'lpxC; MB4902'),
    ('Escherichia coli', 'tolC; MB5747'),
    ('Staphylococcus aureus', 'ATCC 43300; MRSA'),
    ('Candida albicans', 'ATCC 90028'),
    ('Cryptococcus neoformans', 'ATCC 208821; H99'),
    ('Escherichia coli', 'ATCC 25922'),
    ('Klebsiella pneumoniae', 'ATCC 700603; MDR'),
    ('Acinetobacter baumannii', 'ATCC 19606'),
    ('Pseudomonas aeruginosa', 'ATCC 27853'),
}

# Only records whose concentration field equals this exact string are kept.
selected_concentration = '32 ug/mL'

In [10]:
# Keep only the selected strains at the selected concentration. With the
# concentration fixed, the key shrinks from (organism, strain, conc) to
# (organism, strain); the value is the untouched list of (smiles, inhibition).
data = {
    (organism, strain): records
    for (organism, strain, conc), records in data_conc.items()
    if conc == selected_concentration and (organism, strain) in selected_strains
}

In [14]:
import os
import csv

# Short label -> (organism, strain) pair. The label becomes the stem of the
# CSV file written for that strain.
names = dict(
    ecoli_lpxc=('Escherichia coli', 'lpxC; MB4902'),
    ecoli_tolc=('Escherichia coli', 'tolC; MB5747'),
    saureus=('Staphylococcus aureus', 'ATCC 43300; MRSA'),
    calbicans=('Candida albicans', 'ATCC 90028'),
    cneoformans=('Cryptococcus neoformans', 'ATCC 208821; H99'),
    ecoli_atcc=('Escherichia coli', 'ATCC 25922'),
    kpneumoniae=('Klebsiella pneumoniae', 'ATCC 700603; MDR'),
    abaumannii=('Acinetobacter baumannii', 'ATCC 19606'),
    paeruginosa=('Pseudomonas aeruginosa', 'ATCC 27853'),
)

# Reverse lookup: (organism, strain) pair -> short label.
names_inv = {pair: label for label, pair in names.items()}


# Write one CSV per selected strain to <DATAPATH>/<label>.csv, with a
# "smiles,inhibition" header followed by one row per measurement.
# Raises KeyError if a strain in `data` has no entry in `names_inv`.
for strain_key, records in data.items():
    fn = names_inv[strain_key]
    # newline="" is required when handing a file to csv.writer: the writer
    # emits its own "\r\n" terminators, and without it text-mode newline
    # translation on Windows turns them into "\r\r\n" (blank rows).
    with open(os.path.join(DATAPATH, fn + ".csv"), "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["smiles", "inhibition"])
        # float() coerces each inhibition value to a plain Python float and
        # raises if the value cannot be interpreted as a number.
        writer.writerows(
            [smiles, float(inhibition)] for smiles, inhibition in records
        )