In [22]:
import csv
import json

# Leading tokens of a drug column value that make fill_columns record the
# row's mutations for that drug (matched against the text before the first
# space of the cell).
# NOTE(review): presumably NI/RI/HRI stand for normal / reduced / highly
# reduced inhibition -- confirm against the source tables.
high_resistance_levels = [
    'RI',
    'HRI',
    'RI/HRI',
    'NI/RI/HRI',
]

In [23]:
def parse_mutations(mutation_string):
    """Split a mutation string into its first character, middle, and last character.

    Args:
        mutation_string: Non-empty string such as ``"N93D"``.

    Returns:
        A dict with keys ``"start"`` (first character), ``"pos"`` (everything
        between the first and last character, kept as a string) and ``"end"``
        (last character).

    Raises:
        IndexError: If ``mutation_string`` is empty.
    """
    first_char, middle, last_char = (
        mutation_string[0],
        mutation_string[1:-1],
        mutation_string[-1],
    )
    return dict(start=first_char, pos=middle, end=last_char)

def parsed_to_string(parsed_mutation):
    """Reassemble a dict produced by ``parse_mutations`` into a single string.

    Args:
        parsed_mutation: Dict with string values under ``"start"``, ``"pos"``
            and ``"end"``.

    Returns:
        The three values concatenated in the order start, pos, end.
    """
    pieces = (
        parsed_mutation["start"],
        parsed_mutation["pos"],
        parsed_mutation["end"],
    )
    return "".join(pieces)

def initialize_drug_columns():
    """Create a fresh mapping from drug name to an empty mutation list.

    Returns:
        A new dict keyed by ``'Oseltamivir'``, ``'Zanamivir'``, ``'Peramivir'``
        and ``'Laninamivir'`` (in that order), each mapped to its own empty list.
    """
    drug_names = ('Oseltamivir', 'Zanamivir', 'Peramivir', 'Laninamivir')
    # A comprehension evaluates `[]` once per key, so no list is shared.
    return {name: [] for name in drug_names}


In [24]:

def _apply_substitution(mutation, sub_dict):
    """Return ``mutation``, rewriting its start residue when ``sub_dict`` matches.

    If the mutation's position is a key of ``sub_dict`` and the recorded
    substitution's ``"end"`` equals the mutation's ``"start"``, the mutation is
    re-expressed with the substitution's ``"start"``; otherwise the mutation
    string is returned unchanged.
    """
    parsed = parse_mutations(mutation)
    if parsed["pos"] not in sub_dict:
        return mutation
    print("mutation in same position")
    known = sub_dict[parsed["pos"]]
    if known["end"] == parsed["start"]:
        parsed["start"] = known["start"]
        return parsed_to_string(parsed)
    return mutation


def fill_columns(input_file, sub_dict):
    """Collect, per drug, the mutations a CSV table flags with a high-resistance level.

    Each row must have a ``'Mutation'`` column and one column per drug returned
    by ``initialize_drug_columns``. A drug is flagged for a row when the text
    before the first space of its cell is in ``high_resistance_levels``. Every
    mutation token of the row is then appended to the list of every flagged
    drug, after passing through ``_apply_substitution``.

    Rows whose ``'Mutation'`` starts with ``"Del "`` have that prefix removed
    and are split on ``"-"``; all other rows are split on ``"+"``.

    Args:
        input_file: Path of the CSV file to read.
        sub_dict: Mapping from position string to a parsed substitution
            (dict with ``"start"``, ``"pos"``, ``"end"``), used to rewrite the
            start residue of matching mutations. May be empty.

    Returns:
        Dict mapping drug name to the list of collected mutation strings
        (duplicates are kept, in row order).

    Raises:
        KeyError: If a row lacks the ``'Mutation'`` column or a drug column.
    """
    drug_columns = initialize_drug_columns()
    with open(input_file, newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            mutations = row['Mutation']
            if mutations.startswith("Del "):
                # NOTE(review): tokens of a "Del " row go through the same
                # start/pos/end parsing as substitutions -- confirm this matches
                # the deletion notation used in the input tables.
                tokens = mutations[4:].split("-")
            else:
                tokens = mutations.split('+')

            # The level depends only on the row, so decide once per row which
            # drug lists receive this row's mutations.
            flagged_lists = [
                mutation_list
                for drug, mutation_list in drug_columns.items()
                if row[drug].split(' ')[0] in high_resistance_levels
            ]

            for token in tokens:
                # Tolerate "A + B" style spacing and stray separators instead of
                # recording " B" or crashing on an empty token.
                mutation = token.strip()
                if not mutation:
                    continue
                for mutation_list in flagged_lists:
                    mutation_list.append(_apply_substitution(mutation, sub_dict))
    return drug_columns


In [25]:
# H3N2v: substitutions keyed by position string, in the shape fill_columns
# expects for its sub_dict argument.
H3N2v_sub_list = "NA:N93D NA:D147N NA:H150R NA:V194I NA:I215V NA:L285P NA:Y310H NA:L370S NA:S372L NA:N387K".replace("NA:", "").split(" ")
H3N2v_sub_dict = {parsed["pos"]: parsed for parsed in map(parse_mutations, H3N2v_sub_list)}
drug_columns_H3N2v = fill_columns('H3N2v.csv', H3N2v_sub_dict)

H3N2_sub_list = "NA:I20V,NA:N43D,NA:P46A,NA:N47S,NA:L52P,NA:T56I,NA:T69N,NA:E74D,NA:I77K,NA:L81V,NA:A82V,NA:N93Q,NA:P126H,NA:D127G,NA:N141D,NA:G143K,NA:V149I,NA:T153I,NA:Y155H,NA:K172R,NA:V194I,NA:N208D,NA:K220Q,NA:K221N,NA:K249R,NA:K253R,NA:T265I,NA:T267P,NA:V290I,NA:I302V,NA:V307M,NA:K308E,NA:V313D,NA:K328N,NA:N329D,NA:S331R,NA:S334N,NA:H336N,NA:L338R,NA:D339N,NA:E344R,NA:G346N,NA:H347Q,NA:D356N,NA:N358D,NA:E368K,NA:K369D,NA:E381G,NA:K385T,NA:L390S,NA:R400S,NA:G401D,NA:E435R,NA:L437W,NA:D463N,NA:L466F".replace("NA:", "").split(",")
# Built from H3N2_sub_list; it was previously built from H3N2v_sub_list, which
# left H3N2_sub_list unused and applied the wrong substitutions to H3N2.csv.
H3N2_sub_dict = {parsed["pos"]: parsed for parsed in map(parse_mutations, H3N2_sub_list)}
drug_columns_H3N2 = fill_columns('H3N2.csv', H3N2_sub_dict)

# Combine both tables per drug. dict.update() would replace each H3N2v list
# with the H3N2 one (both dicts have the same keys) and drop the H3N2v results.
# NOTE(review): duplicates across the two tables are kept, as fill_columns
# already keeps duplicates within a table.
for drug, mutation_list in drug_columns_H3N2.items():
    drug_columns_H3N2v.setdefault(drug, []).extend(mutation_list)

# Write one annotation file per drug.
for drug, mutation_list in drug_columns_H3N2v.items():
    dict_ = {
        'name': f'{drug} resistance mutations',
        'description': f'The following mutations are associated with reduced inhibition by {drug}.',
        'symbol': '*',
        'nucleotideMutations': [],
        'aminoAcidMutations': mutation_list,
    }
    with open(f'H3N2_{drug}.json', 'w') as jsonfile:
        # str(dict_) would write a Python repr (single quotes), which is not
        # valid JSON; json.dump writes a file JSON parsers can read.
        json.dump(dict_, jsonfile, indent=2)

In [26]:
# H1N1pdm: an empty substitution dict means fill_columns never rewrites the
# start residue of a mutation.
H1N1pdm_drug_columns = fill_columns('H1N1pdm.csv', {})

# Write one annotation file per drug.
for drug, mutation_list in H1N1pdm_drug_columns.items():
    dict_ = {
        'name': f'{drug} resistance mutations',
        'description': f'The following mutations are associated with reduced inhibition by {drug}.',
        'symbol': '*',
        'nucleotideMutations': [],
        'aminoAcidMutations': mutation_list,
    }
    with open(f'H1N1pdm_{drug}.json', 'w') as jsonfile:
        # str(dict_) would write a Python repr (single quotes), which is not
        # valid JSON; json.dump writes a file JSON parsers can read.
        json.dump(dict_, jsonfile, indent=2)