In [1]:
# Parameters
# Default values for the two notebook parameters. The next cell (also headed
# "# Parameters") assigns both names again, so the values set there are the
# ones the rest of the notebook works with.
# Refer to injected parameters for any run.ipynb (similar cell below)
# strain: becomes part of the input directory name and of the output file name.
strain = "BY4741"
# locus: the text after its last "-" is used as the hotspot label.
locus = "FKS1-HS1"

In [2]:
# Parameters
# Values for this run; they replace the defaults assigned in the previous cell.
# NOTE(review): presumably written by the notebook runner (e.g. papermill)
# rather than by hand — confirm before editing.
strain = "R1158"
locus = "FKS2-HS2"


In [3]:
# Hotspot label: whatever follows the last "-" in the locus name
# (the whole name when it contains no "-").
hotspot = locus.rpartition("-")[2]

# Notebook to compare classified variants with FungAMR literature reports

## Import libraries

In [4]:
import os

import numpy as np
import pandas as pd

## Specify paths

In [5]:
# Input files
# Classification table for the selected strain/locus combination
data_path = f"../classified/{strain}_{locus}/refined_classification.csv"
# FungAMR mutation table (literature reports)
fungamr_path = "../../pre/fungamr/fungamrmut_df.csv"

# Output directory
fungamr_outpath = "../fungamr/"

## Import data

In [6]:
# Classification table for the selected strain and locus
df = pd.read_csv(data_path)

# Mutation label = wt_aa + aa_pos + alt_aa (e.g. "R1376S"); this is the key
# later used to merge with the FungAMR table. Built column-wise instead of
# with a row-by-row apply, which also behaves sensibly on a table with no rows.
df["Scer_mutation"] = df["wt_aa"] + df["aa_pos"].astype(str) + df["alt_aa"]

# The FungAMR table is read from fungamr_path at the start of the comparison
# section, so it is not loaded a second time here.

## Compare data with FungAMR

In [7]:
# Import data from FungAMR for comparison (literature reports)
fungamr = pd.read_csv(fungamr_path).rename(columns={"drug": "compound"})
fungamr["compound"] = fungamr.compound.str.lower()

# Keep only reports for the current hotspot and the three compounds of interest
is_locus_report = (fungamr.Hotspot == hotspot) & fungamr.compound.isin(
    ["anidulafungin", "caspofungin", "micafungin"]
)
# Exclude low confidence literature reports
# NOTE(review): the 4 / -4 cut-offs are taken as-is; their meaning depends on
# the FungAMR scoring scheme — confirm.
is_confident_report = (fungamr.best_res <= 4) | (fungamr.best_sens >= -4)

# Explicit copy: the column added below must land on an independent frame, not
# on a slice of the full table (avoids SettingWithCopyWarning).
fungamr = fungamr[is_locus_report & is_confident_report].copy()
# Literal substring match on the species name (no regex interpretation)
fungamr["reported in Scer"] = fungamr.species.str.contains(
    "Saccharomyces cerevisiae", regex=False
)

In [8]:
def bool_cross(x, y):
    """Combine two phenotype calls into one agreement label.

    Returns NaN unless both ``x`` and ``y`` are either "resistant" or
    "sensitive". When both are valid, returns the shared call if they match
    and "disagreement" if they differ.
    """
    valid_calls = ("resistant", "sensitive")
    # Guard: anything other than two valid calls cannot be compared
    if not (x in valid_calls and y in valid_calls):
        return np.nan
    if x != y:
        return "disagreement"
    return "resistant" if x == "resistant" else "sensitive"

In [9]:
# Columns taken over from the FungAMR table
literature_cols = [
    "Scer_mutation",
    "compound",
    "phenotype",
    "best_res",
    "best_sens",
    "reported in Scer",
]

# Outer join: keep every (mutation, compound) pair present in either table
fungamr_merge = df.merge(
    fungamr[literature_cols],
    how="outer",
    on=["Scer_mutation", "compound"],
)
# Label each pair by crossing the call in `sensres` with the FungAMR `phenotype`
fungamr_merge["litterature_support"] = fungamr_merge.apply(
    lambda rec: bool_cross(rec["sensres"], rec["phenotype"]), axis=1
)

# Only pairs that received a label are summarised
supported = fungamr_merge[fungamr_merge["litterature_support"].notna()]
shared_pivot_args = {
    "index": "Scer_mutation",
    "columns": "compound",
    "aggfunc": "first",
}

# One row per mutation, one column per compound
agreements = supported.pivot_table(values="litterature_support", **shared_pivot_args)
best_scores = supported.pivot_table(
    values=["best_res", "best_sens", "reported in Scer"], **shared_pivot_args
)

# Group the (value, compound) columns of best_scores by their first label;
# a value missing from the pivot simply gets an empty list.
columns_by_score = {
    score: [col for col in best_scores.columns if col[0] == score]
    for score in ("best_res", "best_sens", "reported in Scer")
}

# Collapse across compounds: row-wise min of best_res, max of best_sens,
# and whether any compound has a report flagged "reported in Scer"
best_res = best_scores[columns_by_score["best_res"]].min(axis=1)
best_sens = best_scores[columns_by_score["best_sens"]].max(axis=1)
Scer_rep = best_scores[columns_by_score["reported in Scer"]].any(axis=1)

agreements["best_res_across"] = best_res
agreements["best_sens_across"] = best_sens
agreements["reported in Scer"] = Scer_rep
agreements

compound,anidulafungin,best_res_across,best_sens_across,reported in Scer
Scer_mutation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
R1376S,resistant,4.0,,False


In [10]:
# Write the agreement table for this strain/locus.
# Create the output directory if it is missing so the write cannot fail on a
# fresh checkout.
os.makedirs(fungamr_outpath, exist_ok=True)
# os.path.join avoids the doubled "/" produced by f-string concatenation,
# since fungamr_outpath already ends with a separator.
agreements.to_csv(
    os.path.join(fungamr_outpath, f"agreements_{strain}_{locus}.csv")
)