# Notebook to reproduce all downstream analyses from gyōza-generated data

## Import libraries

In [1]:
import papermill as pm

## Train GMM

In [2]:
# Run train_GMM.ipynb through papermill once per (strain, locus) pair.
# The third field of each job is the CSV path handed to the template
# notebook as its "aggdata_outpath" parameter.
gmm_jobs = (
    ("BY4741", "FKS1-HS1", "../../results/df/avg_scores.csv"),
    ("BY4741", "FKS1-HS2", "../../results/df/avg_scores.csv"),
    ("BY4741", "FKS1-HS3", "../../results/df/avg_scores_HS3.csv"),
    ("R1158", "FKS2-HS1", "../../results/df/avg_scores.csv"),
    ("R1158", "FKS2-HS2", "../../results/df/avg_scores.csv"),
)

for strain, locus, scores_csv in gmm_jobs:
    print(strain, locus, scores_csv)
    # One executed copy of the template is saved per strain/locus combination.
    executed_path = f"../parameterized/GMM_training/{strain}_{locus}.ipynb"
    run_params = dict(strain=strain, locus=locus, aggdata_outpath=scores_csv)
    o = pm.execute_notebook("train_GMM.ipynb", executed_path, parameters=run_params)

BY4741 FKS1-HS1 ../../results/df/avg_scores.csv


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

BY4741 FKS1-HS2 ../../results/df/avg_scores.csv


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

BY4741 FKS1-HS3 ../../results/df/avg_scores_HS3.csv


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

R1158 FKS2-HS1 ../../results/df/avg_scores.csv


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

R1158 FKS2-HS2 ../../results/df/avg_scores.csv


Executing:   0%|          | 0/43 [00:00<?, ?cell/s]

## Validate growth data

DMS data are compared to growth data from reconstructed mutants using a linear regression. An inferred selection coefficient is obtained for some FKS1-HS1 mutants missing from the DMS dataset, as well as for two mutants whose selection coefficient was underestimated by gyōza in one condition.

In [3]:
# Execute the validation notebook without injecting any parameters; the
# executed copy is written to the path in `validation_output`.
validation_template = "20240129_validations_test3.ipynb"
validation_output = "../parameterized/validations/run.ipynb"
# The result is assigned so the returned notebook object is not echoed as cell output.
o = pm.execute_notebook(validation_template, validation_output)

Executing:   0%|          | 0/34 [00:00<?, ?cell/s]

In [4]:
# Execute the rescue script through IPython's %run line magic.
# NOTE(review): presumably this produces the data for FKS1-HS1 mutants missing
# from the DMS dataset (cf. "refined_classification_with_missing.csv" used in
# the classification step) — confirm against the script itself.
%run ../scripts/rescue_missing_mutants.py

## Classify variants

In [5]:
# Map each (strain, locus) pair to the file passed to classify_gyoza_data.ipynb
# as its "data" parameter. Only FKS1-HS1 uses the "_with_missing" file.
# Dict insertion order determines the execution order.
classification_inputs = {
    ("BY4741", "FKS1-HS1"): "refined_classification_with_missing.csv",
    ("BY4741", "FKS1-HS2"): "refined_classification.csv",
    ("BY4741", "FKS1-HS3"): "refined_classification.csv",
    ("R1158", "FKS2-HS1"): "refined_classification.csv",
    ("R1158", "FKS2-HS2"): "refined_classification.csv",
}

for (strain, locus), data_file in classification_inputs.items():
    print(strain, locus, data_file)
    executed_path = f"../parameterized/classification/{strain}_{locus}.ipynb"
    run_params = dict(strain=strain, locus=locus, data=data_file)
    o = pm.execute_notebook(
        "classify_gyoza_data.ipynb", executed_path, parameters=run_params
    )

BY4741 FKS1-HS1 refined_classification_with_missing.csv


Executing:   0%|          | 0/32 [00:00<?, ?cell/s]

BY4741 FKS1-HS2 refined_classification.csv


Executing:   0%|          | 0/32 [00:00<?, ?cell/s]

BY4741 FKS1-HS3 refined_classification.csv


Executing:   0%|          | 0/32 [00:00<?, ?cell/s]

R1158 FKS2-HS1 refined_classification.csv


Executing:   0%|          | 0/32 [00:00<?, ?cell/s]

R1158 FKS2-HS2 refined_classification.csv


Executing:   0%|          | 0/32 [00:00<?, ?cell/s]

## Plot heatmaps

In [2]:
# Compounds run for every target; the R1158 targets additionally get "dox".
shared_compounds = ("anidulafungin", "caspofungin", "micafungin", "none")

# (strain, locus) -> compounds, in execution order (dicts keep insertion order).
compounds_by_target = {
    ("BY4741", "FKS1-HS1"): shared_compounds,
    ("BY4741", "FKS1-HS2"): shared_compounds,
    ("BY4741", "FKS1-HS3"): shared_compounds,
    ("R1158", "FKS2-HS1"): shared_compounds + ("dox",),
    ("R1158", "FKS2-HS2"): shared_compounds + ("dox",),
}

# Run heatmaps.ipynb once per (strain, locus, compound) combination and save
# each executed copy under ../parameterized/heatmaps/.
for (strain, locus), compounds in compounds_by_target.items():
    for compound in compounds:
        print(strain, locus, compound)
        executed_path = f"../parameterized/heatmaps/{strain}_{locus}_{compound}.ipynb"
        run_params = dict(strain=strain, locus=locus, compound=compound)
        o = pm.execute_notebook("heatmaps.ipynb", executed_path, parameters=run_params)

BY4741 FKS1-HS1 anidulafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS1 caspofungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS1 micafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS1 none


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS2 anidulafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS2 caspofungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS2 micafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS2 none


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS3 anidulafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS3 caspofungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS3 micafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

BY4741 FKS1-HS3 none


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS1 anidulafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS1 caspofungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS1 micafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS1 none


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS1 dox


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS2 anidulafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS2 caspofungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS2 micafungin


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS2 none


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

R1158 FKS2-HS2 dox


Executing:   0%|          | 0/22 [00:00<?, ?cell/s]

## Generate defattr files for ChimeraX

The generated files are used to map data onto the protein structures (here, the proportion of mutants classified as "resistant" at each position).

In [7]:
# One entry per paralog: (strain, paralog, chain, hotspots, compounds).
# `chain` and `hotspots` are forwarded unchanged to defattr.ipynb.
paralog_setups = [
    (
        "BY4741",
        "FKS1",
        "/F",
        ["HS1", "HS2", "HS3"],
        ["anidulafungin", "caspofungin", "micafungin", "none"],
    ),
    (
        "R1158",
        "FKS2",
        "#2 /A",
        ["HS1", "HS2"],
        ["anidulafungin", "caspofungin", "micafungin", "none", "dox"],
    ),
]

# Run defattr.ipynb once per (paralog, compound) combination.
for strain, paralog, chain, paralog_hotspots, compounds in paralog_setups:
    for compound in compounds:
        print(strain, paralog, compound)
        executed_path = f"../parameterized/defattr/{strain}_{paralog}_{compound}.ipynb"
        run_params = dict(
            strain=strain,
            paralog=paralog,
            chain=chain,
            # Fresh list per run, so each call receives its own list object.
            hotspots=list(paralog_hotspots),
            compound=compound,
        )
        o = pm.execute_notebook("defattr.ipynb", executed_path, parameters=run_params)

BY4741 FKS1 anidulafungin


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

BY4741 FKS1 caspofungin


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

BY4741 FKS1 micafungin


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

BY4741 FKS1 none


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

R1158 FKS2 anidulafungin


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

R1158 FKS2 caspofungin


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

R1158 FKS2 micafungin


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

R1158 FKS2 none


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]

R1158 FKS2 dox


Executing:   0%|          | 0/15 [00:00<?, ?cell/s]