In [65]:
import os
import json
import argparse
import numpy as np
import pandas as pd
import os.path as op
from core import in_CI, rsquared
from scipy.stats import spearmanr, pearsonr

In [66]:
def Cas9_results(path):
    """Summarise one JSON prediction file as a single-row DataFrame.

    The file at ``path`` must hold the keys ``"y"``, ``"y_hat"`` and
    ``"y_hat_std"``.

    Parameters
    ----------
    path : str
        Location of the JSON file; it is also used as the row label.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``path``. The leading column(s) are the mean of
        whatever ``in_CI`` returns (presumably interval-coverage indicators --
        confirm against ``core.in_CI``), followed by ``PCC``, ``PCC-pval``,
        ``SCC``, ``SCC-pval`` and ``rsquared``.
    """
    with open(path, "r") as handle:
        payload = json.load(handle)

    observed = payload["y"]
    predicted = payload["y_hat"]

    coverage = in_CI(observed, predicted, payload["y_hat_std"]).mean()
    pcc = pearsonr(observed, predicted)
    scc = spearmanr(observed, predicted)
    fit = rsquared(observed, predicted)

    # Start from the coverage column(s), then append the scalar metrics in a
    # fixed column order.
    row = pd.DataFrame(coverage).T.assign(
        **{
            "PCC": [pcc[0]],
            "PCC-pval": [pcc[1]],
            "SCC": [scc[0]],
            "SCC-pval": [scc[1]],
            "rsquared": [fit],
        }
    )
    row.index = [path]
    return row

In [67]:
def DeepCpf1_results(path):
    """Summarise a JSON prediction file that holds three suffixed result sets.

    For each suffix in ``H1``, ``H2``, ``H3`` the file at ``path`` must hold
    the keys ``"y_<suffix>"``, ``"y_hat_<suffix>"`` and
    ``"y_hat_std_<suffix>"``.

    Parameters
    ----------
    path : str
        Location of the JSON file; row labels are ``path + ":" + suffix``.

    Returns
    -------
    pandas.DataFrame
        Three rows (one per suffix, in ``H1``, ``H2``, ``H3`` order). The
        leading column(s) are the mean of whatever ``in_CI`` returns
        (presumably interval-coverage indicators -- confirm against
        ``core.in_CI``), followed by ``PCC``, ``PCC-pval``, ``SCC``,
        ``SCC-pval`` and ``rsquared``.
    """
    with open(path, "r") as handle:
        payload = json.load(handle)

    def build_row(suffix):
        # Metrics for a single result set, laid out as a one-row frame.
        observed = payload["y_" + suffix]
        predicted = payload["y_hat_" + suffix]

        coverage = in_CI(observed, predicted, payload["y_hat_std_" + suffix]).mean()
        pcc = pearsonr(observed, predicted)
        scc = spearmanr(observed, predicted)
        fit = rsquared(observed, predicted)

        row = pd.DataFrame(coverage).T.assign(
            **{
                "PCC": [pcc[0]],
                "PCC-pval": [pcc[1]],
                "SCC": [scc[0]],
                "SCC-pval": [scc[1]],
                "rsquared": [fit],
            }
        )
        row.index = [path + ":" + suffix]
        return row

    return pd.concat([build_row(suffix) for suffix in ("H1", "H2", "H3")])

In [68]:
def _child_dirs(folder):
    """Return the names of the directories directly inside ``folder``.

    Only the first entry produced by ``os.walk`` is consumed, so nothing below
    ``folder`` is traversed. Raises ``FileNotFoundError`` when ``os.walk``
    yields nothing (e.g. the folder is missing or unreadable).
    """
    first_entry = next(os.walk(folder), None)
    if first_entry is None:
        raise FileNotFoundError("cannot list model directory: " + folder)
    return first_entry[1]


# Top-level folders under ../models, skipping any whose path contains "LC".
# Names are sorted so the resulting order is the same on every run.
_top_level_folders = [
    folder
    for folder in (
        op.join("../models/", name) for name in sorted(_child_dirs("../models"))
    )
    if "LC" not in folder
]

# (sub-folder path, parent path) for every top-level folder that has children.
deeper = [
    (op.join(parent, child), parent)
    for parent in _top_level_folders
    for child in sorted(_child_dirs(parent))
]

# Final list: top-level folders without sub-folders first, then every
# sub-folder of the remaining ones (a parent with children is replaced by them).
_parents_with_children = {parent for _, parent in deeper}
model_folders = [
    folder for folder in _top_level_folders if folder not in _parents_with_children
] + [subfolder for subfolder, _ in deeper]

In [69]:
# Collect every JSON file that sits directly inside one of the model folders.
# - next(os.walk(...)) reads only the folder's own listing instead of walking
#   (and materialising) the whole tree below it.
# - endswith(".json") matches the file extension; a substring test on the full
#   path would also accept names such as "x.json.bak" or any file located
#   under a folder whose name contains ".json".
# - File names are sorted so the order is the same on every run.
model_files = [
    op.join(folder, file_name)
    for folder in model_folders
    for file_name in sorted(next(os.walk(folder))[2])
    if file_name.endswith(".json")
]

In [70]:
# One summary row per prediction file whose path does not contain "Cpf1".
Cas9 = pd.concat(
    [Cas9_results(json_path) for json_path in model_files if "Cpf1" not in json_path]
)



In [71]:
# Three summary rows (H1, H2, H3) per prediction file whose path contains "DeepCpf1".
Cas12a = pd.concat(
    [DeepCpf1_results(json_path) for json_path in model_files if "DeepCpf1" in json_path]
)

In [72]:
# Files whose path contains "offtargets" use the plain y / y_hat / y_hat_std
# layout, so they are summarised with Cas9_results.
Cas12a_pair = pd.concat(
    [Cas9_results(json_path) for json_path in model_files if "offtargets" in json_path]
)

In [73]:
# Stack the three result sets row-wise into a single table.
Table1 = pd.concat(
    [
        Cas9,
        Cas12a,
        Cas12a_pair,
    ]
)

In [74]:
def readable_name(x):
    """Turn a model file path into a display label.

    The substitutions below are applied in order, each to every occurrence;
    later rules operate on the output of earlier ones (e.g. "models " only
    exists once "/" has been turned into a space).

    Parameters
    ----------
    x : str
        Row label, typically a path such as ``"../models/<name>/model.json"``
        optionally followed by a ``":<suffix>"`` tag.

    Returns
    -------
    str
        The rewritten label.
    """
    substitutions = (
        ("../", ""),
        ("/", " "),
        ("-u", " MSE"),
        (".json", " ELBO"),
        ("models ", ""),
        ("JostEtAl model", "Cas9 gRNA-target pair"),
        ("Cpf1_offtargets model", "Cas12a gRNA-target pair"),
    )
    label = x
    for old, new in substitutions:
        label = label.replace(old, new)
    return label

In [75]:
# Replace the path-based row labels with their display names.
Table1.index = list(map(readable_name, Table1.index))

In [76]:
# Order rows from highest to lowest SCC.
Table1 = Table1.sort_values(by="SCC", ascending=False)

In [77]:
# Shorten the row labels. The pairs are applied in order: "ELBO" must already
# be "E" for the final "MSE E" -> "E+M" rule to match.
short_labels = []
for label in Table1.index.values:
    for long_form, short_form in (
        ("CNN", "C"),
        ("RNN", "R"),
        ("ELBO", "E"),
        ("MSE E", "E+M"),
    ):
        label = label.replace(long_form, short_form)
    short_labels.append(label)
Table1.index = short_labels

In [78]:
# Write the table to an Excel workbook in the current working directory.
Table1.to_excel(excel_writer="ST3.xlsx")

In [79]:
# Write the same table as CSV in the current working directory.
Table1.to_csv(path_or_buf="Table1.csv")