In [1]:
import os
import json
import argparse
import numpy as np
import pandas as pd
import os.path as op
from core import in_CI, rsquared
from scipy.stats import spearmanr, pearsonr

In [2]:
def Cas9_results(path):
    """Summarise one saved prediction file as a single-row DataFrame.

    The JSON file at ``path`` is read and its ``"y"``, ``"y_hat"`` and
    ``"y_hat_std"`` entries are used.

    Returns a DataFrame with one row, indexed by ``path``. Its first columns
    are the transposed result of ``in_CI(...).mean()``; these are followed by
    ``PCC``/``PCC-pval`` (Pearson), ``SCC``/``SCC-pval`` (Spearman) and
    ``rsquared``.
    """
    with open(path, "r") as handle:
        payload = json.load(handle)

    observed = payload["y"]
    predicted = payload["y_hat"]

    # in_CI and rsquared are imported from the project's `core` module.
    coverage = in_CI(observed, predicted, payload["y_hat_std"]).mean()
    pcc = pearsonr(observed, predicted)
    scc = spearmanr(observed, predicted)

    # Insertion order of this dict fixes the column order of the output.
    metrics = {
        "PCC": pcc[0],
        "PCC-pval": pcc[1],
        "SCC": scc[0],
        "SCC-pval": scc[1],
        "rsquared": rsquared(observed, predicted),
    }

    # The averaged in_CI result is turned into a one-row frame.
    row = pd.DataFrame(coverage).T
    for column, value in metrics.items():
        row[column] = [value]
    row.index = [path]
    return row

In [3]:
def DeepCpf1_results(path, suffixes=("H1", "H2", "H3")):
    """Summarise a saved multi-part prediction file, one row per key suffix.

    The JSON file at ``path`` is read once. For every suffix ``s`` in
    ``suffixes`` the entries ``"y_<s>"``, ``"y_hat_<s>"`` and
    ``"y_hat_std_<s>"`` are scored.

    Parameters
    ----------
    path : str
        Location of the JSON file.
    suffixes : iterable of str, optional
        Key suffixes to score. Defaults to ``("H1", "H2", "H3")``, which
        reproduces the previously hard-coded behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per suffix, in the order given, indexed by
        ``path + ":" + suffix``. The first columns are the transposed result
        of ``in_CI(...).mean()``; they are followed by ``PCC``/``PCC-pval``
        (Pearson), ``SCC``/``SCC-pval`` (Spearman) and ``rsquared``.

    Raises
    ------
    KeyError
        If the file lacks one of the expected keys for a requested suffix.
    ValueError
        From ``pd.concat`` when ``suffixes`` is empty.
    """
    with open(path, "r") as handle:
        payload = json.load(handle)

    rows = []
    for suffix in suffixes:
        observed = payload["y_" + suffix]
        predicted = payload["y_hat_" + suffix]

        # in_CI and rsquared are imported from the project's `core` module.
        coverage = in_CI(
            observed, predicted, payload["y_hat_std_" + suffix]
        ).mean()
        pcc = pearsonr(observed, predicted)
        scc = spearmanr(observed, predicted)
        rsq = rsquared(observed, predicted)

        # The averaged in_CI result is turned into a one-row frame.
        row = pd.DataFrame(coverage).T
        row["PCC"] = [pcc[0]]
        row["PCC-pval"] = [pcc[1]]
        row["SCC"] = [scc[0]]
        row["SCC-pval"] = [scc[1]]
        row["rsquared"] = [rsq]
        # The suffix is appended so rows from the same file stay distinct.
        row.index = [path + ":" + suffix]
        rows.append(row)

    return pd.concat(rows)

In [4]:
# Discover the folders that hold model result files under ../models.
#   * top-level folders whose path contains "LC" are skipped;
#   * a top-level folder that has sub-folders is replaced by those
#     sub-folders (one level deep); a folder without sub-folders is kept.
# next(os.walk(p)) reads only p's own (dirpath, dirnames, filenames) entry
# instead of walking the whole tree just to take element 0. If p cannot be
# listed, os.walk yields nothing and next() raises StopIteration.
top_level_folders = [
    op.join("../models/", name) for name in next(os.walk("../models"))[1]
]
top_level_folders = [
    folder for folder in top_level_folders if "LC" not in folder
]

# (sub-folder path, parent folder) pairs, flattened in discovery order.
deeper = [
    (op.join(parent, child), parent)
    for parent in top_level_folders
    for child in next(os.walk(parent))[1]
]

# Leaf folders come first, then the sub-folders. Filtering the list (rather
# than taking a set difference) keeps the order stable between runs.
parents_with_children = {parent for _, parent in deeper}
model_folders = [
    folder for folder in top_level_folders
    if folder not in parents_with_children
] + [child for child, _ in deeper]

In [5]:
# Collect the JSON result files sitting directly inside each model folder.
# next(os.walk(folder)) reads only the folder's own entry rather than walking
# everything beneath it; it raises StopIteration if the folder cannot be
# listed.
model_files = [
    op.join(folder, filename)
    for folder in model_folders
    for filename in next(os.walk(folder))[2]
    # Test the extension of the file name itself: a substring test on the
    # full path would also accept names such as "x.json.bak", or any file
    # under a folder whose name contains ".json".
    if filename.endswith(".json")
]

In [6]:
# Score every result file whose path does not contain "Cpf1" and stack the
# one-row summaries into a single table.
Cas9 = pd.concat(
    [
        Cas9_results(json_path)
        for json_path in model_files
        if "Cpf1" not in json_path
    ]
)



In [7]:
# Score every result file whose path contains "DeepCpf1"; each file
# contributes one row per key suffix handled by DeepCpf1_results.
Cas12a = pd.concat(
    [
        DeepCpf1_results(json_path)
        for json_path in model_files
        if "DeepCpf1" in json_path
    ]
)

In [8]:
# Result files whose path contains "offtargets" use the same single-set JSON
# layout as the Cas9 files, so they are scored with Cas9_results.
Cas12a_pair = pd.concat(
    [
        Cas9_results(json_path)
        for json_path in model_files
        if "offtargets" in json_path
    ]
)

In [9]:
# Stack the three per-group summaries (in this order) into one table.
table_parts = [Cas9, Cas12a, Cas12a_pair]
Table1 = pd.concat(table_parts)

In [10]:
def readable_name(x):
    """Turn a result-file path into the row label used in the table.

    A fixed list of literal substitutions is applied in order; later entries
    see the output of earlier ones (e.g. "models " only exists after "/" has
    been turned into a space).

    NOTE(review): ".json" is always replaced by " ELBO", so labels that also
    received " MSE" from "-u" end in "MSE ELBO" — confirm this is intended.
    """
    substitutions = (
        ("../", ""),
        ("/", " "),
        ("-u", " MSE"),
        (".json", " ELBO"),
        ("models ", ""),
        ("JostEtAl model", "Cas9 gRNA-target pair"),
        ("Cpf1_offtargets model", "Cas12a gRNA-target pair"),
    )
    for old, new in substitutions:
        x = x.replace(old, new)
    return x

In [11]:
# Swap the file-path row labels for their readable names.
Table1.index = list(map(readable_name, Table1.index))

In [12]:
# Order the rows by Spearman correlation, highest first.
Table1 = Table1.sort_values(by="SCC", ascending=False)

In [13]:
# Write the table (including its row labels) to Table1.csv in the working
# directory.
Table1.to_csv(path_or_buf="Table1.csv")

In [14]:
# Show the final summary table as the cell output.
Table1

Unnamed: 0,0.68,0.95,0.997,PCC,PCC-pval,SCC,SCC-pval,rsquared
DeepHF WT CNN ELBO,0.760192,0.948801,0.985132,0.867976,0.000000e+00,0.839213,0.000000e+00,0.753383
DeepHF WT RNN MSE ELBO,0.733813,0.926978,0.978537,0.870306,0.000000e+00,0.838898,0.000000e+00,0.757432
DeepHF WT CNN MSE ELBO,0.759353,0.945444,0.984412,0.867207,0.000000e+00,0.837253,0.000000e+00,0.752047
DeepHF SpCas9HF1 RNN MSE ELBO,0.702567,0.946092,0.988867,0.852994,0.000000e+00,0.836442,0.000000e+00,0.727599
DeepHF WT RNN ELBO,0.772062,0.947362,0.984652,0.860720,0.000000e+00,0.828265,0.000000e+00,0.740839
...,...,...,...,...,...,...,...,...
DeepCRISPR hek293t RNN-v ELBO,0.799400,0.998285,1.000000,-0.127936,5.588398e-10,-0.103051,6.098290e-07,0.016368
DeepCRISPR hek293t RNN MSE-v ELBO,0.708530,0.937848,1.000000,-0.146143,1.310508e-12,-0.119424,7.210535e-09,0.021358
DeepCRISPR hek293t CNN MSE-v ELBO,0.684098,0.916417,1.000000,-0.137896,2.248221e-11,-0.137985,2.182228e-11,0.019015
DeepCRISPR hek293t CNN-v ELBO,0.668238,0.920274,1.000000,-0.153540,8.890007e-14,-0.151118,2.176874e-13,0.023574
