In [3]:
import json

In [13]:
# Scratch check: the type of a tuple literal compares equal to the built-in `tuple`.
type((324,324)) == tuple

True

In [14]:
# Scratch check: str() of a tuple gives its printed form (note the space after the comma).
str((32,23))

'(32, 23)'

In [10]:
# Application settings; the following cell serialises this dict to config.json.
config = {
    # Matches the key of the model section below; presumably selects it as the
    # default model -- TODO confirm against the code that reads "default".
    "default": "Cas9_RNN_DC_offtarget",
    # Template names passed to jinja2's get_template() when rendering reports.
    "on-target report": "templates/ontarget.template.html",
    "off-target report": "templates/offtarget.template.html",
    # Per-model settings. The file paths look like model artifacts
    # (presumably on-/off-target models, a regressor and a UMAP projection);
    # their consumers are not visible here -- verify before relying on this.
    "Cas9_RNN_DC_offtarget": {
        "on": "./models/test/revcore_full_model_2.ptch",
        "off": "./models/test/peng_siamese.ptch",
        "on_reg": "./models/test/gecrispr.cat",
        "umap": "./models/test/test_umap.bin",
        "reg_type": "catboost",
        "guide_length": 20,
        "use_pam": False,
        "pam_before": False,
        # Looks like a regular-expression character class followed by "GG".
        "PAM": "[ATGC]GG",
        "background": "./models/test/test_background.pkl",
        # NOTE(review): "max" (0.0487...) is smaller than "min" (0.2276...).
        # Either the two values are swapped or the names do not mean what they
        # suggest -- confirm against the code that uses them.
        "max": 0.048707843,
        "min": 0.22760779
    },
    # Presumably pickled search indices for off-target lookup -- TODO confirm.
    "off_target_indices": ["indices/ecoli.pkl"],
    "k_neighbors": 10,
    "offtarget_batch_size": 102400
}

In [11]:
# Serialise the settings dict and store it as config.json in the working directory.
with open("config.json", "w") as config_file:
    config_file.write(json.dumps(config))

In [1]:
import re
import io
import os
import dash
import time
import json
import uuid
import flask
import base64
import shutil
from reviewed_core import *
import numpy as np
import pandas as pd
import pickle as pkl
import os.path as op
from Bio import SeqIO
from tqdm import tqdm
from time import time
from vicinity import *
from vedis import Vedis
from Bio.Seq import Seq
#from logic import Logic
from zipfile import ZipFile
from Bio.Alphabet import IUPAC
import plotly.graph_objs as go
from operator import itemgetter
from weasyprint import HTML, CSS
import dash_core_components as dcc
from Bio.SeqRecord import SeqRecord
import dash_html_components as html
from sklearn.externals import joblib
from catboost import CatBoostRegressor
from urllib.parse import quote as urlquote
from scipy.spatial.distance import euclidean
from jinja2 import Environment, FileSystemLoader
from dash.dependencies import Input, State, Output
from Bio.SeqFeature import SeqFeature, FeatureLocation




There are known rendering problems and missing features with cairo < 1.15.4. WeasyPrint may work with older versions, but please read the note about the needed cairo version on the "Install" page of the documentation before reporting bugs. http://weasyprint.readthedocs.io/en/latest/install.html



In [9]:
# Template names in `config` are resolved relative to the working directory.
env = Environment(loader=FileSystemLoader('.'))
ON_TEMPLATE = env.get_template(config["on-target report"])
# The off-target template needs its own name: assigning it to ON_TEMPLATE
# would discard the on-target template loaded on the line above.
OFF_TEMPLATE = env.get_template(config["off-target report"])

In [None]:
def get_random_fn(ext):
    """Return a random file path inside the ``reports`` directory.

    The file name is 16 decimal digits drawn from NumPy's global random
    state, followed by ``"."`` and ``ext``. Nothing is created on disk and
    no check is made for an existing file of the same name.

    Parameters
    ----------
    ext : str
        File extension without the leading dot (e.g. ``"pdf"``).

    Returns
    -------
    str
        Path of the form ``reports/<16 digits>.<ext>``.
    """
    digits = np.random.choice(np.arange(0, 10), (16,))
    stem = "".join(map(str, digits))
    return op.join("reports", stem + "." + ext)

def zip_file(files):
    """Bundle ``files`` into a new zip archive and delete the originals.

    Parameters
    ----------
    files : iterable of str
        Paths of the files to archive. Each is stored under the path it was
        given as, and removed from disk once the archive has been written.

    Returns
    -------
    str
        Path of the created archive (from ``get_random_fn("zip")``).
    """
    # Materialise first: the paths are walked twice and may come from a generator.
    files = list(files)
    # Use the module-level helper (as get_on_html/get_off_html do) so this
    # function does not depend on the Reporter class being defined.
    randpath = get_random_fn("zip")
    with ZipFile(randpath, "w") as z:
        for i in files:
            z.write(i)
    # Delete the inputs only after the archive is closed, so a failure while
    # writing cannot leave files that are neither on disk nor in the archive.
    for i in files:
        os.remove(i)
    return(randpath)

def get_fasta(df):
    """Write one FASTA record per row of ``df`` and return the file path.

    For every row the sequence is the row's ``"guide"`` value, the record id
    is the row's index label, and the description lists every column of that
    row as ``name:value;``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``"guide"`` column holding the sequence strings.

    Returns
    -------
    str
        Path of the written FASTA file (from ``get_random_fn("fasta")``).
    """
    records = []
    for a in df.index:
        # .loc is the label-based replacement for the removed .ix indexer.
        row = df.loc[a]
        # Build the description from scratch for each row (so it never carries
        # over fields of earlier rows) and format values so that non-string
        # cells such as numbers or booleans are accepted.
        d = "".join("{}:{};".format(b, row[b]) for b in df.columns)
        records.append(
            SeqRecord(
                Seq(row["guide"], IUPAC.unambiguous_dna),
                id=str(a), description=d
            )
        )
    # Use the module-level helper (as get_on_html/get_off_html do) so this
    # function does not depend on the Reporter class being defined.
    randpath = get_random_fn("fasta")
    SeqIO.write(records, randpath, "fasta")
    return(randpath)

def get_on_html(data):
    """Render the on-target report for ``data`` as a PDF plus two FASTA files.

    The template named by ``config["on-target report"]`` is rendered with the
    HTML table of all rows (``"gc"``) and of the rows whose ``"chosen?"``
    value equals ``True`` (``"ott"``).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``"chosen?"`` column.

    Returns
    -------
    tuple of str
        ``(pdf_path, fasta_path_all_rows, fasta_path_chosen_rows)``.
    """
    jinja_env = Environment(loader=FileSystemLoader('.'))
    template = jinja_env.get_template(config["on-target report"])

    selected = data[data["chosen?"] == True]
    context = {"gc": data.to_html(), "ott": selected.to_html()}
    rendered = template.render(context)

    # Force a monospace font in the generated PDF.
    pdf_path = get_random_fn("pdf")
    monospace = CSS(string='body { font-family: monospace !important }')
    HTML(string=rendered).write_pdf(pdf_path, stylesheets=[monospace])

    return(pdf_path, get_fasta(data), get_fasta(selected))

def get_off_html(data):
    """Render the off-target report for ``data`` as a PDF plus a FASTA file.

    The template named by ``config["off-target report"]`` is rendered with
    the HTML table of ``data`` under the ``"gc"`` variable.

    Parameters
    ----------
    data : pandas.DataFrame
        Table to include in the report and export as FASTA.

    Returns
    -------
    tuple of str
        ``(pdf_path, fasta_path)``.
    """
    jinja_env = Environment(loader=FileSystemLoader('.'))
    template = jinja_env.get_template(config["off-target report"])
    rendered = template.render({"gc": data.to_html()})

    # Force a monospace font in the generated PDF.
    pdf_path = get_random_fn("pdf")
    monospace = CSS(string='body { font-family: monospace !important }')
    HTML(string=rendered).write_pdf(pdf_path, stylesheets=[monospace])

    fasta_path = get_fasta(data)
    return(pdf_path, fasta_path)

In [2]:
class Reporter():
    """Turns per-session results into report files (HTML, PDF, FASTA, CSV, ZIP).

    Results are read from ``self.db`` under keys of the form ``<sid>_<field>``
    and parsed from their printed text form (see ``parse_vedis``). Every
    generated file is placed in ``REPORT_DIR`` under a random name.
    """

    # Directory (relative to the working directory) that receives generated files.
    REPORT_DIR = "reports"

    def __init__(self, on_template, off_template):
        """Load both report templates and open an in-memory Vedis store.

        Parameters
        ----------
        on_template : str
            Template name of the on-target report, relative to the working directory.
        off_template : str
            Template name of the off-target report, relative to the working directory.
        """
        self.env = Environment(loader=FileSystemLoader('.'))
        # Load each template with its own call: the second positional argument
        # of get_template() is `parent`, not another template to load, so
        # passing both names to one call never loads the off-target template.
        self.on = self.env.get_template(on_template)
        self.off = self.env.get_template(off_template)

        self.db = Vedis(":mem:")

    def get_on_html(self, all_data, cart):
        """Render the on-target template.

        ``cart`` is rendered as the ``"gc"`` table and ``all_data`` as the
        ``"ott"`` table. Returns the rendered HTML string.
        """
        template_vars = {
            "gc": cart.to_html(),
            "ott": all_data.to_html()
        }
        return(self.on.render(template_vars))

    def get_pdf(self, html):
        """Write ``html`` to a PDF with a forced monospace font; return its path."""
        randpath = Reporter.get_random_fn("pdf")
        HTML(string=html).write_pdf(
            randpath, stylesheets=[CSS(string='body { font-family: monospace !important }')]
        )
        return(randpath)

    @staticmethod
    def get_random_fn(ext):
        """Return ``REPORT_DIR/<16 random digits>.<ext>``.

        The digits are drawn from NumPy's global random state. The report
        directory is created if it is missing so that callers can write to
        the returned path right away; the file itself is not created.
        """
        os.makedirs(Reporter.REPORT_DIR, exist_ok=True)
        randpath = np.random.choice(np.arange(0, 10), (16,))
        randpath = "".join([str(a) for a in randpath])
        randpath = op.join(Reporter.REPORT_DIR, randpath+"."+ext)
        return(randpath)

    @staticmethod
    def parse_vedis(s):
        """Split a stored value into its non-empty, space-separated tokens.

        ``str(s)`` is taken first, then the escaped newlines (``\\n``), the
        leading ``b'[`` / ``b"[`` and the trailing ``]'`` / ``]"`` of a bytes
        repr are removed before splitting on single spaces. Presumably the
        stored values are printed arrays -- verify against the code that
        writes to the store.
        """
        u = str(s)
        for a in ["\\n", "b'[", "b\"[", "]'", "]\""]:
            u = u.replace(a, "")
        return([tok for tok in u.split(" ") if tok != ""])

    @staticmethod
    def val_float(x):
        """Convert a token to float, padding a trailing ``"."`` with ``"0"`` first."""
        if x[-1] == ".":
            x += "0"
        return(float(x))

    @staticmethod
    def val_int(x):
        """Convert a token to int after dropping every character except digits and ``-``."""
        # Raw string: "\-" is not a valid escape in an ordinary string literal.
        return(int(re.sub(r"[^0-9\-]", "", x)))

    def get_data(self, sid):
        """Read and parse all stored fields of session ``sid``.

        Returns
        -------
        tuple
            ``(activities, n_off, labels, strands, gd, act_n)`` where
            ``activities`` and ``n_off`` are floats parsed from the
            ``_activity`` and ``_OTS`` keys, ``labels``, ``strands`` and
            ``act_n`` are ints parsed from ``_label``, ``_strand`` and
            ``_#offtargets``, and ``gd`` is an array of the ``_guide`` tokens
            reshaped to ``(len(strands), 4)``.
        """
        def tokens(field):
            # Stored values are keyed by session id followed by the field suffix.
            return Reporter.parse_vedis(self.db[sid+field])

        activities = [Reporter.val_float(a) for a in tokens("_activity")]
        n_off = [Reporter.val_float(a) for a in tokens("_OTS")]
        labels = [Reporter.val_int(a) for a in tokens("_label")]
        strands = [Reporter.val_int(a) for a in tokens("_strand")]
        gd = np.array(tokens("_guide")).reshape((len(strands),4))
        act_n = [Reporter.val_int(a) for a in tokens("_#offtargets")]
        return(activities, n_off, labels, strands, gd, act_n)

    def get_df(self, sid):
        """Assemble the session's parsed fields into a DataFrame.

        The four columns of the guide array are read as guide sequence,
        start, end and PAM. The ``"#offtargets"`` column holds the values
        parsed from the ``_#offtargets`` key; the ``_OTS`` values returned by
        ``get_data`` are not used here.
        """
        activities, _, labels, strands, gd, n_off = self.get_data(sid)
        # Sequence tokens still carry quotes/brackets from the printed array.
        guides = [re.sub("[^ATGC]", "", a) for a in gd[:, 0]]
        starts = [Reporter.val_int(a) for a in gd[:, 1]]
        ends = [Reporter.val_int(a) for a in gd[:, 2]]
        pams = [re.sub("[^ATGC]", "", a) for a in gd[:, 3]]
        r = pd.DataFrame(
            {
               "guide": guides,
               "label": labels,
               "activity": activities, "strand": strands,
               "start": starts, "end": ends, "PAM": pams, "#offtargets": n_off,
            }, columns=["guide", "label", "activity", "#offtargets", "strand", "start", "end", "PAM"]
        )
        return(r)

    def get_fasta(self, df):
        """Write one FASTA record per row of ``df`` and return the file path.

        ``df`` needs the columns produced by ``get_df``. The sequence is the
        ``"guide"`` value, the id is the index label, and the description
        encodes activity, label, strand, position, PAM and #offtargets.
        """
        records = []
        for a in df.index:
            # .loc is the label-based replacement for the removed .ix indexer;
            # the row is fetched once instead of once per field.
            row = df.loc[a]
            d = "activity:"+str(row["activity"])+";label:"+str(row["label"])
            d += ";strand:"+str(row["strand"])+";position:"+str(row["start"])+"-"
            d += str(row["end"])+";PAM:"+str(row["PAM"])+";#offtargets:"+str(row["#offtargets"])
            records.append(
                SeqRecord(
                    Seq(row["guide"], IUPAC.unambiguous_dna),
                    id=str(a), description=d
                )
            )
        randpath = Reporter.get_random_fn("fasta")
        SeqIO.write(records, randpath, "fasta")
        return(randpath)

    def get_csv(self, df):
        """Write ``df`` to a CSV file (index included) and return the file path."""
        randpath = Reporter.get_random_fn("csv")
        df.to_csv(randpath)
        return(randpath)

    @staticmethod
    def zip_file(files):
        """Bundle ``files`` into a new zip archive, delete the originals, return the archive path."""
        # Materialise first: the paths are walked twice and may come from a generator.
        files = list(files)
        randpath = Reporter.get_random_fn("zip")
        with ZipFile(randpath, "w") as z:
            for i in files:
                z.write(i)
        # Delete the inputs only after the archive is closed, so a failure
        # while writing cannot leave files that exist nowhere.
        for i in files:
            os.remove(i)
        return(randpath)