In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import os
import re

#import bokeh
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib_venn import venn3, venn3_circles, venn2
from scipy.stats import norm
import seaborn as sb

import pickle
import json

In [2]:
palette = [
    'tab:orange', 'tab:green', 'tab:red', 'tab:purple', 'tab:brown', 'tab:pink', 'tab:gray',
    'tab:olive', 'tab:cyan', 'lightcoral', 'chocolate', 'gold', 'yellow', 'lime', 'aqua', 'dodgerblue',
    'blueviolet', 'deeppink', "#999999", "#777777", 'tab:blue', 
]

palette = palette*10

In [3]:
def venn_diagrams(mut_id, all_values, mut_wt_values):
    mut_name = mut_id.split("@")[1]
    f, ax = plt.subplots(1,2)
    plt.title(mut_name)
    venn3(subsets=all_values, set_labels=('WT', 'MUT', 'CTL'), alpha=0.7, ax=ax[0], set_colors=["#67A9CF","#EF8A62","#888888"]) # ("b","r","g"))
    venn2(subsets=mut_wt_values, set_labels=('WT', 'MUT'), alpha=0.7, ax=ax[1], set_colors=["#67A9CF","#EF8A62"]) #["b", "r"])
    plt.savefig("outputs/single-cells/" + mut_name + "_venn.png")
    plt.close(f)

In [4]:
def visualize_batches(df, x_limits, y_limits, mut_name):
    fig = plt.figure(figsize=(8,8))
    g1 = sb.scatterplot(data=df, x="X", y="Y", hue="Plate", s=10, alpha=0.2)
    g1.set(xlim=x_limits)
    g1.set(ylim=y_limits)
    plt.title(mut_name)
    plt.legend(bbox_to_anchor=(0.85, 1), loc=2, borderaxespad=0.)
        
    plt.savefig("outputs/single-cells/" + mut_name + "_plate.png")
    plt.close(fig)

In [5]:
def visualize_allele(df, x_limits, y_limits, mut_name, layout="dots"):
    fig = plt.figure(figsize=(8,8))
    
    if layout == "dots":
        g1 = sb.scatterplot(data=df, x="X", y="Y", hue="Allele", s=10, alpha=0.2, palette=["#888888","#67A9CF","#EF8A62"]) #["g","b","r"])
    elif layout == "kde":
        g1 = sb.kdeplot(data=df, x="X", y="Y", hue="Allele", alpha=0.5, levels=20, palette=["g","b","r"]) #['tab:blue',"#2CA02C","#FF7F0E"])
    elif layout == "mixed":
        g1 = sb.scatterplot(data=df, x="X", y="Y", hue="Allele", s=10, alpha=0.2, palette=["g","b","r"]) #['tab:blue',"#2CA02C","#FF7F0E"])
        g2 = sb.kdeplot(data=df, x="X", y="Y", hue="Allele", alpha=0.5, levels=10, palette=["g","b","r"]) #['tab:blue',"#2CA02C","#FF7F0E"])
        
    g1.set(xlim=x_limits)
    g1.set(ylim=y_limits)
    plt.title(mut_name)
    plt.savefig("outputs/single-cells/" + mut_name + "_allele.png")
    
    #plt.show()
    plt.close(fig)

In [6]:
def organize_alleles(d, wt_name):
    # Allele names
    allele = []
    allele += ["Control"]*d["controls"]["features"].shape[0]
    allele += ["Wild type"]*d["wild_type_data"]["features"].shape[0]
    for k in d["mutant_ids"]:
        allele += [k.split("@")[-1]]*d["mutants_data"][k]["features"].shape[0]
    
    # Plate names
    plate = []
    plate += ["Control"]*len(d["ctlimgs"]) #[re.match(r"^(.+)/(.+)/(.+)_(.)", x).groups()[1] for x in d["ctlimgs"]]
    plate += [re.match(r"^(.+)/(.+)/(.+)_(.)", x).groups()[1] for x in d["wild_type_data"]["images"]]
    for k in d["mutant_ids"]:
        plate += [re.match(r"^(.+)/(.+)/(.+)_(.)", x).groups()[1] for x in d["mutants_data"][k]["images"]]
    
    # Dataframe
    df = pd.DataFrame(data=d["Y"], columns=["X","Y"])
    df["Allele"] = allele
    df["Plate"] = plate
    
    # Allele colors
    colors = ["#888888","#67A9CF"] + [palette[i] for i in range(len(d["mutant_ids"]))] # + ["green", "blue"]
    
    # Plot limits
    x_limits = (np.percentile(d["Y"][:,0], q=0.0), np.percentile(d["Y"][:,0], q=100)) #99.9))
    y_limits = (np.percentile(d["Y"][:,1], q=0.0), np.percentile(d["Y"][:,1], q=100)) #99.9))
    
    # Plot all alleles
    fig = plt.figure(figsize=(8,8))
    g1 = sb.scatterplot(data=df, x="X", y="Y", s=10, alpha=0.2, hue="Allele", palette=colors)
    g1.set(xlim=x_limits)
    g1.set(ylim=y_limits)
    plt.legend(bbox_to_anchor=(0.85, 1), loc=2, borderaxespad=0.)
    plt.savefig("outputs/single-cells/" + wt_name + "_allele.png")
    
    return df, x_limits, y_limits

In [None]:
data_dir = "outputs/single-cells/"
wild_types = [k.replace(".pkl", "") for k in os.listdir(data_dir) if k.endswith(".pkl")]

index = {"name":"genes", "children":[]}

for wt in wild_types:
    if not os.path.exists(data_dir + wt + ".pkl"): continue
        
    index["children"].append({"name":wt.split("@")[-1], "children":[]})
    with open(data_dir + wt + ".pkl","rb") as file:
        d = pickle.load(file)
        if not "graph_scores" in d.keys(): continue    
        df, xl, yl = organize_alleles(d, wt.split("@")[-1])
        
        for mut in d["mutant_ids"]:
            # Add JSON entry
            index["children"][-1]["children"].append({"name":mut.split("@")[-1], 
                                                      "pair": wt.split("@")[-1] + "_" + mut.split("@")[-1],
                                                      "impact":d["graph_scores"][mut]["impact_score"],
                                                     })
        
            # Create images
            mut_name = mut.split("@")[-1]
            if not os.path.isfile(mut_name + ".html"):
                subset = df[df.Allele.isin(["Control", "Wild type", mut_name])]
                visualize_allele(subset, xl, yl, mut_name, layout="dots")
                visualize_batches(subset, xl, yl, mut_name)
                
                venn_diagrams(mut, d["graph_scores"][mut]["all_values"], d["graph_scores"][mut]["mut_wt_values"])
                print(mut, d["graph_scores"][mut]["impact_score"])            

json.dump(index, open("outputs/single-cells/alleles.json", "w"))

5268@SERPINB5_p.D141A 0.18227179336659818
5268@SERPINB5_p.A7T 0.08137638617090671
5268@SERPINB5_p.A42F 0.12837331334332833
5268@SERPINB5_p.T37I 0.09465459065885277
5268@SERPINB5_p.I159S 0.11857142857142858
5268@SERPINB5_p.G142V 0.10231923601637108
5268@SERPINB5_WT.o 0.15433314575126617
5268@SERPINB5_p.A165T 0.1598007281088331
231@AKR1B1_p.F252L 0.09147982062780269
231@AKR1B1_WT.o 0.26762995274445656
231@AKR1B1_p.P14R 0.22262959568184132
231@AKR1B1_p.Q27K 0.10024475896562733
673@BRAF_p.G466A 0.7279312343055824
673@BRAF_p.A762E 0.5322080974254887
673@BRAF_p.H574N 0.6151975683890577
673@BRAF_p.G469S 0.9418748219204103
673@BRAF_p.W450L 0.6249187432286024
673@BRAF_p.R682W 0.43767672007540054
673@BRAF_p.D594H 0.3943522417636859
673@BRAF_p.N581S 0.7276067527308838
673@BRAF_p.K601N 0.9469812762705387
673@BRAF_p.L485S 0.9084167593622544
673@BRAF_p.G469V 0.7085048699668641
673@BRAF_p.V600E 0.9451508825204024
673@BRAF_p.G466V 0.4063754427390791
673@BRAF_p.G466E 0.32847682119205296
673@BRAF_p.L613

  
  
  This is separate from the ipykernel package so we can avoid doing imports until
  


84868@HAVCR2_p.L127F 0.21842142156175523


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


84868@HAVCR2_WT.o 0.20458229763320004


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


84868@HAVCR2_p.P6S 0.26181484127794225


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


84868@HAVCR2_p.G86V 0.22741734679762068


  
  This is separate from the ipykernel package so we can avoid doing imports until


84868@HAVCR2_p.P115A 0.25923353016093553


  
  
  This is separate from the ipykernel package so we can avoid doing imports until
  


2064@ERBB2_p.D845A 0.38195215504188956


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


2064@ERBB2_p.S310F 0.19858514401212735


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


2064@ERBB2_p.S418T 0.24811523963381799


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


2064@ERBB2_p.776_776G>VC 0.6263606557377049


  
  This is separate from the ipykernel package so we can avoid doing imports until


2064@ERBB2_p.774_775insAYVM 0.29343207478450894


  
  
  This is separate from the ipykernel package so we can avoid doing imports until
  


27010@TPK1_p.G48C 0.20523724261414503


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


27010@TPK1_p.D29Y 0.13999189627228525


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


27010@TPK1_p.T213S 0.08240322963666588


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


27010@TPK1_p.E81Q 0.07367413887370147


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


27010@TPK1_p.P152T 0.07204898191655988


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


27010@TPK1_p.K111M 0.07458492975734356


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


27010@TPK1_p.T205S 0.09332023575638507


  
  This is separate from the ipykernel package so we can avoid doing imports until


27010@TPK1_p.L185I 0.058495002939447385


  
  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9841@ZBTB24_p.G568C 0.1669214599656029


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9841@ZBTB24_p.M78I 0.1554815573770492


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9841@ZBTB24_p.K123M 0.15826167789914014


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9841@ZBTB24_p.Q669R 0.21087442855753333


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9841@ZBTB24_p.G693S 0.15458993476234856


  
  This is separate from the ipykernel package so we can avoid doing imports until


9841@ZBTB24_WT.o 0.1410604192355117


  
  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_p.H96L 0.3353523836670705


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_p.E117K 0.2483389353552724


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_p.G419W 0.25972517902070835


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_p.R601W 0.14029168480626905


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_p.L268P 0.12716640834155277


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_p.F280Y 0.22624845233182006


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_WT.o 0.0680523479599692


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_p.G480W 0.12304475115014639


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_p.A159P 0.1291479072869768


  
  This is separate from the ipykernel package so we can avoid doing imports until
  


9817@KEAP1_p.M110I 0.13811771238200998
