In [None]:
import pandas as pd
import numpy as np
import math
from matplotlib_venn import venn2, venn3
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
from scipy import stats
import networkx as nx

%matplotlib inline

In [None]:
# Emit text in SVG output as text elements rather than converting glyphs to paths.
plt.rcParams['svg.fonttype'] = 'none'

# Font-size presets (in points) shared by every figure in this notebook.
SMALL_SIZE, MEDIUM_SIZE, BIGGER_SIZE = 12, 16, 22

plt.rc('font', size=SMALL_SIZE)                                # default text size
plt.rc('axes', titlesize=SMALL_SIZE, labelsize=MEDIUM_SIZE)    # axes title / x-y label sizes
plt.rc('xtick', labelsize=SMALL_SIZE)                          # x tick label size
plt.rc('ytick', labelsize=SMALL_SIZE)                          # y tick label size
plt.rc('legend', fontsize=SMALL_SIZE)                          # legend text size
plt.rc('figure', titlesize=BIGGER_SIZE)                        # figure-level title size

In [None]:
def create_bar_plot(data, x_title, y_title):
    """Show a histogram of ``data`` with a log-scaled x axis and return its figure.

    Args:
        data: Values to histogram (passed straight to ``Axes.hist``).
        x_title: Label for the x axis.
        y_title: Label for the y axis.

    Returns:
        The matplotlib ``Figure`` that holds the histogram.
    """
    fig, ax = plt.subplots(figsize=(9, 8))
    ax.hist(data, bins='auto', alpha=0.7, rwidth=0.85)
    ax.grid(False)
    ax.set_xlabel(x_title)
    ax.set_ylabel(y_title)
    ax.set_xscale("log")
    # Optional fixed range, currently disabled: ax.set_xlim(1, 1000)
    # Must come after set_xscale so plain numbers replace the log-scale tick formatter.
    ax.xaxis.set_major_formatter(ScalarFormatter())
    plt.show()
    return fig

In [None]:
def load_ontology(file_name):
    """Build a directed graph from the 'default' relations in an ontology file.

    Every line is split on whitespace. A line whose third field is exactly
    ``'default'`` adds an edge from its first field to its second field;
    all other lines are ignored.

    Args:
        file_name: Path of the ontology text file to read.

    Returns:
        A ``networkx.DiGraph`` containing one edge per 'default' line.
    """
    dG = nx.DiGraph()
    # The context manager closes the file even if parsing raises part-way through.
    with open(file_name) as file_handle:
        for line in file_handle:
            fields = line.split()
            # Blank or truncated lines have no relation-type field; skip them
            # instead of failing with IndexError.
            if len(fields) < 3:
                continue
            if fields[2] == 'default':
                dG.add_edge(fields[0], fields[1])
    return dG

In [None]:
def get_next_layer(dg, stack, term_visited):
    """Visit every not-yet-seen term in ``stack`` and collect their neighbors.

    ``stack`` is consumed (popped until empty) and ``term_visited`` is updated
    in place; both mutations are visible to the caller.

    Args:
        dg: Graph-like object exposing ``neighbors(term)``.
        stack: List of candidate terms for the current layer.
        term_visited: Dict whose keys are the terms already visited.

    Returns:
        A tuple ``(new_stack, count, term_visited)``: the neighbors of the
        newly visited terms (may contain duplicates and already-visited
        terms), how many terms were newly visited, and the same
        ``term_visited`` dict that was passed in.
    """
    frontier = []
    newly_visited = 0
    while stack:
        current = stack.pop()
        if current not in term_visited:
            term_visited[current] = 1
            newly_visited += 1
            frontier.extend(dg.neighbors(current))
    return frontier, newly_visited, term_visited

In [None]:
ontology_file = '../data/training_files_av/ontology_ctg_av.txt'
dg = load_ontology(ontology_file)

# Start from the first node that has no incoming edge and walk outwards one
# layer at a time, recording how many terms are first reached at each layer.
root = [n for n in dg.nodes() if dg.in_degree(n) == 0][0]
term_visited = {root: 1}
layers = [1]  # layer 0 holds only the root
next_layer = list(dg.neighbors(root))
i = 0
# NOTE: the last entry can be 0 when the final frontier contains only terms
# that were already visited at an earlier layer.
while next_layer:
    i += 1
    layers.append(0)
    next_layer, layers[i], term_visited = get_next_layer(dg, next_layer, term_visited)


In [None]:
# Display the number of newly visited terms per layer; index 0 is the root layer (always 1).
layers

In [None]:
def get_cell_lines_by_tissue_type(tissue_type, cell_annot_df, cell_lines_df):
    """Return the rows of ``cell_lines_df`` whose cell line belongs to a tissue.

    Args:
        tissue_type: Value to match against the ``Site_Primary`` column.
        cell_annot_df: Annotation frame with ``Site_Primary`` and ``CCLE_ID`` columns.
        cell_lines_df: Frame with a ``C`` column holding cell-line identifiers.

    Returns:
        The subset of ``cell_lines_df`` whose ``C`` value appears among the
        ``CCLE_ID`` values annotated with ``tissue_type``.
    """
    tissue_mask = cell_annot_df['Site_Primary'] == tissue_type
    ccle_tissue_cell_lines = cell_annot_df.loc[tissue_mask, 'CCLE_ID'].tolist()
    return cell_lines_df[cell_lines_df['C'].isin(ccle_tissue_cell_lines)]

In [None]:
# Disabled load of the combined response table (columns C, D, AUC); kept for reference.
#train_df = pd.read_csv('../data/drugcell_all.txt', sep='\t', header=None, names=['C', 'D', 'AUC'])

# Header-less, tab-separated two-column lookup files. Presumably 'I' is an
# integer index and the second column the entity name — TODO confirm.
drugs = pd.read_csv('../data/drug2ind_cg.txt', sep='\t', header=None, names=['I', 'D'])['D']
cell_lines_df = pd.read_csv('../data/cell2ind_cg.txt', sep='\t', header=None, names=['I', 'C'])

# Comma-delimited numeric matrix; a later cell counts the entries equal to 1 in each row.
# Presumably rows are cell lines and columns are genes — TODO confirm.
cell_mutation = np.loadtxt('../data/cell2mutation_cg.txt', delimiter=',')
# Unique gene names as a set (used for set comparison in the Venn diagram).
genes = set(pd.read_csv('../data/gene2ind_cg.txt', sep='\t', header=None, names=['I', 'G'])['G'])

In [None]:
# Venn diagram comparing the `genes` set with `genie_genes`.
# NOTE(review): `genie_genes` is not assigned in any cell above this one in the
# visible notebook, so this cell raises NameError on a fresh kernel
# (Restart & Run All) — TODO load/define it before this cell.
fig = plt.figure(figsize=(8,8))
venn2([genes, genie_genes], ['DrugCell 2.0', 'GENIE'])
plt.show()

In [None]:
# Number of mutations per cell line: count the entries equal to 1 in each row
# of the mutation matrix, then summarise and plot the distribution.

mutation_count = np.count_nonzero(np.equal(cell_mutation, 1), axis=1)
print('Median per cell line:', np.median(mutation_count))
print('Mean per cell line:', mutation_count.mean())

fig = create_bar_plot(mutation_count, 'Number of mutations', 'Number of cell lines')