In [1]:
import json
import math
import os

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import gridspec
from scipy.stats import spearmanr

from allensdk.api.queries.cell_types_api import CellTypesApi
from allensdk.core.cell_types_cache import CellTypesCache

GridSpec = gridspec.GridSpec

In [2]:
def filter_out_nan(origin):
    """Return a new list with the NaN entries of ``origin`` removed.

    An entry is dropped when its string form is exactly ``'nan'`` (which is
    how a float NaN prints). The input itself is left untouched.
    """
    kept = []
    for item in origin:
        if str(item) == 'nan':
            continue
        kept.append(item)
    return kept

def palette_builder(cre_line, colour):
    """Build a dict mapping each entry of ``cre_line`` to a colour, by position.

    NaN entries are first stripped from ``colour`` (via ``filter_out_nan``);
    the i-th item of ``cre_line`` is then paired with the i-th remaining
    colour. Raises ``IndexError`` when fewer colours remain than there are
    ``cre_line`` entries.
    """
    usable_colours = filter_out_nan(colour)
    return {
        cre: usable_colours[position]
        for position, cre in enumerate(cre_line)
    }

In [3]:
# Load the reference tables stored as CSVs in the repo.

# Entrez id -> gene symbol lookup; read without a header row, so the column
# names are supplied here.
entrenz_symbol_s = pd.read_csv(
    "entrenz_symbol_s.csv",
    index_col=0,
    names=['entrez_id', 'gene_symbol'],
)

# Channel genes and their associated ion channels.
ion_channel_genes = pd.read_csv("targets_and_families.csv", index_col=0)

# Plot styling for the rest of the notebook.
sns.set_context("notebook", font_scale=1.5, rc={"lines.linewidth": 2.5})

# Parameter names.
org = pd.read_csv("org.csv", index_col=0)
parameters = org['parameters'].tolist()

# Parameter names for absolute conductance ('abs_' prefix added element-wise).
abs_parameters = ('abs_' + org['parameters']).tolist()

# The parameters shared by all neuronal models.
shared_parameters = filter_out_nan(org['shared'].tolist())

# Genes related to each shared parameter, keyed by the plain parameter name
# and by its 'abs_'-prefixed name. Each dict gets its own list objects.
related_RNA = {}
abs_related_RNA = {}
for para in shared_parameters:
    related_RNA[para] = filter_out_nan(list(org[para]))
    abs_related_RNA['abs_' + para] = filter_out_nan(list(org[para]))

# Keep only channel genes that have an MGI symbol.
has_mgi_symbol = ion_channel_genes['MGI symbol'].notnull()
ion_channel_genes = ion_channel_genes[has_mgi_symbol]

# Rows indexed "vgic" (the voltage-gated channel genes), looked up once.
gene_channel_vgic = ion_channel_genes.loc["vgic"]

# MGI symbols of those voltage-gated channel genes, as a plain list.
gene_channel = list(gene_channel_vgic['MGI symbol'].values)

# Drop vgic genes whose transcriptome_df row has a mean value below the
# threshold. Genes whose Entrez id is not in transcriptome_df.index are kept.
#
# NOTE(review): transcriptome_df is not defined in any cell visible above this
# one, so this cell fails on a fresh kernel unless it is loaded elsewhere.
# Confirm where it comes from and load it before this cell.
MIN_MEAN_EXPRESSION = .1

# Build the id lookup once instead of re-listing the index for every gene.
transcriptome_ids = set(transcriptome_df.index)

exclude = []
for gene in gene_channel_vgic['Mouse Entrez Gene']:
    gene_id = int(gene)
    if gene_id not in transcriptome_ids:
        continue
    expression = list(transcriptome_df.loc[gene_id])
    if sum(expression) / len(expression) < MIN_MEAN_EXPRESSION:
        exclude.append(gene)

# Remove every excluded gene in one pass. The previous per-gene loop rebuilt
# the result from the unfiltered frame on each iteration, so only the last
# excluded gene was actually removed, and the result was left undefined when
# nothing was excluded.
gene_channel_vgic_filtered = gene_channel_vgic[
    ~gene_channel_vgic['Mouse Entrez Gene'].isin(exclude)
]

gene_channel_vgic_filtered


In [4]:
# MGI symbols of the vgic genes that survived the expression filter.
gene_channel_used = gene_channel_vgic_filtered['MGI symbol'].tolist()

In [5]:
# Raw GSE71585 RefSeq counts table, indexed by its first column.
# The file location can be overridden with the GSE71585_COUNTS_CSV environment
# variable, so the notebook is not tied to one machine. When the variable is
# unset, the original hard-coded path is used.
raw_counts_path = os.environ.get(
    "GSE71585_COUNTS_CSV",
    "C:/Users/Allose/Downloads/GSE71585_RefSeq_counts.csv",
)
raw_counts = pd.read_csv(raw_counts_path, index_col=0)

# Total count per column (DataFrame.sum sums down each column by default).
raw_sum = raw_counts.sum()


In [6]:
# Divide every column of raw_counts by that column's total; the Series is
# aligned on the DataFrame's column labels.
norm_counts = raw_counts / raw_sum

In [7]:
# log2(counts-per-million + 1).
# Computed from raw_counts / raw_sum rather than from norm_counts itself, so
# re-running this cell does not apply the scaling and log a second time.
counts_per_million = raw_counts.divide(raw_sum, axis='columns') * 1000000
norm_counts = (counts_per_million + 1).apply(np.log2)


In [8]:
# Write the normalized counts to norm_counts.csv in the working directory.
norm_counts.to_csv('norm_counts.csv')

In [38]:
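# Optional shortcut: uncomment to reload the normalized counts saved to
# norm_counts.csv above instead of recomputing them from the raw counts.
# norm_counts = pd.read_csv("norm_counts.csv", index_col=0)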

In [9]:
# Keep only the rows (axis='index') whose label is one of the selected channel
# gene symbols in gene_channel_used.
norm_counts_filtered = norm_counts.filter(items=gene_channel_used, axis='index')
# Last expression of the cell: renders the filtered table as the cell output.
norm_counts_filtered

Unnamed: 0_level_0,Calb2_tdTpositive_cell_1,Calb2_tdTpositive_cell_2,Calb2_tdTpositive_cell_3,Calb2_tdTpositive_cell_4,Calb2_tdTpositive_cell_5,Calb2_tdTpositive_cell_6,Calb2_tdTpositive_cell_7,Calb2_tdTpositive_cell_8,Calb2_tdTpositive_cell_9,Calb2_tdTpositive_cell_10,...,CAV_VISp_Contra_tdTpos_cell_5,Rbp4_CTX_10pg_1,Rbp4_CTX_10pg_2,Rbp4_CTX_10pg_3,Rbp4_CTX_10pg_4,Rbp4_CTX_10pg_5,Rbp4_CTX_10pg_6,Rbp4_CTX_250ng_1,Rbp4_CTX_250ng_2,Trib2_CTX_250ng_1
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Kcnma1,0.101247,0.000000,3.084664,7.261033,1.876241,7.001397,7.533050,8.857980,7.193113,6.173334,...,8.387455,3.423858,5.456605,1.242548,3.312544,2.676521,2.920908,7.273108,7.005379,7.360396
Kcnn1,0.198543,0.309660,0.000000,0.127867,0.042196,0.123625,0.185289,0.000000,0.414461,5.471726,...,0.560248,0.287179,0.091668,0.365504,0.310030,0.237856,3.114726,3.995580,4.270947,4.200941
Kcnn2,3.588794,1.297081,0.982665,0.000000,0.000000,0.000000,1.946819,0.335838,0.000000,0.286419,...,5.794847,4.829853,1.262202,1.691087,0.000000,0.105609,5.355416,5.136332,5.195371,5.122984
Kcnn3,0.099329,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.596861,...,0.000000,3.684158,4.836567,0.193843,3.377606,3.648951,0.000000,4.884148,4.685582,4.815792
Kcnn4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.210321,0.336080,0.312871
Kcnt1,0.000000,0.000000,0.000000,0.000000,0.103262,6.838593,1.006418,2.446422,0.000000,0.000000,...,4.620849,4.156346,2.962231,6.596563,4.357544,4.907708,0.133179,7.286927,7.350062,7.427123
Kcnt2,4.093178,7.282469,6.200803,9.724795,6.066605,6.552947,0.000000,6.187524,0.000000,2.023781,...,7.323259,0.197044,7.176823,5.207565,0.000000,5.253194,0.255096,4.981214,4.914068,4.813495
Kcnu1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.314227,5.229088,0.000000,0.000000,0.000000,0.000000,0.000000,1.964768,2.078206,2.038416
Catsper1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Catsper2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.116467,0.000000,0.000000,0.370513,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,2.864745,2.253229,1.872119,2.080327


In [10]:
# Write the filtered normalized counts to norm_raw_counts_filtered.csv in the
# working directory.
norm_counts_filtered.to_csv('norm_raw_counts_filtered.csv')

Calb2_tdTpositive_cell_1         14030202.14
Calb2_tdTpositive_cell_2          4803366.44
Calb2_tdTpositive_cell_3          3073418.37
Calb2_tdTpositive_cell_4         12192899.59
Calb2_tdTpositive_cell_5         13477232.54
Calb2_tdTpositive_cell_6         12853590.90
Calb2_tdTpositive_cell_7         11893936.78
Calb2_tdTpositive_cell_8         15260769.11
Calb2_tdTpositive_cell_9          4957918.20
Calb2_tdTpositive_cell_10        13660605.32
Calb2_tdTpositive_cell_11         3417166.16
Calb2_tdTpositive_cell_12         5418736.19
Calb2_tdTpositive_cell_13        10449442.27
Calb2_tdTpositive_cell_14        11745991.88
Calb2_tdTpositive_cell_15        11768285.01
Calb2_tdTpositive_cell_16        13373696.93
Calb2_tdTpositive_cell_17         4560352.09
Calb2_tdTpositive_cell_18         3703237.74
Calb2_tdTpositive_cell_19         3698190.24
Calb2_tdTpositive_cell_20         4313873.63
Calb2_tdTpositive_cell_21         3693324.84
Calb2_tdTpositive_cell_22         4461707.18
Calb2_tdTp