In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [62]:
tpm = pd.read_csv("../../data/supp12.tsv", sep="\t", header=0, index_col=list(range(3)), usecols=list(range(5)))
tpm.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Mouse_gene_TPM,Human_gene_TPM
Subclass,Gene,Transcript,Unnamed: 3_level_1,Unnamed: 4_level_1
Astro,TSPAN6,ENST00000373020.8,45.27,86.94
Astro,DPM1,ENST00000371582.8,213.19,88.54
Astro,DPM1,ENST00000371588.9,213.19,88.54
Astro,FUCA2,ENST00000002165.10,142.11,26.09
Astro,GCLC,ENST00000509541.5,28.15,36.55


In [63]:
# just get the first transcript in each gene
tpm = tpm.groupby(['Subclass', 'Gene']).first()
tpm.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Mouse_gene_TPM,Human_gene_TPM
Subclass,Gene,Unnamed: 2_level_1,Unnamed: 3_level_1
Astro,AAAS,16.24,121.01
Astro,AACS,36.53,19.05
Astro,AAMDC,40.93,50.3
Astro,AAMP,40.81,26.85
Astro,AARSD1,37.88,56.02


In [64]:
# rename the columns
tpm.columns = ['mouse', 'human']
tpm.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mouse,human
Subclass,Gene,Unnamed: 2_level_1,Unnamed: 3_level_1
Astro,AAAS,16.24,121.01
Astro,AACS,36.53,19.05
Astro,AAMDC,40.93,50.3
Astro,AAMP,40.81,26.85
Astro,AARSD1,37.88,56.02


In [68]:
clusters = np.unique([i[0] for i in tpm.index])

In [71]:
mouse_genes = [tpm['mouse'].T[cluster] for cluster in clusters]

In [75]:
pd.concat(mouse_genes, axis=1)

Unnamed: 0_level_0,mouse,mouse,mouse,mouse,mouse,mouse,mouse,mouse,mouse,mouse,mouse,mouse,mouse,mouse,mouse
Gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
AAAS,16.24,19.78,42.54,29.22,35.00,39.92,31.53,33.07,44.97,,55.73,27.03,42.87,22.82,24.11
AACS,36.53,46.98,65.38,65.94,48.07,39.30,51.58,56.30,47.17,,,27.79,,41.09,
AAMDC,40.93,,65.52,,30.99,,32.27,34.22,53.17,,154.04,72.06,28.05,40.45,36.48
AAMP,40.81,125.77,192.35,150.80,145.13,170.92,167.82,195.91,213.13,238.55,130.57,250.83,256.75,195.22,269.62
AARSD1,37.88,121.90,147.34,75.35,127.79,136.71,111.07,126.37,141.80,287.84,38.70,132.82,142.82,147.45,176.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
UBQLN4,,,,,,,,,,,,,,,19.65
UBTF,,,,,,,,,,,,,,,31.15
XXYLT1,,,,,,,,,,,,,,,36.27
YJU2,,,,,,,,,,,,,,,14.89


In [77]:
mouse_genes = pd.concat(mouse_genes, axis=1).dropna()

In [79]:
mouse_genes.columns = clusters

In [81]:
human_genes = pd.concat([tpm['human'].T[cluster] for cluster in clusters], axis=1).dropna()

In [84]:
human_genes.columns = clusters

# get correlation network

In [85]:
import igraph as ig
import leidenalg as la
from umap import UMAP

In [100]:
from scipy.stats import rankdata

In [86]:
from numpy import corrcoef

In [119]:
corr_human = corrcoef(human_genes.values, human_genes.values)[:human_genes.shape[0], :human_genes.shape[0]]

In [120]:
corr_mouse = corrcoef(mouse_genes.values, mouse_genes.values)[:human_genes.shape[0], :human_genes.shape[0]]

In [121]:
corr_human.shape

(1970, 1970)

In [213]:
def get_comparative_inds(corr_a, corr_b, n_idx = 30):
    """
    Identifies the top n indecies in a for each row in a, 
    as well as the ranks of the corresponding indexs for the same row in be b
    
    
    inputs:
        corr_a : a matrix of correlation coefficients
        corr_b : a different matrix of correlation coefficients
        n_idx : the number of indicies to check
    """
    
    top_n_a = np.argsort(corr_a)[:, -(n_idx+1):-1]
    idx_ranks_b = rankdata(corr_b, axis=0)
    idx_ranks_b = -1*(idx_ranks_b - corr_b.shape[0])
    ranks_in_b = [idx_ranks_b[i, top_n_a[i]][::-1] for i in range(len(corr_a))] 
    return ranks_in_b

In [215]:
b_ranks = get_comparative_inds(corr_human, corr_mouse)

In [217]:
b_ranks

[array([ 172.,  116.,  810.,  345., 1382.,  482.,  212.,  502.,  743.,
         528.,  337.,  725.,  721., 1872., 1438.,  862.,  591., 1228.,
         150., 1202.,  499., 1399., 1534., 1384.,  722.,  805.,  531.,
          87.,  766.,  137.]),
 array([1403.,  968.,  287., 1200.,   44.,  995.,  138., 1263.,  921.,
        1149., 1918., 1180., 1046., 1397., 1041.,  417., 1093., 1261.,
         770.,  940., 1143.,   45.,  552.,  212., 1204., 1170.,  982.,
        1362., 1339.,  991.]),
 array([1305.,  392., 1498., 1084., 1810., 1388.,  750.,  130., 1964.,
        1105., 1317., 1375., 1391., 1336., 1062., 1593., 1261.,  677.,
         648., 1298., 1594., 1429., 1297., 1315., 1546., 1482., 1602.,
         585., 1405., 1055.]),
 array([ 103., 1685., 1655., 1021., 1568., 1639.,  122., 1862.,  153.,
        1809., 1472., 1849., 1083., 1502., 1790.,  109., 1650.,  176.,
        1568.,  533., 1782., 1294., 1046., 1805., 1802., 1744., 1602.,
        1844., 1650., 1891.]),
 array([ 156., 1456.,  2

In [202]:
len(a)

1970

In [203]:
len(b)

1970

In [204]:
a.shape

(1970, 31)

In [205]:
b.shape

(1970, 1970)

In [206]:
a[0]

array([ 938, 1242,  148,  689, 1770,  803,  289,  850, 1133, 1182,  701,
        506, 1010,   83, 1283,  227, 1165, 1590, 1684,  137,  262, 1713,
        140,  492,  796, 1688, 1012,   71, 1616,  554,    0])

In [207]:
b[0, a[0]]

array([ 137.,  766.,   87.,  531.,  805.,  722., 1384., 1534., 1399.,
        499., 1202.,  150., 1228.,  591.,  862., 1438., 1872.,  721.,
        725.,  337.,  528.,  743.,  502.,  212.,  482., 1382.,  345.,
        810.,  116.,  172.,   -0.])

In [208]:
-1 * (1 - 1970)

1969

In [211]:
[b[i, a[i]] for i in range(len(b))]

[array([ 137.,  766.,   87.,  531.,  805.,  722., 1384., 1534., 1399.,
         499., 1202.,  150., 1228.,  591.,  862., 1438., 1872.,  721.,
         725.,  337.,  528.,  743.,  502.,  212.,  482., 1382.,  345.,
         810.,  116.,  172.,   -0.]),
 array([ 991., 1339., 1362.,  982., 1170., 1204.,  212.,  552.,   45.,
        1143.,  940.,  770., 1261., 1093.,  417., 1041., 1397., 1046.,
        1180., 1918., 1149.,  921., 1263.,  138.,  995.,   44., 1200.,
         287.,  968., 1403.,   -0.]),
 array([1055., 1405.,  585., 1602., 1482., 1546., 1315., 1297., 1429.,
        1594., 1298.,  648.,  677., 1261., 1593., 1062., 1336., 1391.,
        1375., 1317., 1105., 1964.,  130.,  750., 1388., 1810., 1084.,
        1498.,  392., 1305.,   -0.]),
 array([1891., 1650., 1844., 1602., 1744., 1802., 1805., 1046., 1294.,
        1782.,  533., 1568.,  176., 1650.,  109., 1790., 1502., 1083.,
        1849., 1472., 1809.,  153., 1862.,  122., 1639., 1568., 1021.,
        1655., 1685.,  103.,   -0.