In [1]:
import pandas as pd
import numpy as np

## 1) Load positive, negative gene sets and the corresponding top-10 neighbor genes
* For details on how the positive and negative cases are defined, refer to the paper **(Methods)**
* The top neighbor genes are found by calculating the covariance

In [2]:
def _read_neighbor_names(path):
    """Read a header-less table from `path` and return every column except the last as an array."""
    table = pd.read_table(path, header=None)
    # The final column is discarded (presumably an empty column produced by a
    # trailing delimiter -- TODO confirm against the input files).
    return table.iloc[:, :-1].values


## immune genes
p_immune_neighbor = _read_neighbor_names('./posi_immune_neighborname.txt')
n_immune_neighbor = _read_neighbor_names('./nega_immune_neighborname.txt')
## cell cycle genes
p_cell_cycle_neighbor = _read_neighbor_names('./posi_cell_cycle_neighborname.txt')
n_cell_cycle_neighbor = _read_neighbor_names('./nega_cell_cycle_neighborname.txt')
## rhythm
p_rhythm_neighbor = _read_neighbor_names('./posi_rhythm_neighborname.txt')
n_rhythm_neighbor = _read_neighbor_names('./nega_rhythm_neighborname.txt')
## proliferation
p_proliferation_neighbor = _read_neighbor_names('./posi_proliferation_neighborname.txt')
n_proliferation_neighbor = _read_neighbor_names('./nega_proliferation_neighborname.txt')

## 2) Functions for the adjacency matrix and top neighbor selection
Adjacency matrix
> Given $n$ genes, we define an adjacency matrix $A\in\mathbb{R}^{n\times n}$, where the degree between two genes is the number of their shared neighbor genes

In [3]:
def _shared_neighbor_matrix(top_neighbors):
    """Return the square matrix of pairwise shared-neighbor counts with a zero diagonal."""
    # Build each gene's neighbor set once instead of inside the pairwise loop.
    neighbor_sets = [set(neighbors) for neighbors in top_neighbors]
    n_genes = len(neighbor_sets)
    # Pre-shaping as (n_genes, n_genes) keeps the result 2-D even for empty input.
    degree = np.zeros((n_genes, n_genes), dtype=int)
    for i in range(n_genes):
        # Set intersection is symmetric, so only the upper triangle is computed;
        # the diagonal is never written and therefore stays 0.
        for j in range(i + 1, n_genes):
            n_shared = len(neighbor_sets[i] & neighbor_sets[j])
            degree[i, j] = n_shared
            degree[j, i] = n_shared
    return degree


def weighted_Adjacency(posi_top_neighbors,nega_top_neighbors):
    """Build shared-neighbor adjacency matrices for the positive and negative gene sets.

    Entry ``[i, j]`` of each matrix is the number of distinct neighbors that
    gene ``i`` and gene ``j`` of that set have in common; the diagonal is 0.

    Parameters
    ----------
    posi_top_neighbors : sequence of sequences
        One sequence of (hashable) neighbor identifiers per positive gene.
    nega_top_neighbors : sequence of sequences
        One sequence of (hashable) neighbor identifiers per negative gene.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Integer matrices of shape ``(len(posi_top_neighbors),) * 2`` and
        ``(len(nega_top_neighbors),) * 2``. An empty input yields a ``(0, 0)``
        matrix instead of raising.
    """
    p = _shared_neighbor_matrix(posi_top_neighbors)
    n = _shared_neighbor_matrix(nega_top_neighbors)
    return p, n

def top_neighbors_split(p_immune_neighbor, n_immune_neighbor, max_top=10):
    """Build the top-1 ... top-``max_top`` neighbor prefixes for every gene.

    Despite the parameter names, nothing here is specific to the immune set:
    any pair of per-gene neighbor tables can be passed.

    Parameters
    ----------
    p_immune_neighbor : sequence of sequences
        Neighbor list of each positive gene; the first ``k`` items of a list
        are taken as that gene's top-``k`` neighbors.
    n_immune_neighbor : sequence of sequences
        Neighbor list of each negative gene, in the same layout.
    max_top : int, optional
        Largest prefix length to produce (default 10, the previously
        hard-coded value).

    Returns
    -------
    (list, list)
        Two lists of length ``max_top``. Element ``k - 1`` holds, for every
        gene of the respective set, the slice ``neighbors[:k]``. Rows shorter
        than ``k`` are returned as-is because slicing does not pad.
    """
    prefix_lengths = range(1, max_top + 1)
    top_posi_neighbors = [
        [neighbors[:k] for neighbors in p_immune_neighbor]
        for k in prefix_lengths
    ]
    top_nega_neighbors = [
        [neighbors[:k] for neighbors in n_immune_neighbor]
        for k in prefix_lengths
    ]
    return top_posi_neighbors, top_nega_neighbors

## 3) Four functional types 

#### immune

In [4]:
# The prefix lists do not depend on top_n, so build them once instead of
# rebuilding them on every loop iteration.
top_posi_neighbors, top_nega_neighbors = top_neighbors_split(p_immune_neighbor, n_immune_neighbor)

for top_n in range(1,11):

    print('top', top_n)

    # Pairwise shared-neighbor counts when each gene keeps only its first top_n neighbors.
    A_positive, A_negative = weighted_Adjacency(top_posi_neighbors[top_n-1],top_nega_neighbors[top_n-1])

    # An entry equal to top_n means the pair's shared-neighbor count reaches the
    # prefix length; count the distinct genes (rows) with at least one such entry.
    for adjacency, set_label in ((A_positive, '(in positive set)'), (A_negative, '(in negative set)')):
        idx_row, idx_col = np.where(adjacency==top_n)
        n_genes_hit = len(set(idx_row.tolist()))
        print('Sharing %d neighbors at least with one another gene:\t%d\t%s' % (top_n,n_genes_hit,set_label))

    print('\n')

top 1
Sharing 1 neighbors at least with one another gene:	244	(in positive set)
Sharing 1 neighbors at least with one another gene:	219	(in negative set)


top 2
Sharing 2 neighbors at least with one another gene:	197	(in positive set)
Sharing 2 neighbors at least with one another gene:	157	(in negative set)


top 3
Sharing 3 neighbors at least with one another gene:	141	(in positive set)
Sharing 3 neighbors at least with one another gene:	116	(in negative set)


top 4
Sharing 4 neighbors at least with one another gene:	92	(in positive set)
Sharing 4 neighbors at least with one another gene:	74	(in negative set)


top 5
Sharing 5 neighbors at least with one another gene:	73	(in positive set)
Sharing 5 neighbors at least with one another gene:	56	(in negative set)


top 6
Sharing 6 neighbors at least with one another gene:	56	(in positive set)
Sharing 6 neighbors at least with one another gene:	41	(in negative set)


top 7
Sharing 7 neighbors at least with one another gene:	47	(in posit

#### cell cycle

In [5]:
# The prefix lists do not depend on top_n, so build them once instead of
# rebuilding them on every loop iteration.
top_posi_neighbors, top_nega_neighbors = top_neighbors_split(p_cell_cycle_neighbor, n_cell_cycle_neighbor)

for top_n in range(1,11):

    print('top', top_n)

    # Pairwise shared-neighbor counts when each gene keeps only its first top_n neighbors.
    A_positive, A_negative = weighted_Adjacency(top_posi_neighbors[top_n-1],top_nega_neighbors[top_n-1])

    # An entry equal to top_n means the pair's shared-neighbor count reaches the
    # prefix length; count the distinct genes (rows) with at least one such entry.
    for adjacency, set_label in ((A_positive, '(in positive set)'), (A_negative, '(in negative set)')):
        idx_row, idx_col = np.where(adjacency==top_n)
        n_genes_hit = len(set(idx_row.tolist()))
        print('Sharing %d neighbors at least with one another gene:\t%d\t%s' % (top_n,n_genes_hit,set_label))

    print('\n')

top 1
Sharing 1 neighbors at least with one another gene:	557	(in positive set)
Sharing 1 neighbors at least with one another gene:	448	(in negative set)


top 2
Sharing 2 neighbors at least with one another gene:	513	(in positive set)
Sharing 2 neighbors at least with one another gene:	324	(in negative set)


top 3
Sharing 3 neighbors at least with one another gene:	460	(in positive set)
Sharing 3 neighbors at least with one another gene:	228	(in negative set)


top 4
Sharing 4 neighbors at least with one another gene:	422	(in positive set)
Sharing 4 neighbors at least with one another gene:	175	(in negative set)


top 5
Sharing 5 neighbors at least with one another gene:	406	(in positive set)
Sharing 5 neighbors at least with one another gene:	145	(in negative set)


top 6
Sharing 6 neighbors at least with one another gene:	354	(in positive set)
Sharing 6 neighbors at least with one another gene:	96	(in negative set)


top 7
Sharing 7 neighbors at least with one another gene:	311	(in

#### proliferation

In [6]:
# The prefix lists do not depend on top_n, so build them once instead of
# rebuilding them on every loop iteration.
top_posi_neighbors, top_nega_neighbors = top_neighbors_split(p_proliferation_neighbor, n_proliferation_neighbor)

for top_n in range(1,11):

    print('top', top_n)

    # Pairwise shared-neighbor counts when each gene keeps only its first top_n neighbors.
    A_positive, A_negative = weighted_Adjacency(top_posi_neighbors[top_n-1],top_nega_neighbors[top_n-1])

    # An entry equal to top_n means the pair's shared-neighbor count reaches the
    # prefix length; count the distinct genes (rows) with at least one such entry.
    for adjacency, set_label in ((A_positive, '(in positive set)'), (A_negative, '(in negative set)')):
        idx_row, idx_col = np.where(adjacency==top_n)
        n_genes_hit = len(set(idx_row.tolist()))
        print('Sharing %d neighbors at least with one another gene:\t%d\t%s' % (top_n,n_genes_hit,set_label))

    print('\n')

top 1
Sharing 1 neighbors at least with one another gene:	119	(in positive set)
Sharing 1 neighbors at least with one another gene:	101	(in negative set)


top 2
Sharing 2 neighbors at least with one another gene:	92	(in positive set)
Sharing 2 neighbors at least with one another gene:	76	(in negative set)


top 3
Sharing 3 neighbors at least with one another gene:	72	(in positive set)
Sharing 3 neighbors at least with one another gene:	47	(in negative set)


top 4
Sharing 4 neighbors at least with one another gene:	56	(in positive set)
Sharing 4 neighbors at least with one another gene:	37	(in negative set)


top 5
Sharing 5 neighbors at least with one another gene:	56	(in positive set)
Sharing 5 neighbors at least with one another gene:	32	(in negative set)


top 6
Sharing 6 neighbors at least with one another gene:	32	(in positive set)
Sharing 6 neighbors at least with one another gene:	19	(in negative set)


top 7
Sharing 7 neighbors at least with one another gene:	19	(in positive 

#### Rhythm

In [7]:
# The prefix lists do not depend on top_n, so build them once instead of
# rebuilding them on every loop iteration.
top_posi_neighbors, top_nega_neighbors = top_neighbors_split(p_rhythm_neighbor, n_rhythm_neighbor)

for top_n in range(1,11):

    print('top', top_n)

    # Pairwise shared-neighbor counts when each gene keeps only its first top_n neighbors.
    A_positive, A_negative = weighted_Adjacency(top_posi_neighbors[top_n-1],top_nega_neighbors[top_n-1])

    # An entry equal to top_n means the pair's shared-neighbor count reaches the
    # prefix length; count the distinct genes (rows) with at least one such entry.
    for adjacency, set_label in ((A_positive, '(in positive set)'), (A_negative, '(in negative set)')):
        idx_row, idx_col = np.where(adjacency==top_n)
        n_genes_hit = len(set(idx_row.tolist()))
        print('Sharing %d neighbors at least with one another gene:\t%d\t%s' % (top_n,n_genes_hit,set_label))

    print('\n')

top 1
Sharing 1 neighbors at least with one another gene:	172	(in positive set)
Sharing 1 neighbors at least with one another gene:	142	(in negative set)


top 2
Sharing 2 neighbors at least with one another gene:	145	(in positive set)
Sharing 2 neighbors at least with one another gene:	102	(in negative set)


top 3
Sharing 3 neighbors at least with one another gene:	117	(in positive set)
Sharing 3 neighbors at least with one another gene:	58	(in negative set)


top 4
Sharing 4 neighbors at least with one another gene:	97	(in positive set)
Sharing 4 neighbors at least with one another gene:	44	(in negative set)


top 5
Sharing 5 neighbors at least with one another gene:	95	(in positive set)
Sharing 5 neighbors at least with one another gene:	35	(in negative set)


top 6
Sharing 6 neighbors at least with one another gene:	85	(in positive set)
Sharing 6 neighbors at least with one another gene:	20	(in negative set)


top 7
Sharing 7 neighbors at least with one another gene:	63	(in positi