In [1]:
import os

import matplotlib
import matplotlib.pyplot as plt
import mat73
import networkx as nx
import numpy as np
import pandas as pd 
import plotly.graph_objects as go
import scipy
from tqdm import tqdm

%matplotlib widget

In [2]:
# Load the MATLAB workspace that holds the network inputs for this notebook.
MMRF_data = scipy.io.loadmat('data_may4_2022.mat')
# Gene names as stored in the file; the entries are stripped of spaces in the next cell.
gene_list = MMRF_data['gene_list']

In [3]:
# Remove leading and trailing space characters from every gene name.
gene_list = list(map(lambda name: name.strip(' '), gene_list))

In [4]:
# Adjacency matrix of the gene network; it is used below to build the graph G
# and, via its entries equal to 1, the edge list.
ADJ = MMRF_data['ADJ']

In [5]:
# Subjects to exclude, identified by the first 9 characters of their id.
missing_subject_list = ['MMRF_2903', 'MMRF_2905', 'MMRF_2908', 'MMRF_2914', 'MMRF_2926',
                        'MMRF_2938', 'MMRF_2939', 'MMRF_2941', 'MMRF_2946', 'MMRF_2947']

# Subject ids are read from the column named '0'; the first entry is dropped.
patient_list = pd.read_csv('subject_list_669.csv')['0'].values[1:]
number_of_patients = len(patient_list)

# Boolean mask over patient_list: True for subjects to keep, False when the
# 9-character id prefix appears in missing_subject_list.
patients_mask = np.array(
    [subject_id[0:9] not in missing_subject_list for subject_id in patient_list],
    dtype=bool,
)

In [6]:
# Sum of all adjacency entries; if the entries are only 0/1 this is the number of
# entries equal to 1, i.e. the length of the edge list built from ADJ below.
np.sum(ADJ) 

33695.0

In [7]:
# Build the graph whose nodes are the row/column indices of ADJ.
# from_numpy_array is used because from_numpy_matrix was removed in NetworkX 3.0.
G = nx.from_numpy_array(ADJ)

In [8]:
# node_of_interest = 222
# [n for n in G.neighbors(node_of_interest)]

In [9]:
## Load ORC edge values

In [10]:
# Hard-coded sizes: Ng is the side length of the dense per-edge matrices built below.
N_subjects = 669
Ng = 8427

# (row, column) index pairs of every adjacency entry equal to 1.
edgelist = np.transpose(np.nonzero(ADJ == 1))

# RNA
# Curvature array read from the v7.3 MAT-file; its shape is shown a few cells below.
data_dict = mat73.loadmat("overall_curvature_may4_rna.mat")
overall_curvature = data_dict['overall_curvature']

In [11]:
# Load the saved RNA clustering output.
rna_clustering_results = scipy.io.loadmat('rna_clustering_results.mat')
# First row of the stored 'labels2' array; compared against 1 and 2 in the next cell.
rna_labels = rna_clustering_results['labels2'][0]

In [12]:
# Cluster label 1 selects the high-risk group, label 2 the low-risk group.
highrisk_mask = np.equal(rna_labels, 1)
lowrisk_mask = np.equal(rna_labels, 2)

In [13]:
# Number of True entries in highrisk_mask, i.e. subjects labelled 1.
np.sum(highrisk_mask)

106

In [14]:
# Shape check before the excluded-subject columns are removed in the following cell.
overall_curvature.shape

(33695, 669)

In [15]:
# Keep only the columns of subjects that pass patients_mask.
# The guard makes this cell safe to re-run: once the columns have been dropped,
# the column count equals the number of kept subjects and nothing more is removed
# (without it, a second run fails because the mask no longer matches the array).
if overall_curvature.shape[1] != int(patients_mask.sum()):
    overall_curvature = overall_curvature[:, patients_mask.astype('bool')]

In [16]:
# Row-wise mean of the curvature over the columns selected by highrisk_mask.
high_risk_curvature = overall_curvature[:, highrisk_mask.astype('bool')].mean(axis=1)

In [17]:
# Row-wise mean of the curvature over the columns selected by lowrisk_mask.
low_risk_curvature = overall_curvature[:, lowrisk_mask.astype('bool')].mean(axis=1)

In [18]:
# Scatter the high-risk mean of each edge into a dense Ng x Ng matrix,
# writing every value at both (i, j) and (j, i).
highrisk_matrix = np.zeros((Ng, Ng))
for row, (src, dst) in enumerate(edgelist):
    value = high_risk_curvature[row]
    highrisk_matrix[src, dst] = value
    highrisk_matrix[dst, src] = value

In [19]:
# Scatter the low-risk mean of each edge into a dense Ng x Ng matrix,
# writing every value at both (i, j) and (j, i).
lowrisk_matrix = np.zeros((Ng, Ng))
for row, (src, dst) in enumerate(edgelist):
    value = low_risk_curvature[row]
    lowrisk_matrix[src, dst] = value
    lowrisk_matrix[dst, src] = value

In [20]:
# High-risk minus low-risk: a positive entry means the edge has the larger mean
# curvature in the high-risk group.
difference_matrix = np.subtract(highrisk_matrix, lowrisk_matrix)

In [21]:
# difference_matrix = lowrisk_matrix

In [22]:
# Switch to the non-interactive Agg backend, replacing the `%matplotlib widget`
# backend selected in the import cell; figures below are written to disk with savefig.
matplotlib.use('Agg')

In [23]:
# Eight genes of interest; their indices in gene_list are looked up below.
gene8 = ['BUB1', 'MCM6', 'NOSTRIN', 'PAM', 'RNF115', 'SNCAIP', 'SPRR2A', 'WEE1']

In [24]:
# Convert to a NumPy array so `gene_list == name` compares element-wise and
# index arrays (e.g. listofnodes) can be used to select names.
gene_list = np.array(gene_list) 

In [25]:
# Index in gene_list of each gene in gene8, in the same order as gene8.
# The lookup is done once per gene (previously np.argwhere ran twice and the first
# result was discarded). A gene absent from gene_list raises IndexError.
positionlist8 = []
for n in gene8:
    position = np.argwhere(gene_list == n)[0][0]
    positionlist8.append(position)

In [26]:
# Indices of the gene8 entries within gene_list, in gene8 order.
positionlist8

[777, 4236, 4828, 5091, 6251, 6941, 7074, 8152]

In [None]:
# NOTE(review): G2 is first assigned in the subnetwork cell further down, so on a
# fresh kernel this lookup raises NameError. The cell has no execution count
# (In [None]) and looks like a debugging leftover -- confirm whether it can be removed.
G2.edges[(5691, 2835)]

In [41]:
# Build the subnetwork around one gene and annotate its edges with the
# high-risk minus low-risk curvature difference.
# This cell was written as the body of a loop over every node
# (`for node_of_interest in tqdm(range(0, 8427))`); it now runs for a single node.
# The figure itself is drawn and saved by the plotting cell further down.

node_of_interest = 8152  # last entry of positionlist8, i.e. 'WEE1'

# Fixed seed so that spring_layout returns the same positions on every run.
LAYOUT_SEED = 0

# Direct neighbours of the centre node (printed for inspection).
neighbors = list(G.neighbors(node_of_interest))
for n in neighbors:
    print(n)

# Centre node plus neighbours, sorted and de-duplicated.
listofnodes = np.unique([node_of_interest] + neighbors)

# Every edge incident to a direct neighbour. Besides the edges back to the centre
# node, this brings in edges to the neighbours' own neighbours (2-hop nodes).
listofedges = []
for n in neighbors:
    listofedges.extend(G.edges([n]))

G2 = nx.from_edgelist(listofedges)
pos = nx.spring_layout(G2, seed=LAYOUT_SEED)

# Labels exist only for the centre node and its direct neighbours; nodes that
# entered G2 through a neighbour's edges have no entry and are drawn unlabelled.
gene_labels = [gene_list[node] for node in listofnodes]
gene_label_dict = dict(zip(listofnodes, gene_labels))

# Edge attributes:
#   weight = high-risk minus low-risk mean curvature of the edge
#   color  = blue if the difference is positive, orange if negative, black if zero
for edge in listofedges:
    difference = difference_matrix[edge]
    if difference > 0:
        edgecolor = 'tab:blue'
    elif difference < 0:
        edgecolor = 'tab:orange'
    else:
        edgecolor = 'k'
    G2[edge[0]][edge[1]]['weight'] = difference
    G2[edge[0]][edge[1]]['color'] = edgecolor

# Colours and weights are listed in G2.edges() order, which is the order nx.draw
# expects for a per-edge edge_color list.
edges = G2.edges()
colors = [G2[u][v]['color'] for u, v in edges]
weights = [G2[u][v]['weight'] for u, v in edges]

gene_edge_dict = dict(zip(edges, weights))

748
756
757
776
940
1172
1195
1205
1619
1665
2508
5451
6772
6886
6924
8226
8228
8231


In [42]:
# Sorted, de-duplicated indices of node_of_interest and its direct neighbours.
listofnodes

array([ 748,  756,  757,  776,  940, 1172, 1195, 1205, 1619, 1665, 2508,
       5451, 6772, 6886, 6924, 8152, 8226, 8228, 8231])

In [43]:
# Size of the 1-hop neighbourhood, counting node_of_interest itself.
len(listofnodes)

19

In [44]:
# One-column table of the gene names at the indices in listofnodes.
df = pd.DataFrame(gene_list[listofnodes])

In [45]:
# Write the 1-hop gene table to CSV. The file name is derived from the centre gene
# and the neighbourhood size so it cannot go stale when node_of_interest changes;
# for node 8152 with 19 nodes it is still 'WEE1_1hop_19genes.csv'.
df.to_csv(f"{gene_list[node_of_interest]}_1hop_{len(listofnodes)}genes.csv")

In [82]:
# Leftover loop variable: holds whichever edge the most recently executed
# `for edge in ...` loop ended on.
# NOTE(review): the recorded output may come from a run with a different
# node_of_interest -- re-run to confirm.
edge

(7967, 7196)

In [99]:
# Attribute dict ('weight', 'color') stored on that edge of G2.
G2.edges[edge]

{'weight': -0.06267233519628679, 'color': 'tab:orange'}

In [105]:
# Draw the subnetwork with edges coloured by the sign of the curvature difference,
# label the centre gene and its direct neighbours, and save the figure as a JPEG.
output_dir = 'RNA-subnetworks-2hop'
os.makedirs(output_dir, exist_ok=True)  # savefig does not create missing folders

fig, ax = plt.subplots(figsize=(16, 16))

# Nodes and edges only; labels are added separately below so that their
# alignment and font size can be controlled. (The former `labels=` / `font_color=`
# arguments had no effect with with_labels=False and were dropped.)
nx.draw(G2, pos, ax=ax, with_labels=False, node_size=4, node_color='black',
        edge_color=colors, alpha=0.5)
nx.draw_networkx_labels(G2, pos, labels=gene_label_dict, font_size=22,
                        horizontalalignment='left', verticalalignment='bottom', ax=ax)
# nx.draw_networkx_edge_labels(G2, pos, edge_labels=gene_edge_dict, rotate=True)

ax.set_title(gene_list[node_of_interest], color='k', fontsize=30)
fig.tight_layout()

fig.savefig(os.path.join(output_dir, gene_list[node_of_interest] + '.jpg'), dpi=200)

In [104]:
# NOTE(review): `edgecolors` is not assigned in any cell shown in this notebook, so
# this comparison raises NameError on a fresh kernel -- debugging leftover, confirm
# whether it can be removed.
edgecolors == colors

False

In [31]:
# colors

In [93]:
# Attribute dict of edge (5691, 5091) in G2, or None when G2 has no such edge.
# NOTE(review): the recorded output pairs a positive weight with 'tab:orange', which
# the colouring rule in the subnetwork cell does not produce; it presumably comes
# from an earlier version of that cell -- re-run to confirm.
G2.get_edge_data(5691, 5091)

{'weight': 0.2967884553315585, 'color': 'tab:orange'}

In [94]:
# NOTE(review): `edgepair` is not assigned in any cell shown in this notebook, so
# this lookup raises NameError on a fresh kernel -- debugging leftover.
difference_matrix[edgepair]

-0.06267233519628679

In [47]:
# NOTE(review): the recorded output (5091) differs from the value 8152 assigned in
# the subnetwork cell, so this output is stale from an earlier run.
node_of_interest

5091

In [52]:
# Bare repr of the subnetwork graph object (debugging leftover).
G2

<networkx.classes.graph.Graph at 0x3c0751970>

In [32]:
# gene_edge_dict