In [1]:
import networkx as nx
import numpy as np
import os
import random as rand
from collections import defaultdict

In [2]:
def get_r_dictionaries(file,mapping=True):
    """Parse a whitespace-delimited text file into a dictionary.

    Parameters
    ----------
    file : str
        Path of the text file to read.
    mapping : bool, optional
        If True (default), every line contributes the entry
        ``second_field -> first_field``.
        If False, every line contributes the entry
        ``first_field -> set(remaining fields)``.

    Returns
    -------
    dict
        The dictionary built from the file. When a key occurs on several
        lines, the last occurrence wins.

    Raises
    ------
    IndexError
        If ``mapping`` is True and a non-empty line has fewer than two fields.
    """
    dict_map={}
    with open(file, "r") as in_file:
        for line in in_file:
            # split() with no argument already ignores leading/trailing
            # whitespace, so no separate strip() is needed.
            fields=line.split()
            if not fields:
                # Blank or whitespace-only line: nothing to record.
                continue
            if mapping:
                dict_map[fields[1]]=fields[0]
            else:
                dict_map[fields[0]]=set(fields[1:])
    return dict_map

In [3]:
def get_cyclic_net(filename):
    """Load a directed edge list and prune it to nodes with both in- and out-edges.

    The file is read as a tab-separated edge list whose third column is stored
    as the string edge attribute ``mode``; lines starting with ``#`` are
    ignored. Nodes literally named "Source" or "Target" are dropped
    (presumably the header row of the file -- confirm against the data).

    Self-loops are set aside before pruning so that they do not count towards
    a node's in/out degree. Nodes whose in-degree or out-degree is 0 are then
    removed repeatedly until none remain. Finally, the self-loops of the
    surviving nodes are added back together with their edge attributes.

    Parameters
    ----------
    filename : str
        Path of the edge-list file.

    Returns
    -------
    networkx.DiGraph
        The pruned graph.
    """
    G=nx.read_edgelist(filename, comments='#', delimiter="\t", nodetype =str,  data=(('mode',str),), create_using=nx.DiGraph())
    G.remove_nodes_from(["Source", "Target"])

    # Materialise the self-loops *before* removing them: the self-loop
    # iterator is lazy, so consuming it after the removal would yield nothing.
    # data=True keeps each loop's attributes (e.g. "mode") for re-insertion.
    selfloops=list(nx.selfloop_edges(G, data=True))
    G.remove_edges_from(selfloops)

    # Removing a node can leave its neighbours without in- or out-edges,
    # so keep pruning until a pass finds nothing to remove.
    while True:
        nodes_to_remove=[node for node in G if G.in_degree(node) == 0 or G.out_degree(node) == 0]
        if not nodes_to_remove:
            break
        G.remove_nodes_from(nodes_to_remove)

    # Restore only the self-loops whose node survived the pruning.
    selfloops_in_reservoir=[edge for edge in selfloops if edge[0] in G]
    G.add_edges_from(selfloops_in_reservoir)

    return G

In [4]:
def build_adj_weighted_matrix(filename,mapping):
    """Build a randomly weighted adjacency matrix for the pruned network.

    The network returned by ``get_cyclic_net(filename)`` is relabelled in
    place with ``mapping`` and every edge receives a random ``weight`` whose
    sign follows its ``mode`` attribute:

    * ``"+"``  -> positive weight, ``abs(gauss(0, 1))``
    * ``"-"``  -> negative weight, ``-abs(gauss(0, 1))``
    * missing  -> unsigned weight, ``gauss(0, 1)``

    Edges with any other ``mode`` value get no ``weight`` attribute here.

    Parameters
    ----------
    filename : str
        Path of the edge-list file, forwarded to ``get_cyclic_net``.
    mapping : dict
        Old-label -> new-label mapping passed to ``networkx.relabel_nodes``.

    Returns
    -------
    tuple
        ``(matrix, dict_pos)`` where ``matrix`` is the adjacency matrix and
        ``dict_pos`` maps each (relabelled) node to its row/column index.
    """
    net=get_cyclic_net(filename)
    nx.relabel_nodes(net,mapping,copy=False)

    # Fix the node order once so the matrix rows/columns and dict_pos agree.
    node_order=list(net.nodes())
    dict_pos={node:pos for pos,node in enumerate(node_order)}

    for source,target,mode in net.edges(data="mode", default=0):
        if mode== "+":
            net[source][target]["weight"]= abs(rand.gauss(0,1))
        elif mode== "-":
            # Force the sign: a plain gauss(0,1)*-1 is positive half of the time.
            net[source][target]["weight"]= -abs(rand.gauss(0,1))
        elif mode== 0:
            # No mode recorded for this edge (default value): leave it unsigned.
            net[source][target]["weight"]= rand.gauss(0,1)

    # NOTE(review): to_numpy_matrix is unavailable in recent NetworkX releases;
    # switching to to_numpy_array would change the return type (ndarray
    # instead of numpy.matrix), so it is kept until callers are checked.
    return nx.to_numpy_matrix(net,nodelist=node_order),dict_pos

In [4]:
def write_nodes_file(out_file,filename,net):
    """Write the node names of ``net`` to ``"<out_file>_<filename>"``, one per line.

    Names that start with the ``"hsa-"`` prefix are written without the prefix
    and lower-cased (e.g. ``"hsa-miR-130b"`` -> ``"mir-130b"``); every other
    name is written unchanged.

    Parameters
    ----------
    out_file : str
        First part of the output file name (may include a directory).
    filename : str
        Second part of the output file name, joined with an underscore.
    net : object
        Any object whose ``nodes()`` method yields node names as strings.
    """
    prefix="hsa-"
    with open("%s_%s"%(out_file,filename),"w") as out:
        for gene in net.nodes():
            # Only strip a real leading prefix; a substring test would chop
            # the first characters of any name that merely contains "hsa".
            if gene.startswith(prefix):
                gene=gene[len(prefix):].lower()
            out.write(gene +"\n")

In [5]:
# Path of the edge list used throughout the notebook.
file="Dataset1/network_edge_list_ENCODE.csv"

# G: the directed network as read from the file, with the third column
# stored as the string edge attribute "mode".
G=nx.read_edgelist(file, comments='#', delimiter="\t", nodetype =str,  data=(('mode',str),), create_using=nx.DiGraph())

# net: the pruned network produced by get_cyclic_net from the same file.
net=get_cyclic_net(file)

In [14]:
# For every node of `net`, collect the targets it points to in the full
# network G that are not themselves part of `net`.
readout_direct_targets=defaultdict(set)

for source, target in G.edges():
    if source not in net.nodes() or target in net.nodes():
        continue
    readout_direct_targets[source].add(target)

# List the `net` nodes that have at least one such outside target.
for key in readout_direct_targets:
    print(key)

SP1
SP2
GATA1
BRCA1
HNF4A
E2F6
hsa-miR-130b
MEF2C
hsa-miR-19a
CTCFL
POU2F2
hsa-miR-595
hsa-miR-20a
hsa-miR-18a
hsa-miR-638
hsa-miR-17
ZNF143
hsa-miR-92a
HEY1
SREBF2
hsa-miR-663
SIN3A
FOSL2
BCL11A
NR2C2
SIX5
MAX
MXI1
PPARGC1A
NFYB
EGR1
CEBPB
RFX5
SPI1
BCLAF1
TCF12
PRDM1
HDAC2
hsa-miR-194
RXRA
NRF1
TAL1
TCF4
hsa-miR-191
ESR1
hsa-miR-192
CCNT2
CTCF
PBX3
hsa-miR-142-3p
POLR3A
hsa-miR-142-5p
ZBTB33
IRF1
EP300
ETS1
NR3C1
ZBTB7A
hsa-miR-330-3p
E2F1
SRF
IRF3
FOS
FOSL1
JUNB
SMARCC1
EBF1
NFYA
CHD2
TBP
GABPA
hsa-miR-1260
hsa-miR-1
CTBP2
JUND
SMARCA4
IRF4
HNF4G
FOXA1
NFE2
RDBP
STAT2
ELK4
hsa-miR-10b
BDP1
USF2
SUZ12
BRF2
hsa-miR-650
hsa-miR-24
hsa-miR-25
hsa-miR-22
hsa-miR-199b-5p
USF1
E2F4
ZEB1
MEF2A
BCL3
hsa-miR-339-5p
hsa-miR-365
hsa-miR-193b
hsa-miR-567
BHLHE40
TFAP2C
BATF
hsa-miR-564
GATA2
TRIM28
NFKB1
hsa-miR-335
POU5F1
SIRT6
hsa-miR-1226
hsa-miR-1228
TFAP2A
hsa-miR-661
GTF2B
MYC
hsa-miR-615-5p
PAX5
JUN
FOXA2
MAFK
hsa-miR-367
hsa-miR-1208
YY1
ATF3
hsa-miR-1207-3p
hsa-miR-1205
hsa-miR-548c-3p


AtlasView({'IRF3': {}, 'hsa-miR-142-5p': {}, 'hsa-miR-130b': {}, 'BRCA1': {}, 'EGR1': {}, 'FOS': {}, 'FOSL1': {}, 'FOSL2': {}, 'JUNB': {}, 'hsa-miR-142-3p': {}, 'HEY1': {}, 'hsa-miR-663': {}})

In [10]:
# Number of distinct outside targets over all sources. Starting from an empty
# set keeps this valid (result 0) even when readout_direct_targets is empty,
# where set.union(*[]) would raise a TypeError.
len(set().union(*readout_direct_targets.values()))

13397

In [None]:
# Take the part of the path that sits between "list" (plus one separator
# character) and the ".csv" extension.
name_start=file.index("list")+5
name_end=file.index(".csv")
filename=file[name_start:name_end]

In [None]:
# Write every node of the full network G to "all_gene_<filename>".
write_nodes_file(out_file="all_gene",filename=filename,net=G)

In [None]:
# Number of nodes left in the pruned network.
net.number_of_nodes()

In [None]:
## Load the dictionaries exported from R.

## GO term -> set of ids (first field is the key, remaining fields the set).
GO_id_map=get_r_dictionaries("test.txt",mapping=False)

## id -> ENTREZ id (second field is the key, first field the value).
edgeid_ezid_map=get_r_dictionaries("mapping_id_to_entrez.txt",mapping=True)
print(edgeid_ezid_map)

In [None]:
# Translate every id in each GO term's set through the id mapping; ids without
# an entry are kept as they are. The lookup uses edgeid_ezid_map (loaded in
# the cell above) so this cell does not depend on a variable that is only
# created further down the notebook, and an unmapped id no longer raises
# KeyError.
GO_id_map={
    key:{edgeid_ezid_map.get(value,value) for value in values}
    for key,values in GO_id_map.items()
}
GO_id_map

In [None]:
## First: the reservoir node ids have to be replaced by ENTREZ ids.
## Build a mapping dictionary from the information available for the GO-term nodes.
## Since not every node is annotated, unannotated nodes are mapped to themselves.

# Work on a copy so that edgeid_ezid_map itself is not modified by this cell.
mapping_relabel = dict(edgeid_ezid_map)

# Collect the mapped values once instead of rescanning them for every node.
mapped_values = set(edgeid_ezid_map.values())

# NOTE(review): membership is tested against the *values* of the mapping, as
# in the original cell. If the nodes of `net` are meant to be the *keys*, this
# overwrites real entries with identity entries -- confirm the intended check.
for node in net.nodes():
    if node not in mapped_values:
        mapping_relabel[node]=node
mapping_relabel

In [None]:
## Win test
# res_size: number of rows of Win; in_size: Win has 1+in_size columns;
# i_scaling: factor multiplied into Win when it is created.
res_size,in_size,i_scaling=207,1,1

In [None]:
# Weighted adjacency matrix of the relabelled network and the node -> index map.
matrix,dict_pos=build_adj_weighted_matrix(filename=file,mapping=mapping_relabel)

In [None]:
# Zero-filled matrix with res_size rows and 1+in_size columns.
win_shape=(res_size,1+in_size)
Win=np.zeros(win_shape)*i_scaling
# Show row 1 as a quick check.
Win[1]

In [None]:
# Mark the row of every gene annotated with GO:0030220 with the value 2,
# printing each row index, then show where the marks ended up.
for gene in GO_id_map["GO:0030220"]:
    row=dict_pos[gene]
    print(row)
    Win[row,]=2
marked=np.where(Win==2)
print(marked)

In [None]:
def input_matrix_just_genes_GOterm(Win,GOterm,GO_id_map,gene_positions=None):
    """Fill the rows of ``Win`` that belong to the genes of one GO term.

    For every gene in ``GO_id_map[GOterm]`` one value is drawn from
    ``np.random.uniform(0, 1)`` and assigned to that gene's whole row, so all
    columns of the row share the same value. ``Win`` is modified in place.

    Parameters
    ----------
    Win : numpy.ndarray
        Matrix to fill; indexed by row with the positions of the genes.
    GOterm : str
        Key into ``GO_id_map``.
    GO_id_map : dict
        Maps a GO term to an iterable of gene ids.
    gene_positions : dict, optional
        Maps a gene id to its row index in ``Win``. When omitted, the
        notebook-level ``dict_pos`` is used, as before.

    Returns
    -------
    numpy.ndarray
        The same ``Win`` object that was passed in.

    Raises
    ------
    KeyError
        If ``GOterm`` is not in ``GO_id_map`` or a gene has no position.
    """
    if gene_positions is None:
        # Backward-compatible fallback to the notebook global.
        gene_positions=dict_pos
    for gene in GO_id_map[GOterm]:
        Win[gene_positions[gene],]=np.random.uniform(0,1)
    return Win

In [None]:
# Start again from a zero matrix and fill the rows of the GO:0030220 genes.
win_shape=(res_size,1+in_size)
Win=np.zeros(win_shape)*i_scaling
input_matrix_just_genes_GOterm(Win,"GO:0030220",GO_id_map)