In [1]:
import networkx as nx
import numpy as np
import os
import random as rand
from collections import defaultdict

In [2]:
def get_r_dictionaries(file,mapping=True):
    """Parse a whitespace-delimited text file into a dictionary.

    Parameters
    ----------
    file : str
        Path of the text file to read.
    mapping : bool, optional
        If True (default), every line adds one entry whose key is the
        line's second field and whose value is its first field.
        If False, every line adds one entry whose key is the first field
        and whose value is the set of all remaining fields.

    Returns
    -------
    dict
        ``{second_field: first_field}`` when ``mapping`` is True, otherwise
        ``{first_field: set(remaining_fields)}``. Later lines overwrite
        earlier lines that share the same key.

    Raises
    ------
    IndexError
        If ``mapping`` is True and a non-blank line has fewer than two fields.
    """
    dict_map = {}
    with open(file, "r") as in_file:
        for line in in_file:
            # split() with no argument already ignores leading/trailing
            # whitespace, so a separate strip() is unnecessary.
            fields = line.split()
            if not fields:
                # Blank lines (e.g. a trailing empty line) hold no record;
                # indexing them would raise IndexError.
                continue
            if mapping:
                dict_map[fields[1]] = fields[0]
            else:
                dict_map[fields[0]] = set(fields[1:])
    return dict_map

In [3]:
def get_cyclic_net(filename):
    """Load a directed edge list and prune every node lacking an input or output.

    The file is read as a tab-delimited edge list (lines starting with ``#``
    are comments) whose third column is stored on each edge as the string
    attribute ``mode``. Self-loops are set aside, nodes with in-degree 0 or
    out-degree 0 are removed repeatedly until none remain, and the self-loops
    of surviving nodes are then restored together with their attributes.

    Parameters
    ----------
    filename : str
        Path of the edge-list file.

    Returns
    -------
    networkx.DiGraph
        The pruned graph. It may be empty if no node survives the pruning.
    """
    G = nx.read_edgelist(filename, comments='#', delimiter="\t", nodetype=str,
                         data=(('mode', str),), create_using=nx.DiGraph())
    # Names that are not in the graph are silently ignored by
    # remove_nodes_from. Presumably these two come from a header row of the
    # file — TODO confirm against the data.
    G.remove_nodes_from(["Source", "Target"])

    # Take an eager snapshot of the self-loops *with* their attribute dicts.
    # A lazily evaluated iterator would be consumed only after the loops have
    # been removed (and would therefore be empty), and a snapshot without
    # data would lose the 'mode' attribute when the loops are re-added.
    # Filtering G.edges() also avoids the Graph.selfloop_edges method, which
    # is not available in recent networkx releases.
    selfloops = [(u, v, dict(attrs))
                 for u, v, attrs in G.edges(data=True) if u == v]
    G.remove_edges_from([(u, v) for u, v, _ in selfloops])

    # Removing a node can strip a neighbour of its last input or output, so
    # keep pruning until a pass finds nothing left to remove.
    while True:
        nodes_to_remove = [node for node in G
                           if G.in_degree(node) == 0 or G.out_degree(node) == 0]
        if not nodes_to_remove:
            break
        G.remove_nodes_from(nodes_to_remove)

    # Restore self-loops only for the nodes that survived the pruning.
    G.add_edges_from(loop for loop in selfloops if loop[0] in G)

    return G

In [4]:
def build_adj_weighted_matrix(filename,mapping):
    """Build a randomly weighted adjacency matrix for the pruned network.

    The network returned by ``get_cyclic_net(filename)`` is relabelled in
    place with ``mapping`` and every edge receives a ``weight`` attribute
    drawn from a standard normal distribution, signed according to the
    edge's ``mode`` attribute:

    * ``"+"``  -> ``abs(draw)``  (always non-negative)
    * ``"-"``  -> ``-abs(draw)`` (always non-positive)
    * anything else, including a missing mode -> ``draw`` (either sign)

    Parameters
    ----------
    filename : str
        Path of the edge-list file, forwarded to ``get_cyclic_net``.
    mapping : dict
        Old-label -> new-label mapping passed to ``nx.relabel_nodes``.

    Returns
    -------
    tuple
        ``(matrix, dict_pos)`` where ``matrix`` is a ``numpy.matrix`` of edge
        weights and ``dict_pos`` maps each (relabelled) node to its row/column
        index in that matrix.

    Notes
    -----
    Weights come from the ``random`` module; seed it beforehand for
    reproducible matrices.
    """
    net = get_cyclic_net(filename)
    nx.relabel_nodes(net, mapping, copy=False)

    # Fix the node order once and use it for both the index dictionary and
    # the matrix, so the two can never disagree.
    nodelist = list(net.nodes())
    dict_pos = {node: pos for pos, node in enumerate(nodelist)}

    # Materialise the edge list first: attributes are written while looping.
    for source, target, mode in list(net.edges(data="mode", default=0)):
        draw = rand.gauss(0, 1)
        if mode == "+":
            weight = abs(draw)
        elif mode == "-":
            # Negate the magnitude; negating the raw draw would yield a
            # positive weight whenever the draw itself is negative.
            weight = -abs(draw)
        else:
            # Missing (default 0) or unrecognised mode: leave the sign free.
            # Without this, an edge with an unknown mode string would have
            # no 'weight' attribute and appear in the matrix as exactly 1.
            weight = draw
        net[source][target]["weight"] = weight

    # nx.to_numpy_matrix was removed in networkx 3.0; wrapping the array in
    # np.asmatrix keeps the numpy.matrix return type callers received before.
    matrix = np.asmatrix(nx.to_numpy_array(net, nodelist=nodelist, weight="weight"))
    return matrix, dict_pos

In [4]:
def write_nodes_file(out_file,filename,net):
    """Write the node names of ``net`` to ``"<out_file>_<filename>"``, one per line.

    Names that start with ``"hsa"`` have their first four characters removed
    (the ``hsa`` tag plus its one-character separator) and are lower-cased;
    every other name is written unchanged.

    Parameters
    ----------
    out_file : str
        Prefix of the output path.
    filename : str
        Suffix of the output path, joined to the prefix with an underscore.
    net : object
        Anything exposing ``nodes()`` that yields string node names.
    """
    with open("%s_%s" % (out_file, filename), "w") as out:
        for gene in net.nodes():
            # Test the prefix, not mere containment: a name that happens to
            # contain "hsa" elsewhere must not lose its first four characters.
            if gene.startswith("hsa"):
                gene = gene[4:].lower()
            out.write(gene + "\n")

In [5]:
# Edge list used throughout the notebook.
file="Dataset1/network_edge_list_ENCODE.csv"
# Pruned network: built from the same path variable so the pruned and the
# full graph can never be loaded from different files by accident.
net=get_cyclic_net(file)
# Full, unpruned graph read from the same edge list.
G=nx.read_edgelist(file, comments='#', delimiter="\t", nodetype =str,  data=(('mode',str),), create_using=nx.DiGraph())

In [16]:
# For each node of the pruned network `net`, gather the targets it points to
# in the full graph `G` that are themselves not part of `net`.
readout_direct_targets = defaultdict(set)

for edge_source, edge_target in G.edges():
    # Skip edges that start outside `net` or that end inside it.
    if edge_source not in net.nodes() or edge_target in net.nodes():
        continue
    readout_direct_targets[edge_source].add(edge_target)

# Show the targets collected for SRF.
readout_direct_targets["SRF"]

{'AC011472.1',
 'AC110814.1',
 'ACSM3',
 'ACTB',
 'ACTG1',
 'ADH6',
 'AFAP1L2',
 'AFTPH',
 'AIF1L',
 'AIM1',
 'AIM2',
 'ANAPC2',
 'ANKRD30BL',
 'ANXA8',
 'ANXA8L2',
 'AP4B1',
 'APP',
 'ARHGAP1',
 'ARPC1A',
 'ARSE',
 'AURKA',
 'B4GALT1',
 'BATF2',
 'BCAS1',
 'BCAS4',
 'BCL10',
 'BCL2L12',
 'BCR',
 'BZRAP1',
 'C12orf26',
 'C13orf18',
 'C14orf1',
 'C14orf43',
 'C16orf54',
 'C17orf101',
 'C18orf45',
 'C1orf116',
 'C3orf14',
 'C4orf19',
 'C5orf51',
 'C5orf56',
 'C6orf105',
 'C7orf64',
 'C8orf77',
 'CA12',
 'CA5A',
 'CACNG1',
 'CAP1',
 'CAPG',
 'CAPN13',
 'CAPS',
 'CAPZB',
 'CASP14',
 'CCDC116',
 'CCDC12',
 'CCDC59',
 'CCNB1IP1',
 'CCT8',
 'CD19',
 'CD37',
 'CD44',
 'CD48',
 'CD7',
 'CD79A',
 'CDC45',
 'CDK11A',
 'CDK11B',
 'CDKN1A',
 'CEACAM1',
 'CEACAM21',
 'CEACAM5',
 'CEACAM6',
 'CFL1',
 'CLIC1',
 'CLIP3',
 'CLNK',
 'CNFN',
 'CNN2',
 'COG3',
 'COL9A3',
 'COPS4',
 'CORO1A',
 'COX16',
 'COX19',
 'CPNE5',
 'CPPED1',
 'CPSF4',
 'CRKL',
 'CRTAM',
 'CRYZL1',
 'CSF2',
 'CSNK2B',
 'CSTF1',
 'CWC

In [22]:
# Adjacency of SRF within the pruned network (same view as net["SRF"]).
net.adj["SRF"]

AtlasView({'IRF3': {}, 'hsa-miR-142-5p': {}, 'hsa-miR-130b': {}, 'BRCA1': {}, 'EGR1': {}, 'FOS': {}, 'FOSL1': {}, 'FOSL2': {}, 'JUNB': {}, 'hsa-miR-142-3p': {}, 'HEY1': {}, 'hsa-miR-663': {}})

AtlasView({'IRF3': {}, 'hsa-miR-142-5p': {}, 'hsa-miR-130b': {}, 'BRCA1': {}, 'EGR1': {}, 'FOS': {}, 'FOSL1': {}, 'FOSL2': {}, 'JUNB': {}, 'hsa-miR-142-3p': {}, 'HEY1': {}, 'hsa-miR-663': {}})

In [10]:
# Number of distinct targets across all sources. Starting from an empty set
# keeps this valid when readout_direct_targets is empty, where
# set.union(*[]) would raise TypeError.
len(set().union(*readout_direct_targets.values()))

13397

In [None]:
# Dataset tag = the part of the file's base name that follows "list_"
# ("ENCODE" for ".../network_edge_list_ENCODE.csv"). Working on the base name
# without its extension keeps directory names that contain "list" or ".csv"
# from shifting the slice.
_stem = os.path.splitext(os.path.basename(file))[0]
filename = _stem[_stem.index("list") + len("list_"):]

In [None]:
# Dump every node of the full graph G to "all_gene_<filename>".
write_nodes_file(out_file="all_gene", filename=filename, net=G)

In [None]:
# Number of nodes left in the pruned network.
net.number_of_nodes()

In [None]:
## Get R dictionaries
## GO term -> set of ids (first field of each line keys the remaining fields)
GO_id_map = get_r_dictionaries("test.txt", mapping=False)

## id to ENTREZ id (second field of each line keys the first field)
edgeid_ezid_map = get_r_dictionaries("mapping_id_to_entrez.txt", mapping=True)
print(edgeid_ezid_map)

In [None]:
# Translate every id stored under each GO term through mapping_relabel,
# overwriting the entries of GO_id_map in place.
# NOTE(review): mapping_relabel is assigned in a later cell of this notebook,
# so that cell has to be executed before this one on a fresh kernel.
for go_term in list(GO_id_map):
    GO_id_map[go_term] = {mapping_relabel[member] for member in GO_id_map[go_term]}
GO_id_map

In [None]:
## First: the "res" id has to be changed to the ENTREZ id.
## A mapping dictionary is built from the information available for the
## GO.term nodes.
## Since not all of them are annotated, it is better to create a dictionary
## in which the remaining network nodes map to themselves.
# Work on a copy so edgeid_ezid_map keeps exactly what was read from disk;
# assigning it directly would make both names refer to the same dict.
mapping_relabel = dict(edgeid_ezid_map)
# Build the membership set once instead of rescanning .values() per node.
# NOTE(review): membership is tested against the *values* of
# edgeid_ezid_map, exactly as before — confirm that values (not keys) are
# the labels used by the network nodes.
_known_values = set(edgeid_ezid_map.values())
for node in net.nodes():
    if node not in _known_values:
        mapping_relabel[node] = node
mapping_relabel

In [None]:
## Win test
res_size=207
in_size=1
i_scaling=1

In [None]:
# Weighted adjacency matrix of the relabelled network and the node -> index map.
matrix, dict_pos = build_adj_weighted_matrix(filename=file, mapping=mapping_relabel)

In [None]:
# Allocate the input matrix: res_size rows, 1 + in_size columns, all zeros
# (the multiplication by i_scaling is kept as in the original).
Win = np.zeros(shape=(res_size, 1 + in_size)) * i_scaling
# Display row 1.
Win[1]

In [None]:
# Mark the Win rows of every gene annotated with GO:0030220 with the value 2,
# printing each row index on the way, then show where the 2s ended up.
go_genes = GO_id_map["GO:0030220"]
for gene in go_genes:
    row = dict_pos[gene]
    print(row)
    Win[row] = 2
print(np.where(Win == 2))

In [None]:
def input_matrix_just_genes_GOterm(Win,GOterm,GO_id_map,gene_positions=None):
    """Fill the rows of ``Win`` that belong to the genes of one GO term.

    For every gene listed under ``GOterm`` a single value is drawn from
    ``np.random.uniform(0, 1)`` and written to the whole row of that gene.

    Parameters
    ----------
    Win : numpy.ndarray
        Input matrix; it is modified in place.
    GOterm : str
        Key to look up in ``GO_id_map``.
    GO_id_map : dict
        Maps a GO term to an iterable of gene identifiers.
    gene_positions : dict, optional
        Maps a gene identifier to its row index in ``Win``. When omitted,
        the notebook-level ``dict_pos`` is used, which is what the function
        always relied on before this parameter existed.

    Returns
    -------
    numpy.ndarray
        The same ``Win`` object that was passed in.

    Raises
    ------
    KeyError
        If ``GOterm`` is not in ``GO_id_map`` or a gene has no position.
    """
    if gene_positions is None:
        # Backward-compatible fallback to the global built elsewhere in the
        # notebook; pass gene_positions explicitly to avoid hidden state.
        gene_positions = dict_pos
    for gene in GO_id_map[GOterm]:
        # One draw per gene, broadcast across every column of its row.
        Win[gene_positions[gene]] = np.random.uniform(0, 1)
    return Win

In [None]:
# Start again from a zero input matrix, then fill the rows of the genes
# annotated with GO:0030220; the returned matrix is the cell's output.
Win = np.zeros(shape=(res_size, 1 + in_size)) * i_scaling
input_matrix_just_genes_GOterm(Win, "GO:0030220", GO_id_map)