In [1]:
import networkx as nx
import numpy as np
import os
import random as rand
from collections import defaultdict
import matplotlib.pyplot as plt

In [2]:
def get_r_dictionaries(file,mapping=True):
    with open(file, "r") as in_file:
        dict_map={}
        for line in in_file:
            line.strip()
            if mapping:
                dict_map[line.split()[1]]=line.split()[0]
            else:
                dict_map[line.split()[0]]=set(line.split()[1:])
    return dict_map

In [3]:
def get_cyclic_net(filename):
    G=nx.read_edgelist(filename, comments='#', delimiter="\t", nodetype =str,  data=(('mode',str),), create_using=nx.DiGraph())
    G.remove_nodes_from(["Source", "Target"])
    selfloops=G.selfloop_edges()
    G.remove_edges_from(G.selfloop_edges())

    while 0 in [d[1] for d in G.in_degree()] or 0 in [d[1] for d in G.out_degree()]:
        nodes_to_remove=[node for node in G if G.in_degree(node) == 0 or G.out_degree(node) == 0]
        G.remove_nodes_from(nodes_to_remove)
        
        
    selfloops_in_reservoir=[edge for edge in selfloops if edge[0] in G.nodes()]
    G.add_edges_from(selfloops_in_reservoir)

    return G

In [4]:
def build_adj_weighted_matrix(filename,mapping):
        #NETWORK v2.0
    net=get_cyclic_net(filename)
    nx.relabel_nodes(net,mapping,copy=False)
    dict_pos=dict((node,pos) for (pos,node) in enumerate(net.nodes()))
    for edge in net.edges(data="mode", default=0):
        source,target,mode=edge
        if mode== "+":
            net[source][target]["weight"]= abs(rand.gauss(0,1))
        elif mode== "-":
            net[source][target]["weight"]= rand.gauss(0,1)*-1
        elif mode== 0:
            net[source][target]["weight"]= rand.gauss(0,1)
        
    return nx.to_numpy_matrix(net),dict_pos

In [5]:
def write_nodes_file(out_file,filename,net):
    with open("%s_%s"%(out_file,filename),"w") as out:
        for gene in net.nodes():
            if "hsa" in gene:
                gene=gene[4:].lower()
            out.write(gene +"\n")

In [6]:
file="Dataset1/network_edge_list_ENCODE.csv"
net=get_cyclic_net("Dataset1/network_edge_list_ENCODE.csv")
G=nx.read_edgelist(file, comments='#', delimiter="\t", nodetype =str,  data=(('mode',str),), create_using=nx.DiGraph())

In [9]:
G.in_edges("TMEM14C")

InEdgeDataView([('SP1', 'TMEM14C'), ('NR3C1', 'TMEM14C'), ('SREBF1', 'TMEM14C'), ('MAFK', 'TMEM14C'), ('MAFF', 'TMEM14C')])

In [7]:
readout_to_cyclic=['ZNF775',
 'TMEM14C',
 'C20orf111',
 'STYXL1',
 'BRD9',
 'ABCA3',
 'ALDH1A1',
 'ACAD9']

In [8]:
input_nodes=[(go,readout) for go,set_readout in GO_id_map.items() for readout in set_readout ]

NameError: name 'GO_id_map' is not defined

In [9]:
edges_readout=[edge for edge in G.edges() if edge[1] in readout_to_cyclic and edge[0] in net.nodes()]

In [10]:
edges_readout

[('SP1', 'TMEM14C'),
 ('BRCA1', 'STYXL1'),
 ('E2F6', 'ABCA3'),
 ('E2F6', 'ACAD9'),
 ('CTCFL', 'ABCA3'),
 ('SIN3A', 'BRD9'),
 ('SIN3A', 'C20orf111'),
 ('MAX', 'ABCA3'),
 ('MAX', 'ACAD9'),
 ('MAX', 'ALDH1A1'),
 ('NFYB', 'ZNF775'),
 ('NRF1', 'ABCA3'),
 ('NRF1', 'C20orf111'),
 ('NRF1', 'ACAD9'),
 ('TCF4', 'ZNF775'),
 ('CTCF', 'ALDH1A1'),
 ('CTCF', 'C20orf111'),
 ('CTCF', 'ABCA3'),
 ('ZBTB33', 'STYXL1'),
 ('ETS1', 'STYXL1'),
 ('NR3C1', 'TMEM14C'),
 ('E2F1', 'BRD9'),
 ('FOS', 'ZNF775'),
 ('NFYA', 'ZNF775'),
 ('GABPA', 'BRD9'),
 ('GABPA', 'STYXL1'),
 ('JUND', 'C20orf111'),
 ('JUND', 'ACAD9'),
 ('USF1', 'ALDH1A1'),
 ('E2F4', 'BRD9'),
 ('MYC', 'BRD9'),
 ('JUN', 'ALDH1A1'),
 ('MAFK', 'TMEM14C'),
 ('ATF3', 'ACAD9'),
 ('RAD21', 'ALDH1A1'),
 ('RAD21', 'C20orf111'),
 ('MAFF', 'TMEM14C'),
 ('SREBF1', 'ZNF775'),
 ('SREBF1', 'TMEM14C'),
 ('ELF1', 'STYXL1')]

In [11]:
net.add_edges_from(edges_readout)
net.add_edges_from(input_nodes)
nx.write_gexf(net, "test.gexf")

NameError: name 'input_nodes' is not defined

In [12]:
G.add_edges_from(input_nodes)
nx.write_gexf(G, "full.gexf")

NameError: name 'input_nodes' is not defined

In [13]:
readout_direct_targets=defaultdict(set)

for source, target in G.edges():
    if source in net.nodes() and target not in net.nodes():
        readout_direct_targets[source].add(target)

readout_direct_targets["MAX"]
    #print(target)

{'AATF',
 'ABCA17P',
 'ABCB4',
 'ABCF1',
 'ABCG1',
 'AC011472.1',
 'AC013553.1',
 'AC044799.1',
 'AC136618.1',
 'ACAP1',
 'ACTC1',
 'ADAM19',
 'ADAMTS16',
 'ADH6',
 'ADM',
 'ADORA1',
 'AFMID',
 'AFP',
 'AGFG2',
 'AGT',
 'AIMP2',
 'AIP',
 'AK2',
 'AKR1A1',
 'AKR1C1',
 'AKR1C3',
 'AKT1S1',
 'ALB',
 'ALDOC',
 'ALG2',
 'ALG3',
 'ALOX15B',
 'AMDHD2',
 'AMN',
 'AMPD3',
 'ANAPC7',
 'ANG',
 'ANK1',
 'ANKRD12',
 'ANKRD30BL',
 'ANO3',
 'ANP32B',
 'AOC3',
 'AP1G2',
 'AP1S1',
 'AP2M1',
 'AP4M1',
 'APBB3',
 'APEX1',
 'APOBR',
 'APOC1',
 'APOC2',
 'APOE',
 'APOH',
 'APRT',
 'ARHGAP24',
 'ARSK',
 'ASGR1',
 'ASGR2',
 'ASPSCR1',
 'ATL3',
 'ATP13A4',
 'ATP6V0B',
 'ATP6V0D1',
 'ATP8B1',
 'ATXN3',
 'BAT3',
 'BAX',
 'BCAS3',
 'BCKDHA',
 'BCL2',
 'BCR',
 'BIN1',
 'BLOC1S3',
 'BMP4',
 'BOLA1',
 'BPIL1',
 'BST2',
 'BUB3',
 'C10orf116',
 'C10orf18',
 'C10orf46',
 'C10orf76',
 'C10orf88',
 'C11orf59',
 'C12orf24',
 'C12orf49',
 'C13orf23',
 'C16orf5',
 'C16orf54',
 'C16orf73',
 'C17orf86',
 'C17orf87',
 'C18orf

In [14]:
len(set(net["SRF"]))

12

In [15]:
len(net.out_edges("SRF"))

12

In [16]:
len(set.union(*readout_direct_targets.values()))

13389

In [17]:
filename=file[file.index("list")+5:file.index(".csv")]

In [18]:
write_nodes_file("all_gene",filename,G)

In [19]:
len(net.nodes())

215

In [36]:
## Get R dictionaries
## GO term -> set ids
GO_id_map=get_r_dictionaries("test.txt",mapping=False)
#print(GO_id_map)

## id to ENTREZ id
edgeid_ezid_map=get_r_dictionaries("mapping_id_to_entrez.txt")
print(edgeid_ezid_map)

{'5669': 'SP1', '6667': 'SP1', '199699': 'SP1', '6668': 'SP2', '2623': 'GATA1', '672': 'BRCA1', '3172': 'HNF4A', '1876': 'E2F6', '406920': 'hsa-miR-130b', '4208': 'MEF2C', '140690': 'CTCFL', '5452': 'POU2F2', '406982': 'hsa-miR-20a', '406953': 'hsa-miR-18a', '7702': 'ZNF143', '23462': 'HEY1', '6721': 'SREBF2', '25942': 'SIN3A', '2355': 'FOSL2', '53335': 'BCL11A', '7182': 'NR2C2', '147912': 'SIX5', '4149': 'MAX', '4601': 'MXI1', '10891': 'PPARGC1A', '4801': 'NFYB', '1958': 'EGR1', '1051': 'CEBPB', '5993': 'RFX5', '6688': 'SPI1', '9774': 'BCLAF1', '6938': 'TCF12', '639': 'PRDM1', '3066': 'HDAC2', '6256': 'RXRA', '4779': 'NRF1', '4899': 'NRF1', '6886': 'TAL1', '6925': 'TCF4', '6934': 'TCF4', '2099': 'ESR1', '905': 'CCNT2', '10664': 'CTCF', '5090': 'PBX3', '11128': 'POLR3A', '10009': 'ZBTB33', '3659': 'IRF1', '2033': 'EP300', '2113': 'ETS1', '2908': 'NR3C1', '51341': 'ZBTB7A', '1869': 'E2F1', '6722': 'SRF', '3661': 'IRF3', '2353': 'FOS', '8061': 'FOSL1', '3726': 'JUNB', '10535': 'JUNB', '6

In [37]:
## Primero: Hay que cambiarle el id del res al id de ENTREZ
## hay que hacer un mapping dictionary con la info que tenemos de los nodos de GO.term
##Como no todos están anotados es mejor crear un diccionario 
mapping_relabel = edgeid_ezid_map
for node in net.nodes():
    if node not in edgeid_ezid_map.values():
        mapping_relabel[node]=node
mapping_relabel

{'10009': 'ZBTB33',
 '100126319': 'hsa-miR-216b',
 '100126340': 'hsa-miR-944',
 '100126348': 'hsa-miR-760',
 '100302143': 'hsa-miR-1248',
 '100302201': 'hsa-miR-1228',
 '100302232': 'hsa-miR-1226',
 '10127': 'ZNF263',
 '10155': 'TRIM28',
 '1051': 'CEBPB',
 '10535': 'JUNB',
 '10538': 'BATF',
 '10664': 'CTCF',
 '10891': 'PPARGC1A',
 '1106': 'CHD2',
 '11128': 'POLR3A',
 '140690': 'CTCFL',
 '147912': 'SIX5',
 '1488': 'CTBP2',
 '1826': 'CHD2',
 '1869': 'E2F1',
 '1874': 'E2F4',
 '1876': 'E2F6',
 '1879': 'EBF1',
 '1958': 'EGR1',
 '199699': 'SP1',
 '1997': 'ELF1',
 '2005': 'ELK4',
 '2033': 'EP300',
 '2099': 'ESR1',
 '2113': 'ETS1',
 '23462': 'HEY1',
 '23512': 'SUZ12',
 '2353': 'FOS',
 '2355': 'FOSL2',
 '23764': 'MAFF',
 '2551': 'GABPA',
 '25942': 'SIN3A',
 '2623': 'GATA1',
 '2624': 'GATA2',
 '26469': 'BDP1',
 '2908': 'NR3C1',
 '2959': 'GTF2B',
 '3066': 'HDAC2',
 '3169': 'FOXA1',
 '3170': 'FOXA2',
 '3172': 'HNF4A',
 '3174': 'HNF4G',
 '3297': 'HSF1',
 '3659': 'IRF1',
 '3661': 'IRF3',
 '3662': 'I

In [38]:
for key,values in GO_id_map.items():
    GO_id_map[key]=set([mapping_relabel[value] for value in values])
GO_id_map

{'GO:0006337': {'NFE2', 'SMARCA4', 'SMARCB1', 'SMARCC1', 'SMARCC2'},
 'GO:0030220': {'EP300', 'GATA1', 'MEF2C', 'SRF', 'TAL1'},
 'GO:0043923': {'EP300', 'JUN', 'SMARCA4', 'SMARCB1', 'SP1'},
 'GO:1901984': {'BRCA1', 'HDAC2', 'SIN3A', 'SPI1', 'TAF7'}}

In [26]:
set.union(*GO_id_map.values())

{'BRCA1',
 'E2F1',
 'E2F6',
 'FOS',
 'FOSL1',
 'JUN',
 'JUNB',
 'JUND',
 'MAX',
 'SREBF1',
 'STAT1',
 'SUZ12'}

In [27]:
readout_common=set(gene for gene_res in set.union(*GO_id_map.values()) for gene in set(G[gene_res]) if gene in set.union(*readout_direct_targets.values()) )

In [28]:
readout_common

{'TRAPPC2',
 'LONP1',
 'NCOA4',
 'YDJC',
 'HDAC7',
 'hsa-miR-302b*',
 'SMC1B',
 'CSNK1D',
 'TTC37',
 'FAM158A',
 'CDK1',
 'MAF1',
 'LRRC61',
 'hsa-miR-130b*',
 'METTL7B',
 'RRAS2',
 'ATL3',
 'VSX2',
 'NUP155',
 'NPM1',
 'NOP56',
 'RUNDC2C',
 'SNORD32A',
 'POLR2E',
 'YPEL4',
 'DARS',
 'FAM46B',
 'ST8SIA2',
 'LBX2',
 'HIGD2A',
 'PIGZ',
 'LOX',
 'ADORA1',
 'KIAA1984',
 'GXYLT1',
 'HAND1',
 'QRICH1',
 'C7orf40',
 'RPS12',
 'RND1',
 'AMDHD2',
 'CTGF',
 'RAB11B',
 'SERPINC1',
 'CORO1C',
 'GAS2L3',
 'CLIC2',
 'ORMDL2',
 'NDRG2',
 'LRG1',
 'PCSK5',
 'DCTN5',
 'SDF4',
 'ITGA5',
 'LPAR6',
 'KIAA0195',
 'CHAF1A',
 'hsa-miR-302b',
 'SLC7A5P2',
 'SPRYD4',
 'C2orf18',
 'SDPR',
 'SFT2D2',
 'NR1H2',
 'WNT7B',
 'FBXO18',
 'ETV5',
 'ATP6V0D1',
 'C6orf211',
 'MT1A',
 'EVL',
 'C21orf63',
 'TTR',
 'IKZF3',
 'ALPK1',
 'GCFC1',
 'UBE2S',
 'WDR25',
 'C1QTNF6',
 'ELOVL2',
 'CDC20',
 'C6orf108',
 'TEX14',
 'DMRT3',
 'IRX5',
 'FAHD2B',
 'CNDP2',
 'EPO',
 'ALOX15B',
 'TLR6',
 'ARL14',
 'SPATS2',
 'SIAH2',
 'HIST1

In [29]:
def get_len(item):
    return len(net.in_edges(item))

In [30]:
lista=[(gene,len(G.in_edges(gene))) for gene in readout_common]

In [31]:
len_innodes_gene=defaultdict(set)
for el in lista:
    len_innodes_gene[el[1]].add(el[0])
len_innodes_gene[5]

{'AARS',
 'AATF',
 'ABCA17P',
 'ABCB8',
 'ACTL6A',
 'AIFM2',
 'AIMP2',
 'APTX',
 'ARRDC2',
 'ATP5G1',
 'AVPI1',
 'C10orf88',
 'C11orf59',
 'C13orf29',
 'C14orf119',
 'C16orf74',
 'C19orf40',
 'C19orf51',
 'C1QTNF6',
 'C1orf107',
 'C1orf228',
 'C20orf24',
 'C21orf58',
 'C5orf39',
 'C5orf51',
 'C6orf89',
 'C7orf40',
 'C8orf55',
 'C9orf3',
 'CABC1',
 'CCBL1',
 'CCDC123',
 'CCNT1',
 'CDC5L',
 'CDR2',
 'CENPL',
 'CISD2',
 'CNTLN',
 'COX5A',
 'CPB2',
 'CREB3L4',
 'CSPG4',
 'CYP51A1',
 'DAGLB',
 'DARS2',
 'DDIT3',
 'DDX47',
 'DENND4A',
 'DEPDC4',
 'DLX4',
 'DNA2',
 'DNAJC9',
 'DOCK10',
 'DRAP1',
 'DSCC1',
 'ENOPH1',
 'ERGIC2',
 'EXD1',
 'FLJ31306',
 'FLJ45983',
 'FOXE1',
 'GADD45A',
 'GIMAP4',
 'GLDC',
 'GPATCH3',
 'GPBP1',
 'HELQ',
 'HES4',
 'HGSNAT',
 'HIGD2A',
 'HMX2',
 'HOXA2',
 'HOXD8',
 'HRC',
 'HSF2BP',
 'HSPBAP1',
 'ICAM1',
 'IFI6',
 'IQCD',
 'ISLR2',
 'ITPR3',
 'KDM3A',
 'KEAP1',
 'KIAA0406',
 'KNCN',
 'LBX1',
 'LBX2',
 'LENG1',
 'LHX5',
 'LIMS1',
 'LIN37',
 'LMNB1',
 'LOC729082',
 '

In [32]:
set_common=[]
for GO_term in GO_id_map.keys():
    print()
    print(GO_term)
    for gene_view in GO_id_map[GO_term]:
        print(gene_view,"",set(G[gene_view]) & len_innodes_gene[5])
        set_common.append(set(G[gene_view]) & len_innodes_gene[5])


GO:0070317
BRCA1  {'CCNT1', 'NOL11', 'THADA', 'GPBP1', 'GPATCH3', 'CDC5L', 'SLC25A11', 'SNORD35B', 'NUDT2', 'POLR2E', 'XPC', 'LSMD1', 'MED22', 'C6orf89', 'SLC15A4', 'ERGIC2'}
MAX  {'LONP1', 'PLD6', 'LOXL1', 'RPS8', 'SAE1', 'SCAMP3', 'TASP1', 'SGOL1', 'NOC3L', 'ZNF785', 'C11orf59', 'HES4', 'HRC', 'UBE2S', 'POLE4', 'CCNT1', 'WDR36', 'MCOLN1', 'EXD1', 'ABCA17P', 'POLR2E', 'CABC1', 'HELQ', 'HIGD2A', 'AIMP2', 'WDR4', 'DOCK10', 'WDR89', 'HSPBAP1', 'C7orf40', 'HSF2BP', 'THAP8', 'KDM3A', 'ENOPH1', 'C1orf107', 'GLDC', 'THADA', 'ZFYVE27', 'SNORA76', 'AATF', 'C10orf88', 'MAP1D', 'PNO1'}
E2F1  {'SH2D4A', 'DSCC1', 'SUPT4H1', 'LMNB1', 'AVPI1', 'PQLC2', 'TM7SF3', 'SLC25A24', 'HGSNAT', 'RSAD1', 'HES4', 'CREB3L4', 'CENPL', 'RBBP8', 'TMEM159', 'SKIL', 'DARS2', 'TUBGCP3', 'PAWR', 'OSR2', 'ITPR3', 'POLA1', 'C8orf55', 'TCP11L1', 'TRPM7', 'TMEM201', 'MBOAT1', 'ARRDC2', 'HSF2BP', 'DNAJC9', 'COX5A', 'AIFM2', 'TRIP13', 'RNFT1', 'STRADA', 'C16orf74', 'CCBL1'}
SUZ12  {'SP9', 'hsa-miR-9*', 'SIM1', 'HOXA2', 'DLX4

In [37]:
i=range(0,len(set_common))
for index,el in enumerate(set_common):
    print(index)
    for number in i:
        if number != index:
            print(el & set_common[number])

0
set()
{'CCNT1', 'POLR2E', 'THADA'}
set()
set()
set()
{'CDC5L', 'THADA'}
set()
set()
set()
set()
set()
1
set()
{'ZNF785', 'ENOPH1', 'ABCA17P', 'CABC1', 'ACAD9', 'HRC', 'ABCA3', 'HSF2BP', 'AATF', 'AIMP2', 'C10orf88', 'EXD1', 'HSPBAP1', 'POLE4', 'HES4', 'WDR36'}
{'SP9', 'AATF'}
{'C8orf55', 'PQLC2', 'HES4', 'HSF2BP'}
set()
{'ACAD9', 'HES4'}
{'CABC1'}
set()
set()
set()
set()
2
{'CCNT1', 'POLR2E', 'THADA'}
{'ZNF785', 'ENOPH1', 'ABCA17P', 'CABC1', 'ACAD9', 'HRC', 'ABCA3', 'HSF2BP', 'AATF', 'AIMP2', 'C10orf88', 'EXD1', 'HSPBAP1', 'POLE4', 'HES4', 'WDR36'}
{'AATF'}
{'HES4', 'HSF2BP'}
set()
{'ACAD9', 'HES4', 'THADA', 'ZFYVE27'}
{'CABC1'}
set()
set()
{'ALDH1A1'}
set()
3
set()
{'SP9', 'AATF'}
{'AATF'}
{'OSR2'}
set()
set()
set()
set()
set()
set()
set()
4
set()
{'C8orf55', 'PQLC2', 'HES4', 'HSF2BP'}
{'HES4', 'HSF2BP'}
{'OSR2'}
set()
{'HES4', 'CCBL1'}
set()
set()
set()
set()
set()
5
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
set()
6
{'CDC5L', 'THADA'}
{'ACAD9', 'HES4'}
{'ACAD9', 'H

In [45]:
for gene in len_innodes_gene[10]:
    
    genes_res_no_go=set(edge[0] for edge in G.in_edges(gene) if edge[0] not in set.union(*GO_id_map.values()))
    
    if genes_res_no_go.intersection(set(net.nodes())) != genes_res_no_go:
        continue
    print(gene)
    for key in GO_id_map.keys():
        print(key)
        print(set(gene_res for edge in G.in_edges(gene) for gene_res in GO_id_map[key] if gene_res in edge[0] ))
   
    print(genes_res_no_go)
   
    print()
    


IVL
GO:0070317
{'MAX'}
GO:0051591
set()
{'BCL11A', 'EBF1', 'SPI1', 'CEBPB', 'PBX3', 'EP300', 'IRF4', 'BATF', 'TAL1'}

SPHK1
GO:0070317
{'E2F6', 'E2F1'}
GO:0051591
{'JUN', 'FOS', 'JUND'}
{'CTCF', 'SPI1', 'MYC', 'EP300', 'RAD21', 'hsa-miR-124', 'FOSL2'}

TMEM116
GO:0070317
{'MAX', 'E2F6'}
GO:0051591
set()
{'SPI1', 'CEBPB', 'USF1', 'NFE2', 'BATF', 'GTF2B', 'E2F4', 'USF2'}

LOC100129726
GO:0070317
{'E2F1'}
GO:0051591
{'JUN', 'JUND'}
{'MYC', 'FOXA1', 'HNF4A', 'HNF4G', 'RAD21', 'FOXA2', 'SMC3', 'PPARGC1A'}

LOC147727
GO:0070317
{'MAX', 'E2F6'}
GO:0051591
set()
{'ATF3', 'MYC', 'USF1', 'NFE2', 'MAFK', 'MAFF', 'YY1', 'USF2'}

GPR108
GO:0070317
set()
GO:0051591
{'FOS'}
{'SP2', 'NFYB', 'SPI1', 'PBX3', 'EP300', 'IRF3', 'NFYA', 'SIX5', 'SP1'}

LOC440925
GO:0070317
set()
GO:0051591
{'JUN', 'JUND'}
{'TCF4', 'RXRA', 'EP300', 'MAFK', 'FOXA1', 'HNF4A', 'MAFF', 'HNF4G', 'FOXA2'}

BST2
GO:0070317
{'MAX', 'E2F6'}
GO:0051591
{'STAT1', 'FOS'}
{'NFYB', 'EP300', 'USF1', 'NFYA', 'STAT2', 'GTF2B'}

TUBB1
GO:0070

In [48]:
GO_007={readout_gene : { edge[0] for edge in G.in_edges(readout_gene) if edge[0] in net.nodes()}for readout_gene in ["STYXL1","BRD9","TUBB1"]}
np.save('GO_007.npy',GO_007) 
GO_007

{'BRD9': {'E2F1', 'E2F4', 'GABPA', 'MYC', 'SIN3A'},
 'STYXL1': {'BRCA1', 'ELF1', 'ETS1', 'GABPA', 'ZBTB33'},
 'TUBB1': {'CTCF',
  'E2F6',
  'EP300',
  'MAX',
  'MYC',
  'NFYB',
  'RAD21',
  'TAL1',
  'TCF4',
  'hsa-miR-133a'}}

In [49]:
GO_007_005= {readout_gene :{ edge[0] for edge in G.in_edges(readout_gene) if edge[0] in net.nodes()}for readout_gene in ["SPHK1","ACAD9","BST2"]}
np.save('GO_007and005.npy',GO_007_005) 

In [50]:
GO_005={readout_gene : { edge[0] for edge in G.in_edges(readout_gene)} for readout_gene in ["UBE2C","hsa-miR-483-5p","C20orf111"]}
np.save('GO_005.npy',GO_005) 

In [34]:
[key for dici in [GO_005,GO_007,GO_007_005] for key in dici]

['ZNF775',
 'TMEM14C',
 'C20orf111',
 'STYXL1',
 'BRD9',
 'ABCA3',
 'ALDH1A1',
 'ACAD9']

In [30]:
## Win test
res_size=207
in_size=1
i_scaling=1

In [None]:
matrix,dict_pos=build_adj_weighted_matrix(file,mapping_relabel)

In [None]:
Win=np.zeros((res_size,1+in_size))*i_scaling
Win[1,]

In [None]:
#print(GO_id_map["GO:0030220"])
for gene in GO_id_map["GO:0030220"]:
    print(dict_pos[gene])
    Win[dict_pos[gene],]=2
print(np.where(Win==2))

In [None]:
def input_matrix_just_genes_GOterm(Win,GOterm,GO_id_map):
    for gene in GO_id_map[GOterm]:
        Win[dict_pos[gene],]=np.random.uniform(0,1)
    return Win

In [None]:
Win=np.zeros((res_size,1+in_size))*i_scaling
input_matrix_just_genes_GOterm(Win,"GO:0030220",GO_id_map)