In [10]:
import os 
import pandas as pd
import numpy as np
import networkx as nx
from pyvis.network import Network
import stringdb as sdb

In [7]:
def build_network_from_files(nodes_file,edges_file):
    """Build an undirected graph from a node table and an edge table.

    Parameters
    ----------
    nodes_file : path or file-like
        CSV with at least the columns ``id`` and ``label``.
    edges_file : path or file-like
        CSV with at least the columns ``from`` and ``to``; both hold node ids
        that must appear in the ``id`` column of the node table.

    Returns
    -------
    networkx.Graph
        Graph whose nodes are the *labels* (not the ids). Ids sharing the same
        label therefore collapse into a single node.

    Raises
    ------
    KeyError
        If a required column is missing or an edge references an unknown id.
    """
    nodes_table = pd.read_csv(nodes_file)
    edges_table = pd.read_csv(edges_file)

    # Columns are addressed by name: integer keys on a string-labelled row
    # (row[0], row[1]) rely on a positional fallback that pandas deprecated.
    id_to_label = dict(zip(nodes_table["id"], nodes_table["label"]))

    net = nx.Graph()
    net.add_nodes_from(list(id_to_label.values()))
    net.add_edges_from(
        (id_to_label[source_id], id_to_label[target_id])
        for source_id, target_id in zip(edges_table["from"], edges_table["to"])
    )

    return net

def build_network_from_string(gene_list):
    """Build an undirected graph of STRING interactions among ``gene_list``.

    The interaction table comes from ``stringdb.get_network``. Edge endpoints
    are read from its third and fourth columns (presumably the preferred gene
    names of the two partners -- confirm against the stringdb documentation).
    Genes that end up with no interaction are dropped from the graph.

    Parameters
    ----------
    gene_list : iterable
        Gene identifiers to query; any iterable is accepted (e.g. a node view).

    Returns
    -------
    networkx.Graph
        Graph containing only genes with at least one interaction.
    """
    # Materialise once so the same sequence feeds both the query and the graph,
    # even if the caller passed a one-shot iterable.
    genes = list(gene_list)
    net_table = sdb.get_network(genes)

    net = nx.Graph()
    net.add_nodes_from(genes)

    # Explicit positional access (.iloc) replaces row[2] / row[3], which relied
    # on the deprecated integer-key fallback of label-indexed Series.
    net.add_edges_from(zip(net_table.iloc[:, 2], net_table.iloc[:, 3]))

    remove_not_connected_nodes(net)

    return net

def remove_not_connected_nodes(G):
    """Delete, in place, every node of ``G`` that has no neighbours.

    Nothing is returned; the graph passed in is modified directly.
    """
    # Collect first, delete afterwards, so the node collection is never
    # modified while it is being traversed.
    isolated = [n for n in G.nodes if not any(True for _ in G.neighbors(n))]
    for n in isolated:
        G.remove_node(n)

def build_network_from_string_table(string_table_file):
    """Build an undirected graph from a saved interaction table.

    Parameters
    ----------
    string_table_file : path or file-like
        CSV whose first two columns hold the two endpoints of each edge.
        Any further columns are ignored.

    Returns
    -------
    networkx.Graph
        One edge per table row (duplicates collapse in the graph).
    """
    string_table = pd.read_csv(string_table_file)

    net = nx.Graph()
    # .iloc makes the "first two columns" contract explicit; row[0] / row[1]
    # on a string-labelled row used a positional fallback pandas deprecated.
    net.add_edges_from(zip(string_table.iloc[:, 0], string_table.iloc[:, 1]))

    return net

def net_in_common(net1,net2):
    """Return a graph holding the edges of ``net1`` that also exist in ``net2``.

    An edge counts as shared when ``net2`` has it in either orientation.
    Shared edges keep the orientation they have in ``net1``. Only nodes that
    take part in a shared edge appear in the result.

    Parameters
    ----------
    net1, net2 : networkx graph
        Graphs to compare; neither is modified.

    Returns
    -------
    networkx.Graph
        New undirected graph with the shared edges.
    """
    common_net = nx.Graph()

    # has_edge is a constant-time adjacency lookup; scanning a list of all
    # edges of net2 for every edge of net1 was quadratic. Both orientations are
    # tested so the check is also valid if net2 is directed.
    common_net.add_edges_from(
        edge
        for edge in net1.edges
        if net2.has_edge(edge[0], edge[1]) or net2.has_edge(edge[1], edge[0])
    )

    return common_net

def write_network_files(net,nodes_file,edges_file):
    """Write the nodes and edges of ``net`` to two CSV files.

    Parameters
    ----------
    net : networkx graph
        Graph to export.
    nodes_file : str
        Output path for the node table (header ``id,label``); each node is
        written as both its id and its label.
    edges_file : str
        Output path for the edge table (header ``Source,Target``).

    Notes
    -----
    Values are written verbatim, without CSV quoting, so labels containing a
    comma would produce malformed rows.
    """
    with open(edges_file,"w") as sf:
        sf.write("Source,Target\n")
        # Formatting (instead of str concatenation) also accepts non-string
        # node labels, which previously raised TypeError.
        sf.writelines(f"{edge[0]},{edge[1]}\n" for edge in list(net.edges))
    with open(nodes_file,"w") as lf:
        lf.write("id,label\n")
        lf.writelines(f"{node},{node}\n" for node in list(net.nodes))

In [11]:
def getGenes(path):
    """Group the files found in ``path`` by the gene named in their file name.

    The gene name is the text between the first ``@`` and the next ``.`` of
    the file name. Names containing a comma are skipped, and so are names
    without an ``@`` (they cannot be parsed and used to raise IndexError).

    Parameters
    ----------
    path : str
        Directory to scan (not recursive).

    Returns
    -------
    dict[str, list[str]]
        Gene name -> file names (not full paths), in sorted order.
    """
    gene_file_dict = {}

    # os.listdir returns entries in arbitrary order; sorting keeps the result
    # reproducible from run to run.
    for file in sorted(os.listdir(path)):
        if "," in file or "@" not in file:
            continue
        geneName = file.split("@")[1].split(".")[0]
        gene_file_dict.setdefault(geneName, []).append(file)

    return gene_file_dict

def mergeExpansionListsExclusive(gene,path,fileList,min_frel=0.95):
    """Return the genes that pass the filter in *every* file of ``fileList``.

    Each file is read as CSV with its second line as header and must provide
    the columns ``Frel`` and ``gene_name``. A row is kept when ``Frel`` is
    strictly greater than ``min_frel`` and ``gene_name`` is not missing.

    Parameters
    ----------
    gene : str
        The gene the files belong to; it is never part of the result.
    path : str
        Directory containing the files.
    fileList : iterable of str
        File names inside ``path``.
    min_frel : float, optional
        Exclusive lower bound on ``Frel`` (default 0.95).

    Returns
    -------
    list
        Sorted, duplicate-free gene names present in all files; empty when
        ``fileList`` is empty.
    """
    common = None
    for file in fileList:
        # os.path.join works whether or not `path` ends with a separator.
        df = pd.read_csv(os.path.join(path, file), header=1)
        # Keep rows above the threshold that actually carry a gene name.
        keep = (df["Frel"] > min_frel) & df["gene_name"].notna()
        neighbors = set(df.loc[keep, "gene_name"])

        # Set intersection avoids the duplicates and file-dependent ordering
        # the list-based version produced, and no longer shadows `gene`.
        common = neighbors if common is None else common & neighbors

    if common is None:
        return []

    common.discard(gene)
    return sorted(common)

def mergeExpansionLists(gene,path,fileList,min_frel=0.95):
    """Return the genes that pass the filter in *at least one* file.

    Each file is read as CSV with its second line as header and must provide
    the columns ``Frel`` and ``gene_name``. A row is kept when ``Frel`` is
    strictly greater than ``min_frel`` and ``gene_name`` is not missing.

    Parameters
    ----------
    gene : str
        The gene the files belong to; it is removed from the result.
    path : str
        Directory containing the files.
    fileList : iterable of str
        File names inside ``path``.
    min_frel : float, optional
        Exclusive lower bound on ``Frel`` (default 0.95).

    Returns
    -------
    list
        Sorted, duplicate-free union of the kept gene names.
    """
    merged = set()

    for file in fileList:
        # os.path.join works whether or not `path` ends with a separator.
        df = pd.read_csv(os.path.join(path, file), header=1)
        # Keep rows above the threshold that actually carry a gene name.
        keep = (df["Frel"] > min_frel) & df["gene_name"].notna()
        merged.update(df.loc[keep, "gene_name"])

    # De-duplication and sorting happen once, not once per file.
    merged.discard(gene)
    return sorted(merged)

def mergeExpansionListsSemiExclusive(gene,path,fileList,min_frel=0.95,min_fraction=0.5):
    """Return the genes that pass the filter in a large enough share of files.

    Each file is read as CSV with its second line as header and must provide
    the columns ``Frel`` and ``gene_name``. A row is kept when ``Frel`` is
    strictly greater than ``min_frel`` and ``gene_name`` is not missing. A gene
    is counted at most once per file.

    Parameters
    ----------
    gene : str
        The gene the files belong to; it is never part of the result.
    path : str
        Directory containing the files.
    fileList : sequence of str
        File names inside ``path``.
    min_frel : float, optional
        Exclusive lower bound on ``Frel`` (default 0.95).
    min_fraction : float, optional
        Minimum share of files (inclusive) a gene must appear in (default 0.5).

    Returns
    -------
    list
        Qualifying gene names in first-seen order (alphabetical within a file).
    """
    occurrences = {}
    n_files = len(fileList)

    for file in fileList:
        # os.path.join works whether or not `path` ends with a separator.
        df = pd.read_csv(os.path.join(path, file), header=1)
        # Keep rows above the threshold that actually carry a gene name.
        keep = (df["Frel"] > min_frel) & df["gene_name"].notna()

        for g in sorted(set(df.loc[keep, "gene_name"]) - {gene}):
            occurrences[g] = occurrences.get(g, 0) + 1

    return [g for g, count in occurrences.items() if count / n_files >= min_fraction]

def mergeAll(gf_dict,path):
    """Run the semi-exclusive merge for every gene of ``gf_dict``.

    ``gf_dict`` maps a gene to its list of file names inside ``path``. The
    result maps each gene to whatever ``mergeExpansionListsSemiExclusive``
    returns for it. A ``done / total`` line is printed after each gene.
    """
    merged_by_gene = {}
    for done, gene in enumerate(list(gf_dict), start=1):
        files_for_gene = gf_dict[gene]
        merged_by_gene[gene] = mergeExpansionListsSemiExclusive(gene, path, files_for_gene)
        print(done, "/", len(gf_dict))
    return merged_by_gene

def getAllGenes(gf_dict):
    """Return every gene mentioned in ``gf_dict``, as key or as list member.

    Parameters
    ----------
    gf_dict : dict
        Maps a gene to an iterable of related genes.

    Returns
    -------
    list
        Sorted, duplicate-free list of all keys and all values' members.
    """
    # Only the argument is read. The previous version iterated the notebook
    # global `f`, so it worked only when `f` happened to exist in the kernel
    # and silently ignored the values of the dictionary actually passed in.
    total_genes = set(gf_dict)
    for related in gf_dict.values():
        total_genes.update(related)

    return sorted(total_genes)

def buildGraph(gf_dict):
    """Build a coloured graph linking each key gene to the genes in its list.

    Parameters
    ----------
    gf_dict : dict
        Maps a centre gene to an iterable of neighbour genes.

    Returns
    -------
    networkx.Graph
        Every gene returned by ``getAllGenes(gf_dict)`` becomes a node with a
        1-based ``id`` (in that order) and a ``color``: ``"red"`` for genes
        that are keys of ``gf_dict``, ``"blue"`` otherwise. Each
        (centre, neighbour) pair becomes an edge with ``color="black"``.
    """
    G = nx.Graph()
    # A set gives constant-time membership tests; the previous list made node
    # colouring quadratic in the number of genes.
    center_genes = set(gf_dict)
    total_genes = getAllGenes(gf_dict)

    print("Building the nodes...")
    for index, geneName in enumerate(total_genes, start=1):
        node_color = "red" if geneName in center_genes else "blue"
        G.add_node(geneName,id = index,color=node_color)

    print("Building the edges...")
    for centerGene, neighbours in gf_dict.items():
        for n in neighbours:
            G.add_edge(centerGene,n,color = "black")
    print(len(G.edges),"edges built")
    return G

def createNetworkFiles(G,structure_file,label_file):
    """Export ``G`` as an id-based edge table plus an id-to-label table.

    ``structure_file`` receives one ``from,to`` row per edge, written with the
    ``id`` attribute of the two end nodes. ``label_file`` receives one
    ``id,label`` row per node, pairing that ``id`` attribute with the node
    itself. Every node must carry an ``id`` attribute and be a string.
    """
    def id_of(node):
        # Numeric id stored on the node, rendered as text.
        return str(G.nodes[node]["id"])

    # Connections between the nodes
    with open(structure_file,"w") as structure_out:
        structure_out.write("from,to\n")
        for edge in list(G.edges):
            structure_out.write(",".join((id_of(edge[0]), id_of(edge[1]))) + "\n")

    # Labels of the nodes
    with open(label_file,"w") as labels_out:
        labels_out.write("id,label\n")
        for node in list(G.nodes):
            labels_out.write(",".join((id_of(node), node)) + "\n")

def recursiveCut(G,degree):
    """Repeatedly strip nodes having at most ``degree`` neighbours.

    Removing nodes can push further nodes down to the threshold, so pruning
    is repeated until a pass removes nothing.

    Parameters
    ----------
    G : networkx graph
        Graph to prune. It is modified **in place**.
    degree : int
        Nodes with this many neighbours or fewer are removed.

    Returns
    -------
    networkx graph
        A copy of the pruned ``G``.
    """
    # Iterative on purpose: with one recursion level per pass, a long chain
    # (one node peeled per pass) exceeded Python's recursion limit.
    while True:
        to_remove = [
            node for node in G.nodes
            if sum(1 for _ in G.neighbors(node)) <= degree
        ]
        if not to_remove:
            return G.copy()
        G.remove_nodes_from(to_remove)

In [8]:
# Input tables for our own network.
nodes_file = "../R_code/nodes.csv"
edges_file = "../R_code/edges.csv"

# Our network, and the STRING network queried with the same node names.
our_net = build_network_from_files(nodes_file, edges_file)
string_net = build_network_from_string(our_net.nodes)

# Size of both networks, nodes first, then edges.
print("--- NODES ----")
print("our: ", our_net.number_of_nodes(), "\tstringdb:", string_net.number_of_nodes())

print("--- EDGES ----")
print("our: ", our_net.number_of_edges(), "\tstringdb:", string_net.number_of_edges())

# Edges present in both networks (printed as: node count, edge count).
common_net = net_in_common(our_net, string_net)
print("common:", common_net.number_of_nodes(), "\t", common_net.number_of_edges())

# Export the shared network and the STRING network (node table, edge table).
write_network_files(common_net, "common_labels.csv", "common_net.csv")
write_network_files(string_net, "string_labels.csv", "string_net.csv")

--- NODES ----
our:  148 	stringdb: 66
--- EDGES ----
our:  147 	stringdb: 127
common: 0 	 0


In [6]:
# Directory holding one expansion result file per (run, gene).
path = "../Data/all.results/"
gf_dict = getGenes(path)
print(len(gf_dict))

# Gene -> merged list of genes returned by the semi-exclusive merge.
f = mergeAll(gf_dict,path)

# All genes involved: the centre genes plus every gene in their merged lists.
# (`f` has the same keys as `gf_dict`, so this equals the former inline loop.)
total_genes = getAllGenes(f)
print(len(total_genes))


G = buildGraph(f)
createNetworkFiles(G,"../Files/net.csv","labels.csv")

# Reference gene list: one gene per line, first column only.
h1n1 = pd.read_table("../Files/genes_h1n1.txt",header=None)[0].tolist()

# `x in G` is a constant-time node lookup; the previous `x in list(G.nodes)`
# rebuilt the full node list for every gene of the reference list.
both = [x for x in h1n1 if x in G]

with open("../Files/genes_in_common.csv","w") as file:
    file.writelines(f"{gene}\n" for gene in both)

Unnamed: 0.1,Unnamed: 0,id,label
0,1,t019615,SNORA1
1,2,t012232,SNORD14A
2,3,t018938,SNORA52
3,4,t015979,SNORA57
4,5,t009543,RDX
...,...,...,...
196,197,t197908,FTH1P8
197,198,t152511,HRH2
198,199,t099262,SNORD119
199,200,t151117,G3BP1
