In [1]:
import random

import igraph
import numpy as np
import pandas as pd

#from https://github.com/idekerlab/cy-rest-R/blob/develop/workflow1_structure_based_visualization.R

# Step 1: Network Data Preparation

Load yeast network SIF file as Data Frame

In [2]:
# The file is read as space-separated text with no header row, so pandas
# labels the columns 0, 1, 2.
yeast_table = pd.read_csv(
    "./cy-rest-R/data/yeastHighQuality.sif",
    sep=" ",
    header=None,
)

In [3]:
# Preview the first five rows of the loaded table.
yeast_table.head(n=5)

Unnamed: 0,0,1,2
0,YLR197W,pp,YDL014W
1,YOR039W,pp,YOR061W
2,YDR473C,pp,YPR178W
3,YOR332W,pp,YLR447C
4,YER090W,pp,YKL211C


Convert it to simple edge list

In [4]:
# Keep only the columns at positions 0 and 2 (the two endpoints of each row);
# the middle column is not needed for the edge list.
yeast_table_edgelist = yeast_table.iloc[:, [0, 2]]

Convert DF to undirected igraph object
This is a PPI network, so import as undirected.

In [5]:
# Each row of the edge list becomes one (source, target) tuple.
tuples = list(map(tuple, yeast_table_edgelist.values))
# directed=False: the interactions are imported as undirected edges.
g_original = igraph.Graph.TupleList(tuples, directed=False)

Extract components (individual connected subgraphs)

In [6]:
# Split the full graph into a list of graphs, one per component.
subgraphs = g_original.decompose()

Pick largest subgraph

In [7]:
# Vertex count of every component, in the same order as `subgraphs`.
tmp = [component.vcount() for component in subgraphs]
# Index of the biggest count; ties resolve to the earliest component.
largeset_subgraph = subgraphs[max(range(len(tmp)), key=tmp.__getitem__)]

Remove duplicate edges and self-loops

In [8]:
# Drop parallel edges (multiple=True) and self-loops (loops=True).
# NOTE(review): igraph's simplify() is understood to edit the graph in place and
# return it, so `g` and `largeset_subgraph` presumably alias one object - confirm.
g = largeset_subgraph.simplify(loops=True, multiple=True)
# NOTE(review): this sets a plain Python attribute on the object; an igraph
# graph attribute would be g["name"] - confirm which one later cells expect.
g.name = "Yeast network"

# Step 2: Basic statistical analysis

Global network statistics

In [9]:
# Compute both graph-level measures first, then report them.
density = g.density()
transitivity = g.transitivity_undirected()
print("Density:", density)
print("Transitivity:", transitivity)

Density: 0.001594505204957334
Transitivity: 0.0816233202786051


Node statistics

In [10]:
# All four per-vertex measures are requested through the same vertex sequence.
nodes = g.vs
g_closeness = nodes.closeness()              # closeness centrality
g_degree = nodes.degree()                    # degree
g_pagerank = nodes.pagerank(directed=False)  # PageRank, edge direction ignored
g_vs_betweenness = nodes.betweenness()       # betweenness centrality

Edge statistics

In [11]:
# Edge betweenness is requested from the Graph itself rather than through
# g.es: the edge sequence is not expected to proxy edge_betweenness()
# (NOTE(review): confirm against the installed python-igraph version).
# The Graph method returns one value per edge.
g_es_betweenness = g.edge_betweenness()

# Step 3: Community Detection: Try multiple algorithms

In [12]:
# Label propagation is randomized. python-igraph is documented to draw its
# random numbers from Python's `random` module by default, so seeding it here
# keeps the detected communities stable across Restart & Run All.
random.seed(42)

# Fast-greedy returns a dendrogram; as_clustering() cuts it into a flat clustering.
communities_greedy = g.community_fastgreedy().as_clustering()
communities_leading = g.community_leading_eigenvector()
communities_label_propagation = g.community_label_propagation()

In [13]:
# Pull the membership list out of each clustering, in the same
# greedy / leading-eigenvector / label-propagation order.
v_community_greedy, v_community_leading, v_community_label_propagation = (
    clustering.membership
    for clustering in (
        communities_greedy,
        communities_leading,
        communities_label_propagation,
    )
)

In [14]:
def getCommunityEdge(g, community):
    """Label every edge of ``g`` with the community shared by its endpoints.

    Parameters
    ----------
    g
        Graph-like object whose ``get_edgelist()`` returns a sequence of
        ``(source_index, target_index)`` pairs.
    community
        Sequence indexed by vertex index that gives each vertex's integer
        community id.

    Returns
    -------
    list of int
        One entry per edge, in ``get_edgelist()`` order. The value is the
        community id plus one when both endpoints belong to the same
        community, and ``0`` when the edge connects two different communities.
    """
    # The +1 shift keeps 0 free as the "between communities" marker, since
    # community ids themselves can be 0.
    return [
        community[source] + 1 if community[source] == community[target] else 0
        for source, target in g.get_edgelist()
    ]
    

In [15]:
# Derive one edge-label list per clustering, in the same
# greedy / leading-eigenvector / label-propagation order.
# NOTE(review): the first two names are spelled "commnity"; they are kept
# as-is because cells outside this view may reference them.
e_commnity_greedy, e_commnity_leading, e_community_label_propagation = [
    getCommunityEdge(g, membership)
    for membership in (
        v_community_greedy,
        v_community_leading,
        v_community_label_propagation,
    )
]

# Step 4: Send data to Cytoscape