## Run community detection experiments

Do this for the Louvain algorithm.

And hopefully eADAGE & coexpression networks, if time permits.

In [1]:
import os
import random

import numpy as np
import pandas as pd
import igraph as ig
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns

from sample_nodes import (
    sample_degree_matched,
    sort_by_degree,
)

# Seed every RNG the analysis draws from so that Restart & Run All gives the
# same results each time. python-igraph takes its random numbers from
# Python's built-in `random` module by default, so seeding NumPy alone leaves
# the (stochastic) Louvain community detection different from run to run.
np.random.seed(42)
random.seed(42)

In [2]:
# --- script parameters ---
# how many degree-matched node samples to draw for the permutation test
NUM_NODE_SAMPLES = 100

# --- input files ---
data_dir = './data'
edge_list = os.path.join(data_dir, 'edge_list_processed_unsigned.csv')

# Pa gene name -> generic / not generic status (table generated by Alex)
generic_gene_map = os.path.join(
    '..',
    'pseudomonas_analysis',
    'annot_df.tsv',
)

In [3]:
# read the edge list CSV into a DataFrame and preview the first rows
graph_df = pd.read_csv(edge_list, sep=',')
graph_df.head(n=5)

Unnamed: 0,from,to,weight
0,PA0001,PA0002,0.578872
1,PA0001,PA0548,0.510323
2,PA0001,PA0963,0.561494
3,PA0001,PA3637,0.512365
4,PA0001,PA5554,0.554768


In [4]:
# Build the undirected, weighted graph from the edge list.
#
# TupleList reads every row positionally: items 0 and 1 are the endpoint
# vertex names and, with weights=True, item 2 becomes the 'weight' edge
# attribute. Select the columns by name so that an extra column (e.g. a
# saved index) or a reordered CSV cannot silently scramble endpoints and
# weights.
G = ig.Graph.TupleList(graph_df[['from', 'to', 'weight']].values,
                       weights=True,
                       directed=False)

In [5]:
# sanity check: the 'weight' edge attribute should be populated;
# show the first five values
first_weights = G.es['weight'][:5]
print(first_weights)

[0.578871792667748, 0.510322953238081, 0.561494130101994, 0.512364523924246, 0.554767556277657]


In [6]:
# tab-separated annotation table; the first column is used as the index
annot_df = pd.read_csv(
    generic_gene_map,
    delimiter='\t',
    index_col=0,
)
annot_df.head(n=5)

Unnamed: 0,label
PA0001,0
PA0002,0
PA0003,0
PA0004,0
PA0005,0


In [7]:
# look up the integer 'label' of every vertex (by its gene name) in the
# annotation table and attach it to the graph as a vertex attribute
gene_labels = annot_df['label']
is_generic = []
for gene_name in G.vs['name']:
    is_generic.append(int(gene_labels.loc[gene_name]))
G.vs['is_generic'] = is_generic

In [8]:
# TODO: decide what to do about negative edge weights
# weighted Louvain (multilevel) community detection; only the final
# partition is requested, not the intermediate levels
edge_weights = G.es['weight']
partition = G.community_multilevel(weights=edge_weights)
# members of the first community
print(partition[0])

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 64, 65, 66, 67, 68, 69, 71, 72, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 107, 121, 122, 150, 247, 249, 250, 336, 337, 338, 342, 347, 350, 351, 367, 371, 372, 516, 587, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 740, 742, 744, 1027, 1032, 1039, 1042, 1082, 1083, 1084, 1085, 1086, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1151, 1152, 1153, 1164, 1165, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1197, 1199, 1200, 1201, 1203, 1205, 1208, 1223, 1224, 1227, 1229, 1240, 1242, 1243, 1244,