In [6]:
## Make sure the working directory is /differential before running the scripts (the data paths below are relative)

In [7]:
import pandas as pd
import numpy as np
# Turn off pandas' chained-assignment warning for the rest of the session.
pd.options.mode.chained_assignment = None  # default='warn'

# Read the expression table, discard the 'ID_REF' column, and keep only the
# rows whose 'Symbol' entry is present.
expressions = (
    pd.read_csv("data/gene_expression.csv")
    .drop(columns=['ID_REF'])
    .dropna(subset=['Symbol'])
)

In [8]:
# Extract labels.
# The first remaining row carries one disease-status string per column;
# encode it as 0 / 1 / 2. Entries that are not in the mapping become NaN.
status_row = expressions.iloc[0]
labels = status_row.map({
    "disease status: non-asthmatic": 0,
    "disease status: asthmatic": 1,
    "disease status: N/A": 2,
})

# Positions (and then column names) of the columns coded above 1, i.e. "N/A".
bad_index = np.where(labels > 1)
bad_cols = expressions.columns[bad_index]

# Keep only the labels coded 0 or 1 (NaN entries fail the comparison and drop out).
labels = labels[labels <= 1]

# Remove the "N/A" columns, then drop the status row so only data rows remain.
expressions = expressions.drop(columns=bad_cols).iloc[1:]

In [9]:
# Convert every column after the first one to a numeric dtype.
cols = expressions.columns
expressions = expressions.assign(
    **{sample: pd.to_numeric(expressions[sample]) for sample in cols[1:]}
)

In [10]:
# Collapse rows that share a 'Symbol' into one row holding the per-column mean;
# 'Symbol' becomes the index of the result.
expressions = expressions.groupby('Symbol').mean()

In [11]:
## Subset genes based on network results

# Load the network table; the following cells read its 'Louvain Community'
# and 'Node ID' columns.
shortlist = pd.read_csv("data/edgelist_Virtualitics.csv")

# Names of the Louvain communities kept for the analysis.
# NOTE(review): this was described as the "top 5" communities, but six IDs are
# listed here -- confirm which is intended.
community_num = [f"Community {comm_id}" for comm_id in (0, 27, 48, 2, 23, 7)]

In [12]:
# Rows of the shortlist whose community is one of the selected names,
# and the node IDs found in those rows.
in_selected = shortlist['Louvain Community'].isin(community_num)
comm_shortlist = shortlist.loc[in_selected]
community_genes = comm_shortlist['Node ID']

In [13]:
# Expression rows whose index value (the 'Symbol') appears among the selected node IDs.
community_expressions = expressions.loc[expressions.index.isin(community_genes)]

# Node ID -> Louvain community lookup. Select the single column before
# converting so that only that column is turned into a dict (instead of
# building a dict for every column and then discarding all but one).
# If a node ID occurs more than once, the last occurrence wins.
comm_mapping = comm_shortlist.set_index('Node ID')['Louvain Community'].to_dict()

In [14]:
# Community name for each row of community_expressions, keyed by the same index.
gene_symbols = community_expressions.index.to_series()
mapped_comm = gene_symbols.map(comm_mapping)

In [15]:
# Column names split by label: 0 = non-asthmatic (control), 1 = asthmatic.
control_index = labels.loc[labels.eq(0)].index
asthma_index = labels.loc[labels.eq(1)].index

In [16]:
# Split the selected rows into one frame per group and attach each row's
# community name as a 'community' column.
# `.assign` returns a new frame with the extra column (aligned on the index),
# so nothing is written into a frame that was derived by column selection and
# the cell no longer relies on the chained-assignment warning being silenced.
community = {
    'control': community_expressions[control_index].assign(community=mapped_comm),
    'asthma': community_expressions[asthma_index].assign(community=mapped_comm),
}

In [17]:
# Display the control-group frame: the label-0 columns plus the 'community' column.
community['control']

Unnamed: 0_level_0,GSM870902,GSM870908,GSM870909,GSM870916,GSM870920,GSM870925,GSM870928,GSM870931,GSM870933,GSM870935,...,GSM871016,GSM871017,GSM871020,GSM871021,GSM871022,GSM871025,GSM871026,GSM871027,GSM871032,community
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAAS,7.270958,7.400666,7.274018,7.305615,7.339042,7.547801,7.329639,7.409556,7.300221,7.824174,...,6.910281,6.927554,7.239283,6.838720,7.050743,7.252188,7.018683,6.891862,7.083908,Community 27
AAR2,8.005934,7.788928,7.795040,8.175761,7.790937,7.630951,7.840549,7.940925,7.945878,8.135699,...,8.176222,8.070155,8.009229,7.881362,7.962036,7.891650,7.910554,7.820054,8.107168,Community 2
ABCA2,5.441511,5.464134,4.936652,5.750829,5.393898,5.455789,5.291827,5.463618,5.174071,5.631628,...,4.987576,5.156936,5.132485,5.417698,5.529799,5.577380,5.645695,5.104899,5.169124,Community 27
ABCC1,7.290247,7.968351,8.225717,7.978555,7.823975,7.750453,7.776225,7.538786,7.778362,7.747120,...,7.676370,7.554297,7.558153,7.562079,7.847582,7.692821,7.588472,7.878840,7.931561,Community 27
ABCD4,6.316590,6.257385,6.164607,6.193646,6.287232,5.882032,6.041392,5.923500,6.118146,6.096498,...,6.440137,6.269538,6.415836,6.220065,6.504010,6.207760,6.171836,6.245180,5.892234,Community 48
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZNF821,5.630131,5.529426,5.238278,5.444373,5.544027,5.687905,5.444817,5.569761,5.517299,5.540761,...,5.609227,5.440470,5.705736,5.738321,5.713382,5.714240,5.476501,5.312385,5.525644,Community 23
ZNF844,4.879632,4.926237,4.489063,4.917928,4.808789,4.614156,4.462999,4.802756,4.956813,4.874876,...,5.017132,4.829034,4.616763,4.625856,4.597220,4.440371,4.343730,5.135127,4.608778,Community 23
ZNF879,6.064936,6.722705,5.882056,6.606961,7.178251,6.409577,6.764717,6.852185,6.319512,5.890456,...,6.628029,6.786501,6.381116,6.001436,7.155586,6.451342,6.758449,6.876427,6.541575,Community 27
ZSCAN21,5.834750,5.770977,6.334286,6.170622,6.254965,5.926183,5.993926,6.321244,5.799894,5.861283,...,5.997561,5.961450,5.831142,5.770635,6.238144,5.838271,6.080958,5.884458,6.085920,Community 27


In [277]:
import pickle

# Save the {'control', 'asthma'} dict of frames to community_expr.pkl
# (path is relative to the current working directory).
with open("community_expr.pkl", "wb") as pkl_file:
    pickle.dump(community, pkl_file)
    