In [1]:
import pandas as pd
import gene_conversion_tools as gct

The following data were downloaded from CCSB and converted to edge-list SIF files, one keyed by gene symbol and one keyed by Entrez gene ID, from the simple SIFs provided in both cases. No additional gene conversions were performed for these networks.

## Load Lit-BM-13 Raw Data
#### Source: http://interactome.dfci.harvard.edu/H_sapiens/download/Lit-BM-13.tsv
Downloaded: September 30, 2016  
As described in Rolland et al: http://interactome.dfci.harvard.edu/H_sapiens/download/Rolland-etal_Cell_2014.pdf

In [2]:
# Root of the project data tree; the input and output paths below are built from it.
wd = '/cellar/users/jkhuang/Data/Projects/Network_Analysis/Data/'
# Read the tab-separated Lit-BM-13 download into a DataFrame.
lit_bm_13_path = wd + 'Network_Data_Raw/Lit-BM-13.tsv'
Lit_BM_13_Raw = pd.read_csv(lit_bm_13_path, sep='\t')

In [3]:
# Sanity check: dimensions of the raw Lit-BM-13 table as (rows, columns).
Lit_BM_13_Raw.shape

(11045, 4)

In [4]:
# Keep only the rows in which both gene symbols are present.
lit_bm_13_symbol_pairs = Lit_BM_13_Raw[['symbol_a', 'symbol_b']].dropna()
# Order the two symbols inside each pair so an undirected edge has a single
# canonical orientation (A-B and B-A become the same entry).
Lit_BM_13_Symbol = [
    sorted(pair) for pair in lit_bm_13_symbol_pairs.values.tolist()
]
# Filter the edge list (the helper reports self-edges, un-mapped genes and
# duplicates it removes), then save the surviving edges as a SIF file.
Lit_BM_13_Symbol_filt = gct.filter_converted_edgelist(Lit_BM_13_Symbol)
gct.write_edgelist(
    Lit_BM_13_Symbol_filt,
    wd + 'Network_SIFs_Symbol/Lit-BM-13_Symbol.sif',
)

11040 input edges
889 self-edges removed
0 edges with un-mapped genes removed
0 duplicate edges removed
Edge list filtered: 0.01 seconds
10151 Edges remaining
Edge list saved: 0.02 seconds


In [5]:
# Keep only the rows in which both Entrez gene IDs are present.
# pandas parses an integer column that contains any NaN as float64, and `.values`
# on the two-column frame upcasts both columns together, so calling str() on the
# raw values would write IDs such as '1234.0'. Casting back to int64 after
# dropna() guarantees plain integer strings whether or not the raw file has gaps.
lit_bm_13_entrez_ids = (
    Lit_BM_13_Raw[['entrez_gene_ida', 'entrez_gene_idb']]
    .dropna()
    .astype('int64')
    .astype(str)
)
# Sort the two ID strings inside each pair so an undirected edge has a single
# canonical orientation (string order, not numeric order).
Lit_BM_13_Entrez = [
    sorted(pair) for pair in lit_bm_13_entrez_ids.values.tolist()
]
# Filter the edge list (the helper reports self-edges, un-mapped genes and
# duplicates it removes), then save the surviving edges as a SIF file.
Lit_BM_13_Entrez_filt = gct.filter_converted_edgelist(Lit_BM_13_Entrez)
gct.write_edgelist(Lit_BM_13_Entrez_filt, wd+'Network_SIFs_Entrez/Lit-BM-13_Entrez.sif')

11045 input edges
890 self-edges removed
0 edges with un-mapped genes removed
0 duplicate edges removed
Edge list filtered: 0.01 seconds
10155 Edges remaining
Edge list saved: 0.02 seconds


## Load HI-III-16 Raw Data
#### Source: http://interactome.dfci.harvard.edu/H_sapiens/index.php?page=newrelease
#### File: 'HI-III, preliminary release 2.3'
Downloaded: September 30, 2016  
Currently unpublished data, but free to use for preliminary exploration

In [10]:
# Root of the project data tree; the input and output paths below are built from it.
wd = '/cellar/users/jkhuang/Data/Projects/Network_Analysis/Data/'
# Read the tab-separated CCSB-HuRI preliminary release 2.3 download into a DataFrame.
hi_iii_16_path = wd + 'Network_Data_Raw/CCSB-HuRI_preliminary_release_2.3.tsv'
HI_III_16_Raw = pd.read_csv(hi_iii_16_path, sep='\t')

In [16]:
# NOTE(review): itertools is not used by any cell visible here; the import is kept
# in case other cells rely on it, but it belongs in the top import cell.
import itertools
# Preview the raw (symbol_a, symbol_b) pairs as a list of two-element lists.
HI_III_16_Raw[['symbol_a','symbol_b']].values.tolist()

[['A1CF', 'C19orf54'],
 ['A1CF', 'C6orf195'],
 ['A1CF', 'C9orf169'],
 ['A1CF', 'C9orf24'],
 ['A1CF', 'DSCR10'],
 ['A1CF', 'KLHDC7B'],
 ['A1CF', 'RIMBP3'],
 ['A1CF', 'WBSCR27'],
 ['A2LD1', 'HRASLS5'],
 ['AADAC', 'APPBP2'],
 ['AAGAB', 'AP1S3'],
 ['AAMP', 'AEN'],
 ['AANAT', 'C6orf195'],
 ['AANAT', 'GRB10'],
 ['AANAT', 'KRTAP8-1'],
 ['AASDHPPT', 'EFHC2'],
 ['ABCE1', 'EIF3J'],
 ['ABHD11', 'C16orf11'],
 ['ABHD11', 'DOK6'],
 ['ABHD11', 'DTX2'],
 ['ABHD11', 'MGAT5B'],
 ['ABHD11', 'MORN3'],
 ['ABHD11', 'USHBP1'],
 ['ABHD15', 'VWC2'],
 ['ABHD16A', 'BRI3'],
 ['ABHD16A', 'CIDEB'],
 ['ABHD16A', 'DTX2'],
 ['ABHD16A', 'FAM134C'],
 ['ABHD16A', 'GDAP1'],
 ['ABHD16A', 'RHBDL1'],
 ['ABHD16A', 'TMEM237'],
 ['ABHD16A', 'TMEM31'],
 ['ABHD4', 'CMTM5'],
 ['ABHD4', 'DGAT2L6'],
 ['ABHD4', 'DMBX1'],
 ['ABHD5', 'DGAT2L6'],
 ['ABHD5', 'TMEM31'],
 ['ABI1', 'ABI2'],
 ['ABI1', 'C16orf48'],
 ['ABI1', 'C22orf15'],
 ['ABI1', 'CCDC57'],
 ['ABI1', 'CCHCR1'],
 ['ABI1', 'FAM124B'],
 ['ABI1', 'FBXL19'],
 ['ABI1', 'KIAA1217']

In [7]:
# Sanity check: dimensions of the raw HI-III-16 table as (rows, columns).
HI_III_16_Raw.shape

(37062, 4)

In [8]:
# Keep only the rows in which both gene symbols are present.
hi_iii_16_symbol_pairs = HI_III_16_Raw[['symbol_a', 'symbol_b']].dropna()
# Order the two symbols inside each pair so an undirected edge has a single
# canonical orientation (A-B and B-A become the same entry).
HI_III_16_Symbol = [
    sorted(pair) for pair in hi_iii_16_symbol_pairs.values.tolist()
]
# Filter the edge list (the helper reports self-edges, un-mapped genes and
# duplicates it removes), then save the surviving edges as a SIF file.
HI_III_16_Symbol_filt = gct.filter_converted_edgelist(HI_III_16_Symbol)
gct.write_edgelist(
    HI_III_16_Symbol_filt,
    wd + 'Network_SIFs_Symbol/HI-III-16_Symbol.sif',
)

37062 input edges
435 self-edges removed
0 edges with un-mapped genes removed
0 duplicate edges removed
Edge list filtered: 0.03 seconds
36627 Edges remaining
Edge list saved: 0.03 seconds


In [9]:
# Keep only the rows in which both Entrez gene IDs are present.
# pandas parses an integer column that contains any NaN as float64, and `.values`
# on the two-column frame upcasts both columns together, so calling str() on the
# raw values would write IDs such as '1234.0'. Casting back to int64 after
# dropna() guarantees plain integer strings whether or not the raw file has gaps.
hi_iii_16_entrez_ids = (
    HI_III_16_Raw[['entrez_gene_ida', 'entrez_gene_idb']]
    .dropna()
    .astype('int64')
    .astype(str)
)
# Sort the two ID strings inside each pair so an undirected edge has a single
# canonical orientation (string order, not numeric order).
HI_III_16_Entrez = [
    sorted(pair) for pair in hi_iii_16_entrez_ids.values.tolist()
]
# Filter the edge list (the helper reports self-edges, un-mapped genes and
# duplicates it removes), then save the surviving edges as a SIF file.
HI_III_16_Entrez_filt = gct.filter_converted_edgelist(HI_III_16_Entrez)
gct.write_edgelist(HI_III_16_Entrez_filt, wd+'Network_SIFs_Entrez/HI-III-16_Entrez.sif')

37062 input edges
435 self-edges removed
0 edges with un-mapped genes removed
0 duplicate edges removed
Edge list filtered: 0.04 seconds
36627 Edges remaining
Edge list saved: 0.05 seconds
