In [45]:
import pandas as pd
import gene_conversion_tools as gct

## Load HumanNet Raw Data
#### Source: http://www.functionalnet.org/humannet/HumanNet.v1.join.txt
Downloaded: August 12, 2016  
No latest version date posted (last updated likely around 2011).  
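Citation: Insuk Lee, U. Martin Blom, Peggy I. Wang, Jung Eun Shin, and Edward M. Marcotte.
"Prioritizing candidate disease genes by network-based boosting of genome-wide association data."
Genome Research 21(7):1109-21 (2011).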

In [46]:
# Root data directory for this project (absolute, machine-specific path);
# later cells build their input/output paths from it.
wd = '/cellar/users/jkhuang/Data/Projects/Network_Analysis/Data/'
# Read the tab-separated raw network with no header row. header=None is the
# supported spelling; header=-1 is rejected by current pandas. Column names
# are assigned from the evidence-code file in the next cell.
HumanNet_Raw = pd.read_csv(wd+'Network_Data_Raw/HumanNet.v1.join.txt',sep='\t',header=None)

In [47]:
# Read the evidence-code file inside a context manager so the handle is
# closed as soon as the lines are in memory.
with open(wd+'Network_Data_Raw/HumanNet.v1.evidence_code.txt') as f:
    evidence_lines = f.read().splitlines()
# Column names: the two gene columns, then the text before ' = ' on every
# line of the evidence-code file except the first and the last.
HumanNet_headers = ['Gene 1', 'Gene 2']+[name.split(' = ')[0] for name in evidence_lines[1:-1]]
HumanNet_Raw.columns = HumanNet_headers

In [48]:
# Collect every distinct gene ID found in either gene column, as strings
HumanNet_Raw_Genes = [
    str(gene_id)
    for gene_id in set(HumanNet_Raw['Gene 1']) | set(HumanNet_Raw['Gene 2'])
]

In [49]:
# Get edge list of network: one [gene 1, gene 2] pair of string IDs per row
query_edgelist = HumanNet_Raw[['Gene 1','Gene 2']].astype(str).values.tolist()
# Single-argument print(...) emits the same text on Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print('%d HumanNet Edges' % len(query_edgelist))

476399 HumanNet Edges


## Convert Genes

In [50]:
# Build the query for the gene-name lookup from the string gene IDs. The helper
# also returns the IDs split into valid and invalid query genes and prints
# the count of each.
# NOTE(review): query_string is presumably the payload passed to
# gct.query_batch below -- confirm against gene_conversion_tools.
query_string, valid_genes, invalid_genes = gct.query_constructor(HumanNet_Raw_Genes)

16243 Valid Query Genes
0 Invalid Query Genes


In [51]:
# Set scopes (gene naming systems to search)
# NOTE(review): "retired" presumably lets retired Entrez IDs still match --
# confirm against the MyGene.info documentation.
scopes = "entrezgene, retired"

# Set fields (gene naming systems to return names from)
fields = "symbol, entrezgene"

In [52]:
# Query MyGene.Info with the scopes and fields configured above
match_list = gct.query_batch(query_string, scopes=scopes, fields=fields)
# Single-argument print(...) emits the same text on Python 2 and Python 3
# (the bare print statement is a SyntaxError on Python 3).
print('%d Matched query results' % len(match_list))

Batch query complete: 25.79 seconds
16243 Matched query results


In [53]:
# Turn the raw match results into a mapping table plus two lookups from query
# ID: one to gene symbol and one to Entrez ID. Both lookups are used below to
# convert the edge list. The helper reports queries without full matches and
# queries with multiple matches.
# NOTE(review): how unmatched/multi-match queries are resolved is decided
# inside gct.construct_query_map_table -- confirm there.
match_table_trim, query_to_symbol, query_to_entrez = gct.construct_query_map_table(match_list, valid_genes)

Queries without full matching results found: 8

0 Queries with mutliple matches found

Query mapping table/dictionary construction complete: 21.98 seconds


## Construct Converted Network

In [54]:
# Gene-symbol version of the network:
# 1) translate both endpoints of every edge with the query -> symbol lookup
HumanNet_edgelist_symbol = gct.convert_edgelist(query_edgelist, query_to_symbol)
# 2) filter the converted edges (the helper reports removing self-edges,
#    edges with un-mapped genes, and duplicate edges)
HumanNet_edgelist_symbol_filt = gct.filter_converted_edgelist(HumanNet_edgelist_symbol)
# 3) write the filtered edge list to the symbol SIF file under wd
gct.write_edgelist(HumanNet_edgelist_symbol_filt, wd+'Network_SIFs_Symbol/HumanNet_Symbol.sif')

476399 input edges
7 self-edges removed
181 edges with un-mapped genes removed
202 duplicate edges removed
Edge list filtered: 0.53 seconds
476211 Edges remaining
Edge list saved: 0.89 seconds


In [55]:
# Entrez-ID version of the network:
# 1) translate both endpoints of every edge with the query -> Entrez lookup
HumanNet_edgelist_entrez = gct.convert_edgelist(query_edgelist, query_to_entrez)
# 2) filter the converted edges (the helper reports removing self-edges,
#    edges with un-mapped genes, and duplicate edges)
HumanNet_edgelist_entrez_filt = gct.filter_converted_edgelist(HumanNet_edgelist_entrez)
# 3) write the filtered edge list to the Entrez SIF file under wd
gct.write_edgelist(HumanNet_edgelist_entrez_filt, wd+'Network_SIFs_Entrez/HumanNet_Entrez.sif')

476399 input edges
7 self-edges removed
181 edges with un-mapped genes removed
202 duplicate edges removed
Edge list filtered: 0.45 seconds
476211 Edges remaining
Edge list saved: 0.32 seconds
