# biogridpy walkthrough #
-----

In [1]:
from biogridpy.biogrid_client import BioGRID

In [2]:
# When run from the biogridpy parent directory, no arguments are needed:
BG = BioGRID()

# From any other directory, pass the path to the biogridpyrc file explicitly:
#BG = BioGRID(config_filepath='/path/to/biogridpyrc')

----------------
# Non-Interaction endpoints #
<font color='red'>evidence</font>

>BG.evidence()

<font color='red'>identifiers</font>

>BG.identifiers()

<font color='red'>organisms</font>

>BG.organisms()

<font color='red'>version</font>

>BG.version()

### Each of these accepts only the **format** keyword argument.  Acceptable values are:

1.  <font color='blue'>"tab2"</font> (default)
2.  <font color='blue'>"json"</font>
    

### <font color='blue'>"tab2"</font> is the default, shown here with the <font color='red'>evidence</font> endpoint

In [3]:
# no format_ argument is given, so the default ("tab2") is used
evid = BG.evidence()

### <font color='blue'>"tab2"</font> results are accessed in list form via the <font color='green'>result</font> attribute

In [4]:
evid.result[:5]  # first 5 evidence types

[u'AFFINITY CAPTURE-LUMINESCENCE',
 u'AFFINITY CAPTURE-MS',
 u'AFFINITY CAPTURE-RNA',
 u'AFFINITY CAPTURE-WESTERN',
 u'BIOCHEMICAL ACTIVITY']

### similar result with the <font color='red'>identifiers</font> endpoint

In [5]:
# same format as the default, but with format_ spelled out explicitly
idents = BG.identifiers(format_="tab2")

In [6]:
idents.result[:5]  # first 5 identifiers

[u'ANIMALQTLDB', u'APHIDBASE', u'BEEBASE', u'BGD', u'BIOGRID']

### format can also be <font color='blue'>"json"</font>, shown with the  <font color='red'>organisms</font> endpoint

In [7]:
# note: the keyword argument is format_ (trailing underscore) so that it doesn't
# clobber the Python built-in format
orgs = BG.organisms(format_='json')

### which returns a dictionary-like object

In [8]:
orgs.result['9606']  # keys are str in json results, so the ID must be quoted

'Homo sapiens'

### The  <font color='red'>version</font> endpoint simply returns the BioGRID REST Service version

In [9]:
BG.version()  # returns the version string directly; no need to use the result attribute

u'3.4.140'

----------------
# Interactions endpoints

1. <font color='red'>interactions_single</font>
2. <font color='red'>interactions</font>

## Single Interaction 
### <font color='red'>interactions_single</font> is a special use version of the <font color='red'>interactions</font> endpoint

In [10]:
# the first positional argument is the format; the second is an integer
# representing the BioGRID interaction ID.
single_result = BG.interactions_single('json', 103)

In [11]:
single_result.result  # dict keyed by the interaction ID (as a str)

{u'103': {u'BIOGRID_ID_A': 112315,
  u'BIOGRID_ID_B': 108607,
  u'BIOGRID_INTERACTION_ID': 103,
  u'ENTREZ_GENE_A': u'6416',
  u'ENTREZ_GENE_B': u'2318',
  u'EXPERIMENTAL_SYSTEM': u'Two-hybrid',
  u'EXPERIMENTAL_SYSTEM_TYPE': u'physical',
  u'MODIFICATION': u'-',
  u'OFFICIAL_SYMBOL_A': u'MAP2K4',
  u'OFFICIAL_SYMBOL_B': u'FLNC',
  u'ONTOLOGY_TERMS': {},
  u'ORGANISM_A': 9606,
  u'ORGANISM_B': 9606,
  u'PUBMED_AUTHOR': u'Marti A (1997)',
  u'PUBMED_ID': 9006895,
  u'QUALIFICATIONS': u'-',
  u'QUANTITATION': u'-',
  u'SOURCEDB': u'BIOGRID',
  u'SYNONYMS_A': u'JNKK|JNKK1|MAPKK4|MEK4|MKK4|PRKMK4|SAPKK-1|SAPKK1|SEK1|SERK1|SKK1',
  u'SYNONYMS_B': u'ABP-280|ABP280A|ABPA|ABPL|FLN2|MFM5|MPD4',
  u'SYSTEMATIC_NAME_A': u'-',
  u'SYSTEMATIC_NAME_B': u'-',
  u'TAGS': u'-',
  u'THROUGHPUT': u'Low Throughput'}}

## <font color='red'>interactions</font> endpoint
### The positional argument is the format parameter, and it is required.  The keyword arguments (`**kwargs`) are the same parameters listed on the [BioGRID Help Wiki](http://wiki.thebiogrid.org/doku.php/biogridrest#list_of_parameters)

In [12]:
# geneList can be a Python list of gene names, or other gene identifiers;
# 9606 is the organism ID that the organisms lookup maps to 'Homo sapiens'
bg_results = BG.interactions('json', geneList=["E2F1","RB1"], taxId=9606)

### <font color='red'>interactions</font> attributes
1. **count**
2. **endpoint**
3. **output_format**
4. **headers**
5. **raw_result**
6. **result**

#### The first few attributes just provide information about the query

In [13]:
# Assemble the query metadata lines first, then print them in order.
# The trailing "\n" on the first three leaves a blank line after each.
metadata_lines = [
    "count: {0}\n".format(bg_results.count),                  # number of records returned
    "endpoint: {0}\n".format(bg_results.endpoint),            # endpoint used
    "output_format: {0}\n".format(bg_results.output_format),  # format requested
    "headers: {0}".format(bg_results.headers),                # column headers
]
for metadata_line in metadata_lines:
    print (metadata_line)

count: 997

endpoint: interactions

output_format: json

headers: ['BioGRID Interaction ID', 'Entrez Gene Interactor A', 'Entrez Gene Interactor B', 'BioGRID ID Interactor A', 'BioGRID ID Interactor B', 'Systematic Name Interactor A', 'Systematic Name Interactor B', 'Official Symbol Interactor A', 'Official Symbol Interactor B', 'Synonyms Interactor A', 'Synonyms Interactor B', 'Experimental System', 'Experimental System Type', 'Author', 'Pubmed ID', 'Organism Interactor A', 'Organism Interactor B', 'Throughput', 'Score', 'Modification', 'Phenotypes', 'Qualifications', 'Tags', 'Source Database']


#### The <font color='green'>raw_result</font> attribute is an unformatted list of response strings from the web service (it is a list because large requests are paginated).  This attribute is not really meant to be accessed by the user.

In [14]:
# the raw payload is too long to print, so only inspect its container
raw_pages = bg_results.raw_result
print (type(raw_pages))
# length is 1 because the default pagination is set to 10,000 records
print (len(raw_pages))

<type 'list'>
1


#### The <font color='green'>result</font> attribute is formatted for downstream use if desired.  json format is easier to interact with programmatically because you can access the result like a dictionary.

In [15]:
bg_results.result['691846']  # one example record; json keys have to be str

{u'BIOGRID_ID_A': 107452,
 u'BIOGRID_ID_B': 111860,
 u'BIOGRID_INTERACTION_ID': 691846,
 u'ENTREZ_GENE_A': u'1017',
 u'ENTREZ_GENE_B': u'5925',
 u'EXPERIMENTAL_SYSTEM': u'Biochemical Activity',
 u'EXPERIMENTAL_SYSTEM_TYPE': u'physical',
 u'MODIFICATION': u'Phosphorylation',
 u'OFFICIAL_SYMBOL_A': u'CDK2',
 u'OFFICIAL_SYMBOL_B': u'RB1',
 u'ONTOLOGY_TERMS': {},
 u'ORGANISM_A': 9606,
 u'ORGANISM_B': 9606,
 u'PUBMED_AUTHOR': u'Kim KS (2002)',
 u'PUBMED_ID': 12190313,
 u'QUALIFICATIONS': u'Cdk1-Cyclin E.',
 u'QUANTITATION': u'-',
 u'SOURCEDB': u'BIOGRID',
 u'SYNONYMS_A': u'CDKN2|p33(CDK2)',
 u'SYNONYMS_B': u'OSRC|PPP1R130|RB|p105-Rb|pRb|pp110',
 u'SYSTEMATIC_NAME_A': u'-',
 u'SYSTEMATIC_NAME_B': u'RP11-174I10.1',
 u'TAGS': u'-',
 u'THROUGHPUT': u'Low Throughput'}

### <font color='red'>interactions</font> methods
1. export
2. toDataFrame

### export
**This method takes only two keyword arguments, outdir and filename.  The file extension will be either .tab2 or .json depending on the format you chose.**

In [16]:
# write the results to disk; the extension is added based on the chosen format
export_dir = '../examples/example_results'
export_name = 'E2F1_RB1_9606'
bg_results.export(outdir=export_dir, filename=export_name)
print (bg_results.output_format)

json


### toDataFrame
**This method formats the result so that it can be loaded into a [pandas](http://pandas.pydata.org/) DataFrame for further analysis.** 
**The pandas call to use depends on the format type you chose.**

In [17]:
import pandas as pd

# Transform the results into a DataFrame regardless of the format requested.
# The reader is chosen from the query's output_format rather than by trying
# read_csv and catching IOError: that approach would also hide genuine I/O
# failures and calls toDataFrame() twice.
frame_source = bg_results.toDataFrame()
if bg_results.output_format == 'json':
    # json results are keyed by interaction ID, hence orient='index'
    df = pd.read_json(frame_source, orient='index')
    print ('json detected')
else:
    # the only other documented format is tab2, which is tab-delimited
    df = pd.read_csv(frame_source, sep='\t')
    print ('tab2 detected')

json detected


In [18]:
df.head()  # preview the first rows

Unnamed: 0,BIOGRID_ID_A,BIOGRID_ID_B,BIOGRID_INTERACTION_ID,ENTREZ_GENE_A,ENTREZ_GENE_B,EXPERIMENTAL_SYSTEM,EXPERIMENTAL_SYSTEM_TYPE,MODIFICATION,OFFICIAL_SYMBOL_A,OFFICIAL_SYMBOL_B,...,PUBMED_ID,QUALIFICATIONS,QUANTITATION,SOURCEDB,SYNONYMS_A,SYNONYMS_B,SYSTEMATIC_NAME_A,SYSTEMATIC_NAME_B,TAGS,THROUGHPUT
1028335,111860,106543,1028335,5925,25,Reconstituted Complex,physical,-,RB1,ABL1,...,7828850,DNA complex,-,BIOGRID,OSRC|PPP1R130|RB|p105-Rb|pRb|pp110,ABL|JTK7|bcr/abl|c-ABL|c-ABL1|p150|v-abl,RP11-174I10.1,RP11-83J21.1,-,Low Throughput
1028336,111860,108201,1028336,5925,1869,Reconstituted Complex,physical,-,RB1,E2F1,...,7828850,DNA complex,-,BIOGRID,OSRC|PPP1R130|RB|p105-Rb|pRb|pp110,E2F-1|RBAP1|RBBP3|RBP3,RP11-174I10.1,-,-,Low Throughput
1028337,106543,111860,1028337,25,5925,Reconstituted Complex,physical,-,ABL1,RB1,...,7828850,DNA complex,-,BIOGRID,ABL|JTK7|bcr/abl|c-ABL|c-ABL1|p150|v-abl,OSRC|PPP1R130|RB|p105-Rb|pRb|pp110,RP11-83J21.1,RP11-174I10.1,-,Low Throughput
1028723,111860,111831,1028723,5925,5894,Affinity Capture-Western,physical,-,RB1,RAF1,...,19058874,-,-,BIOGRID,OSRC|PPP1R130|RB|p105-Rb|pRb|pp110,CMD1NN|CRAF|NS5|Raf-1|c-Raf,RP11-174I10.1,-,-,Low Throughput
1028724,111831,111860,1028724,5894,5925,Affinity Capture-Western,physical,-,RAF1,RB1,...,19058874,-,-,BIOGRID,CMD1NN|CRAF|NS5|Raf-1|c-Raf,OSRC|PPP1R130|RB|p105-Rb|pRb|pp110,-,RP11-174I10.1,-,Low Throughput


In [19]:
# The DataFrame gives quick access to details about the results, e.g.:
# which types of evidence were found for interactions involving RB1
# (as either interactor A or interactor B)?
involves_rb1 = (df['OFFICIAL_SYMBOL_A'] == 'RB1') | (df['OFFICIAL_SYMBOL_B'] == 'RB1')
df.loc[involves_rb1, 'EXPERIMENTAL_SYSTEM'].value_counts()

Affinity Capture-Western         321
Reconstituted Complex            169
Biochemical Activity             142
Two-hybrid                        44
Affinity Capture-MS               30
Co-fractionation                  11
Co-localization                    5
Co-purification                    4
Affinity Capture-Luminescence      4
Protein-peptide                    3
Co-crystal Structure               3
Far Western                        3
Phenotypic Suppression             2
FRET                               2
Proximity Label-MS                 1
Phenotypic Enhancement             1
Name: EXPERIMENTAL_SYSTEM, dtype: int64

## Each of the keyword arguments <font color='green'>geneList</font>, <font color='green'>evidenceList</font> & <font color='green'>additionalIdentifierTypes</font> can be given either as a path to a file or as a Python list, and the two forms can be mixed within one call.


In [20]:
# geneList is given as a Python list, evidenceList as a path to a file.
# The path is relative (same '../examples' convention as the export cell)
# instead of a machine-specific absolute path, so the cell runs on other checkouts.
# NOTE(review): assumes the notebook is run from a directory that is a sibling of examples/ - confirm.
bg_results2 = BG.interactions('tab2', geneList=['RB1', 'E2F1'],
                                      evidenceList='../examples/evidenceList.list',
                                      includeEvidence='true',
                                      taxId=9606)

## biogridpy handles large requests --  It will paginate when there are more than 10,000 records (default max per page).

In [21]:
# geneList is given as a path to a file here. The path is relative (same
# '../examples' convention as the export cell) instead of a machine-specific
# absolute path, so the cell runs on other checkouts.
# NOTE(review): assumes the notebook is run from a directory that is a sibling of examples/ - confirm.
bg_results3 = BG.interactions('tab2', geneList='../examples/cellcycleGenes.list')

In [22]:
bg_results3.count  # number of records returned

11483

In [23]:
# all pages of the paginated response end up in one result with minimal effort
# NOTE(review): count above is 11483, but the frame below has 11482 rows and its
# column labels include what look like first-record values (e.g. '9606', 'BIOGRID').
# It seems one record is being read as part of the header row - confirm.

df = pd.read_csv(bg_results3.toDataFrame(), sep='\t')
print (df.shape)
df.head()

(11482, 47)


Unnamed: 0,BioGRID Interaction ID,Entrez Gene Interactor A,Entrez Gene Interactor B,BioGRID ID Interactor A,BioGRID ID Interactor B,Systematic Name Interactor A,Systematic Name Interactor B,Official Symbol Interactor A,Official Symbol Interactor B,Synonyms Interactor A,...,10875894,9606,9606.1,Low Throughput,-.1,-.2,-.3,-.4,-.5,BIOGRID
0,3189,5925,1523,111860,107903,RP11-174I10.1,-,RB1,CUX1,OSRC|PPP1R130|RB|p105-Rb|pRb|pp110,...,,,,,,,,,,
1,3224,7251,1026,113102,107460,-,-,TSG101,CDKN1A,TSG10|VPS23,...,,,,,,,,,,
2,3381,7534,993,113366,107428,-,-,YWHAZ,CDC25A,14-3-3-zeta|HEL-S-3|HEL4|KCIP-1|YWHAD,...,,,,,,,,,,
3,3546,1033,1017,107467,107452,-,-,CDKN3,CDK2,CDI1|CIP2|KAP|KAP1,...,,,,,,,,,,
4,3626,5932,5933,111867,111868,-,RP11-382A12.1,RBBP8,RBL1,COM1|CTIP|JWDS|RIM|SAE2|SCKL2,...,,,,,,,,,,


#### The additionalIdentifierTypes parameter was not working as of 9/24/16; not even the example from the wiki site worked.
#### It seems that BioGRID may be handling the identifier conversion within the API call itself, without needing the additionalIdentifierTypes parameter.

In [24]:
# Left disabled: additionalIdentifierTypes was not working when this notebook was written.
#bg_results4 = BG.interactions('tab2', geneList=['Q01094', 'P06400'],
#                                      additionalIdentifierTypes=['UNIPROT'])