# <span style='font-family:"Times New Roman"'> <span style=''> **OncoKB Annotation**

## <span style='font-family:"Times New Roman"'> <span style=''> *Emile Cohen* 
*March 2020*

**Goal:** In this notebook, we call the OncoKB API to get variant-level annotations for the MSK-IMPACT cohort. 

---

In [1]:
%run -i '../../utils/setup_environment.ipy'

import json
import os
import time
import warnings

import click
import requests
# Suppress all warnings: no category or module filter is given, so nothing
# is exempt.
warnings.filterwarnings('ignore')

# Relative path to the data directory. Not referenced by the code visible in
# this section -- presumably used by later cells (TODO confirm).
data_path = '../../data/'

Setup environment... done!


<span style="color:green">✅ Working on **mskimpact_env** conda environment.</span>

---
To do so, we need a token to access the API; it can be found in *Account Settings* after logging in to OncoKB (https://www.oncokb.org/dataAccess).

My Token is : **f08eeec6-fe2e-4414-9352-df71e2c1db32**

---

In [2]:
def retry_request(method, **kwargs):
    """Send an HTTP request through ``requests``, retrying transient failures.

    A try counts as failed when ``requests`` raises a ``RequestException`` or
    when the response status code starts with ``50``. Up to eight tries are
    made, waiting 0.2, 1, 5, 10, 60, 90 and 120 seconds between them.

    Args:
        method: Name of the ``requests`` function to call (``"get"``,
            ``"post"``, ...).
        **kwargs: Forwarded unchanged to that function. ``url`` is also used
            for the progress messages. ``verify`` defaults to ``False`` but
            may be passed explicitly to enable certificate checking.

    Returns:
        The first response that is not a 50x. If every try fails, the result
        of the last one: a 50x response, or ``None`` if it raised.
    """
    url = kwargs.get('url') or ""
    # NOTE(security): certificate verification is off by default to keep the
    # existing behaviour, which exposes any credentials sent in the headers to
    # interception. Callers can now pass verify=True (or a CA bundle path).
    kwargs.setdefault("verify", False)
    click.secho(f"Querying to {url}", fg="green")
    send = getattr(requests, method)

    delays = [0.2, 1, 5, 10, 60, 90, 120, 300]  # one entry per try
    last_try = len(delays)
    response = None
    for attempt, delay in enumerate(delays, start=1):
        error = None
        try:
            response = send(**kwargs)
        except requests.exceptions.RequestException as request_error:  # pragma: no cover
            error = request_error
            response = None
        if response is not None and not str(response.status_code).startswith("50"):
            break
        # Report the HTTP status when no exception was raised, rather than
        # the uninformative "error: None".
        reason = error if error is not None else f"HTTP {response.status_code}"
        if attempt == last_try:  # pragma: no cover
            # Nothing follows the final try, so do not sleep before returning.
            msg = f"Request to {url} failed with error: {reason}, giving up."
            click.secho(msg, fg="yellow", err=True)
            break
        msg = f"Request to {url} failed with error: {reason}, retrying in {delay}s..."
        click.secho(msg, fg="yellow", err=True)
        time.sleep(delay)
    return response


# OncoKB REST API (v1) URLs, keyed by a short endpoint name.
ONCOKB_ENDPOINTS = dict(
    # Not referenced by the functions visible in this section.
    api_info="https://www.oncokb.org/api/v1/info",
    # Queried by get_oncokb_genes().
    cancer_genes="https://www.oncokb.org/api/v1/utils/cancerGeneList",
    # POSTed to by get_oncokb_treatments().
    annotate="https://www.oncokb.org/api/v1/annotate/mutations/byGenomicChange",
)


def oncokb_query(endpoint, method="get", data=None):
    """Make an authenticated call to the OncoKB API and return the parsed JSON.

    Args:
        endpoint: Full URL to query (see ``ONCOKB_ENDPOINTS``).
        method: Name of the ``requests`` function to use (``"get"``,
            ``"post"``, ...).
        data: Optional request body, forwarded as ``data=`` to ``requests``.

    Returns:
        The response body decoded with ``json.loads``.

    Raises:
        click.UsageError: If the ``ONCOKB_TOKEN`` environment variable is
            unset or empty, if no response was obtained, or if the response
            status is not OK.
    """
    # Look the token up under the variable name the error message below tells
    # users to export. Credentials must never be hard-coded in this function.
    oncokb_token = os.environ.get("ONCOKB_TOKEN")
    if not oncokb_token:
        raise click.UsageError(
            "oncoKb authentication token is missing. "
            "Add it as enviromental variable:"
            "\n\texport ONCOKB_TOKEN=<your-oncokb-token>"
            "\nOr add them to a .env file."
        )
    headers = {
        "Authorization": f"Bearer {oncokb_token}",
        "Content-Type": "application/json",
    }
    response = retry_request(method, url=endpoint, headers=headers, data=data)
    # retry_request returns None when the last try raised instead of
    # answering; report that as a failed query rather than an AttributeError.
    if response is None:
        raise click.UsageError("OncoKb query failed. Reason: no response received")
    if not response.ok:
        raise click.UsageError(f"OncoKb query failed. Reason: {response.reason}")
    return json.loads(response.content)


def get_oncokb_treatments(df, genomic_cols, tumor_type):
    """Annotate every variant in ``df`` via the OncoKB ``annotate`` endpoint.

    One query object is built per row of ``df`` and all of them are sent in
    a single POST request.

    Args:
        df: DataFrame with one variant per row.
        genomic_cols: Exactly five column names, in this order: chromosome,
            start, end, reference allele, alternate allele.
        tumor_type: Value sent as ``tumorType`` with every variant.

    Returns:
        The parsed JSON response from ``oncokb_query`` (not a DataFrame).

    Raises:
        ValueError: If ``genomic_cols`` does not contain exactly five items.
        click.UsageError: Propagated from ``oncokb_query``.
    """
    chrom_col, start_col, end_col, ref_col, alt_col = genomic_cols
    ordered_cols = (chrom_col, start_col, end_col, ref_col, alt_col)
    # Walk the five columns in lockstep rather than using df.T.to_dict():
    # that dict is keyed by index label, so rows sharing a label overwrite
    # each other and variants are silently lost.
    rows = zip(*(df[col] for col in ordered_cols))
    data = [
        {
            "genomicLocation": ",".join(map(str, row)),
            "tumorType": tumor_type,
            "id": "",
            "evidenceTypes": [],
        }
        for row in rows
    ]
    annotated_variants = oncokb_query(
        ONCOKB_ENDPOINTS["annotate"], method="post", data=json.dumps(data)
    )
    return annotated_variants


def get_oncokb_genes():
    """Fetch the OncoKB cancer gene list.

    Calls ``oncokb_query`` with its default method on the ``cancer_genes``
    endpoint and returns the result unchanged.
    """
    cancer_genes_url = ONCOKB_ENDPOINTS["cancer_genes"]
    return oncokb_query(cancer_genes_url)
