# Target Annotation Example

In [9]:
import pandas as pd
import re
from tqdm import tqdm
import os

from target_annotation import TargetAnnotation
from target_annotation.utils.util import get_ensembl_from_uniprot

## Get list of targets

In [10]:
# Location handed to TargetAnnotation as `results_path`. "~" is expanded here so
# the value is an absolute path regardless of whether the consumer expands it
# (os.path.expanduser leaves paths without a leading "~" unchanged).
results_path = os.path.expanduser("~/results")

# Raw input identifiers: one UniProt accession and one Ensembl gene ID.
# Kept under their own name so the inputs stay available after resolution.
target_ids = ["P05067", "ENSG00000130203"]

# Identifiers starting with "ENSG" are already Ensembl gene IDs and pass through
# unchanged; anything else is treated as a UniProt accession and mapped with
# get_ensembl_from_uniprot.
targets = [
    target_id if target_id.startswith("ENSG") else get_ensembl_from_uniprot(target_id)
    for target_id in target_ids
]

## Create and Run the Workflow

In [11]:
# Set up the annotation workflow for the resolved targets and the disease
# MONDO_0004975 (named "Alzheimer disease" in the evidence record shown further down).
pipe = TargetAnnotation(
    targets=targets, disease_code="MONDO_0004975", results_path=results_path
)

# Execute the workflow; the returned mapping is inspected in the cells that follow.
res = pipe.run()

OT: target annotation...: 100%|██████████| 2/2 [00:00<00:00,  8.15it/s]
OT: disease annotation...: 100%|██████████| 2/2 [00:00<00:00,  2.63it/s]
Pharos: target annotation...: 100%|██████████| 2/2 [00:00<00:00,  7.70it/s]


## Annotation results

In [12]:
# Top-level keys of the result: one entry per target, keyed by Ensembl gene ID.
res.keys()

dict_keys(['ENSG00000142192', 'ENSG00000130203'])

In [13]:
# Each target entry is split into one sub-entry per annotation source.
res["ENSG00000130203"].keys()

dict_keys(['OpenTargets', 'OpenTargets_disease_evidence', 'Pharos'])

### Fields retrieved from OpenTargets for each target

In [14]:
# Names of the fields available in the OpenTargets annotation of this target.
res["ENSG00000130203"]["OpenTargets"].keys()

dict_keys(['id', 'approvedSymbol', 'biotype', 'proteinIds', 'geneOntology', 'targetClass', 'functionDescriptions', 'tractability', 'geneticConstraint', 'pathways', 'expressions', 'associatedDiseases', 'isEssential', 'depMapEssentiality', 'chemicalProbes', 'knownDrugs', 'safetyLiabilities'])

### Examples of the OpenTargets fields

In [15]:
# Free-text functional description(s) of the target, displayed as a list of strings.
res["ENSG00000130203"]["OpenTargets"]["functionDescriptions"]

['APOE is an apolipoprotein, a protein associating with lipid particles, that mainly functions in lipoprotein-mediated lipid transport between organs via the plasma and interstitial fluids (PubMed:6860692, PubMed:1911868, PubMed:14754908). APOE is a core component of plasma lipoproteins and is involved in their production, conversion and clearance (PubMed:6860692, PubMed:2762297, PubMed:1911868, PubMed:1917954, PubMed:9395455, PubMed:14754908, PubMed:23620513). Apolipoproteins are amphipathic molecules that interact both with lipids of the lipoprotein particle core and the aqueous environment of the plasma (PubMed:6860692, PubMed:2762297, PubMed:9395455). As such, APOE associates with chylomicrons, chylomicron remnants, very low density lipoproteins (VLDL) and intermediate density lipoproteins (IDL) but shows a preferential binding to high-density lipoproteins (HDL) (PubMed:6860692, PubMed:1911868). It also binds a wide range of cellular receptors including the LDL receptor/LDLR, the L

In [16]:
# Keep only the GO term (its id and name) from each gene-ontology annotation.
[
    annotation["term"]
    for annotation in res["ENSG00000130203"]["OpenTargets"]["geneOntology"]
]

[{'id': 'GO:0005576', 'name': 'extracellular region'},
 {'id': 'GO:0048156', 'name': 'tau protein binding'},
 {'id': 'GO:0070328', 'name': 'triglyceride homeostasis'},
 {'id': 'GO:1902430', 'name': 'negative regulation of amyloid-beta formation'},
 {'id': 'GO:0034364', 'name': 'high-density lipoprotein particle'},
 {'id': 'GO:0043083', 'name': 'synaptic cleft'},
 {'id': 'GO:0008203', 'name': 'cholesterol metabolic process'},
 {'id': 'GO:0005576', 'name': 'extracellular region'},
 {'id': 'GO:0042627', 'name': 'chylomicron'},
 {'id': 'GO:0061771', 'name': 'response to caloric restriction'},
 {'id': 'GO:0000302', 'name': 'response to reactive oxygen species'},
 {'id': 'GO:0034447',
  'name': 'very-low-density lipoprotein particle clearance'},
 {'id': 'GO:0034363', 'name': 'intermediate-density lipoprotein particle'},
 {'id': 'GO:0034361', 'name': 'very-low-density lipoprotein particle'},
 {'id': 'GO:0034360', 'name': 'chylomicron remnant'},
 {'id': 'GO:0008201', 'name': 'heparin binding'}

In [17]:
# Pathways the target belongs to, each paired with its top-level term.
res["ENSG00000130203"]["OpenTargets"]["pathways"]

[{'pathway': 'Nuclear signaling by ERBB4',
  'topLevelTerm': 'Signal Transduction'},
 {'pathway': 'Retinoid metabolism and transport',
  'topLevelTerm': 'Metabolism'},
 {'pathway': 'Chylomicron clearance',
  'topLevelTerm': 'Transport of small molecules'},
 {'pathway': 'Scavenging by Class A Receptors',
  'topLevelTerm': 'Vesicle-mediated transport'},
 {'pathway': 'Chylomicron assembly',
  'topLevelTerm': 'Transport of small molecules'},
 {'pathway': 'Regulation of Insulin-like Growth Factor (IGF) transport and uptake by Insulin-like Growth Factor Binding Proteins (IGFBPs)',
  'topLevelTerm': 'Metabolism of proteins'},
 {'pathway': 'Post-translational protein phosphorylation',
  'topLevelTerm': 'Metabolism of proteins'},
 {'pathway': 'HDL remodeling', 'topLevelTerm': 'Transport of small molecules'},
 {'pathway': 'Amyloid fiber formation',
  'topLevelTerm': 'Metabolism of proteins'},
 {'pathway': 'Transcriptional regulation by the AP-2 (TFAP2) family of transcription factors',
  'topLev

### A single evidence record for ENSG00000130203 (APOE) and MONDO_0004975 (Alzheimer disease)

In [18]:
# First evidence row linking this target to the disease given as `disease_code`.
res["ENSG00000130203"]["OpenTargets_disease_evidence"]["evidences"]["rows"][0]

{'disease': {'id': 'MONDO_0004975', 'name': 'Alzheimer disease'},
 'target': {'id': 'ENSG00000130203', 'approvedSymbol': 'APOE'},
 'urls': None,
 'diseaseFromSource': None,
 'literature': ['36153580'],
 'publicationYear': 2022,
 'datasourceId': 'europepmc',
 'datatypeId': 'literature',
 'score': 1,
 'resourceScore': 142,
 'textMiningSentences': [{'section': 'other',
   'text': 'In addition to its association with an increased risk of AD, we have previously addressed in this review that APOE4 is also an outstanding genetic risk factor for LBD [680].'},
  {'section': 'other',
   'text': 'Nonetheless, the precise mechanisms by which the APOE genotype and diabetes contribute to AD risk are still undetermined.'},
  {'section': 'intro',
   'text': 'APOE4 exacerbates Aβ aggregation, tau pathology, neuroinflammation, and neurodegeneration [39], but the mechanisms through which APOE4 exerts its detrimental effects in AD pathology are still under study.'},
  {'section': 'other',
   'text': 'Henc

### Fields retrieved from Pharos for each target

In [19]:
# Names of the fields available in the Pharos annotation of this target.
res["ENSG00000130203"]["Pharos"].keys()

dict_keys(['name', 'preferredSymbol', 'tdl', 'fam', 'sym', 'description', 'novelty', 'pantherClasses', 'dto', 'gwas', 'gwasAnalytics', 'pathways', 'diseaseCounts', 'diseases', 'tissueSpecificity', 'gtex', 'ppis', 'tinx'])

### Examples of Pharos fields

In [20]:
# Pharos `tdl` value of the target.
# NOTE(review): presumably the Target Development Level — confirm against the Pharos docs.
res["ENSG00000130203"]["Pharos"]["tdl"]

'Tbio'

In [21]:
# Target name as reported by Pharos.
res["ENSG00000130203"]["Pharos"]["name"]

'Apolipoprotein E'

In [22]:
# `pantherClasses` entries from Pharos; the list is empty for this target in the run shown.
res["ENSG00000130203"]["Pharos"]["pantherClasses"]

[]

In [23]:
# `dto` entries from Pharos; the list is empty for this target in the run shown.
res["ENSG00000130203"]["Pharos"]["dto"]

[]

In [24]:
# Free-text description of the gene from Pharos (a single string).
res["ENSG00000130203"]["Pharos"]["description"]

'The protein encoded by this gene is a major apoprotein of the chylomicron. It binds to a specific liver and peripheral cell receptor, and is essential for the normal catabolism of triglyceride-rich lipoprotein constituents. This gene maps to chromosome 19 in a cluster with the related apolipoprotein C1 and C2 genes. Mutations in this gene result in familial dysbetalipoproteinemia, or type III hyperlipoproteinemia (HLP III), in which increased plasma cholesterol and triglycerides are the consequence of impaired clearance of chylomicron and VLDL remnants. [provided by RefSeq, Jun 2016]'

In [25]:
# Pathways listed by Pharos; each entry carries a name, an id (`pwid`),
# a source `type`, and per-category `targetCounts`.
res["ENSG00000130203"]["Pharos"]["pathways"]

[{'name': 'Alzheimer disease',
  'pwid': 12959,
  'targetCounts': [{'name': 'Tbio', 'value': 66},
   {'name': 'Tclin', 'value': 57},
   {'name': 'Tchem', 'value': 41},
   {'name': 'Tdark', 'value': 7}],
  'type': 'KEGG'},
 {'name': 'Cholesterol metabolism',
  'pwid': 23630,
  'targetCounts': [{'name': 'Tbio', 'value': 27},
   {'name': 'Tchem', 'value': 20},
   {'name': 'Tclin', 'value': 4}],
  'type': 'KEGG'},
 {'name': 'Binding and Uptake of Ligands by Scavenger Receptors',
  'pwid': 65693,
  'targetCounts': [{'name': 'Tdark', 'value': 71},
   {'name': 'Tbio', 'value': 37},
   {'name': 'Tchem', 'value': 7},
   {'name': 'Tclin', 'value': 1}],
  'type': 'Reactome'},
 {'name': 'Chylomicron assembly',
  'pwid': 72303,
  'targetCounts': [{'name': 'Tbio', 'value': 6},
   {'name': 'Tchem', 'value': 2},
   {'name': 'Tclin', 'value': 2}],
  'type': 'Reactome'},
 {'name': 'Chylomicron clearance',
  'pwid': 72308,
  'targetCounts': [{'name': 'Tchem', 'value': 3},
   {'name': 'Tbio', 'value': 2}]

In [26]:
# `ppis` entries from Pharos (presumably protein-protein interactions — confirm):
# each has an id, a list of name/value properties, a source type, and the
# partner target's preferred symbol.
res["ENSG00000130203"]["Pharos"]["ppis"]

[{'nid': 9688518,
  'props': [{'name': 'tdl', 'value': 'Tdark'},
   {'name': 'Novelty', 'value': '0.91681036'},
   {'name': 'p_int', 'value': '0.999998892'},
   {'name': 'p_ni', 'value': '0.000001108'},
   {'name': 'Data Source', 'value': 'BioPlex'}],
  'type': 'BioPlex',
  'target': {'preferredSymbol': 'MANSC1'}},
 {'nid': 9687076,
  'props': [{'name': 'tdl', 'value': 'Tbio'},
   {'name': 'Novelty', 'value': '0.01080446'},
   {'name': 'p_int', 'value': '0.999998035'},
   {'name': 'p_ni', 'value': '0.000001965'},
   {'name': 'Data Source', 'value': 'BioPlex'}],
  'type': 'BioPlex',
  'target': {'preferredSymbol': 'HLA-DPA1'}},
 {'nid': 9688420,
  'props': [{'name': 'tdl', 'value': 'Tbio'},
   {'name': 'Novelty', 'value': '0.03378128'},
   {'name': 'p_int', 'value': '0.999960004'},
   {'name': 'p_ni', 'value': '0.000039913'},
   {'name': 'p_wrong', 'value': '8.2e-8'},
   {'name': 'Score', 'value': '0.2'},
   {'name': 'Data Source', 'value': 'BioPlex,STRINGDB'}],
  'type': 'BioPlex,STRIN

In [27]:
# Tissue-specificity indices reported by Pharos, as name/value pairs.
res["ENSG00000130203"]["Pharos"]["tissueSpecificity"]

[{'name': 'HPA Protein Tissue Specificity Index', 'value': 0.826923},
 {'name': 'HPA RNA Tissue Specificity Index', 'value': 0.836079},
 {'name': 'HPM Protein Tissue Specificity Index', 'value': 0.750509},
 {'name': 'GTEx Tissue Specificity Index', 'value': 0.888924},
 {'name': 'GTEx Tissue Specificity Index - Male', 'value': 0.879094},
 {'name': 'GTEx Tissue Specificity Index - Female', 'value': 0.888802}]

In [28]:
# GWAS entries for the target: each has an id, a p-value, the associated SNPs, and the trait.
res["ENSG00000130203"]["Pharos"]["gwas"]

[{'gwasid': 27372,
  'pvalue': 0,
  'snps': [{'name': 'missense_variant', 'value': 'rs429358'}],
  'trait': 'Blood protein levels'},
 {'gwasid': 27381,
  'pvalue': 0,
  'snps': [{'name': 'missense_variant', 'value': 'rs429358'}],
  'trait': 'Blood protein levels'},
 {'gwasid': 63526,
  'pvalue': 0,
  'snps': [{'name': 'missense_variant', 'value': 'rs7412'}],
  'trait': 'Blood protein levels'},
 {'gwasid': 61680,
  'pvalue': 3e-322,
  'snps': [{'name': 'TF_binding_site_variant', 'value': 'rs769446'}],
  'trait': 'Low density lipoprotein cholesterol levels'},
 {'gwasid': 95273,
  'pvalue': 8e-315,
  'snps': [{'name': 'missense_variant', 'value': 'rs7412'}],
  'trait': 'Total cholesterol levels'},
 {'gwasid': 67615,
  'pvalue': 1e-302,
  'snps': [{'name': 'missense_variant', 'value': 'rs7412'}],
  'trait': 'Blood protein levels'},
 {'gwasid': 70737,
  'pvalue': 1e-300,
  'snps': [{'name': 'missense_variant', 'value': 'rs7412'}],
  'trait': 'Low density lipoprotein cholesterol levels'},
 {

## Export Results

In [29]:
# Export the annotation results through the workflow object.
# NOTE(review): presumably writes under `results_path` — confirm against TargetAnnotation.export.
pipe.export()