# Target Annotation Example

In [3]:
import pandas as pd
import re
from tqdm import tqdm
import os

from target_annotation import TargetAnnotation
from target_annotation.utils.util import get_ensembl_from_uniprot

## Get list of targets

In [5]:
# Expand "~" explicitly: Python's file APIs do not expand it on their own, so
# an unexpanded value could end up as a literal "~" directory under the CWD.
results_path = os.path.expanduser("~/results")

# Input identifiers may be UniProt accessions or Ensembl gene IDs.
target_ids = ["P05067", "ENSG00000130203"]  # one UniProt accession and one Ensembl gene ID

# IDs that already start with "ENSG" are kept unchanged; every other ID is
# passed through get_ensembl_from_uniprot. The raw list keeps its own name so
# re-running this cell always starts from the same input.
targets = [
    target_id if re.match("ENSG.*", target_id) else get_ensembl_from_uniprot(target_id)
    for target_id in target_ids
]

## Create and Run the Workflow

In [7]:
# Settings for the annotation workflow: the targets to annotate, the disease
# code used for the disease-evidence lookup, and the results location.
workflow_settings = {
    "targets": targets,
    "disease_code": "MONDO_0004975",
    "results_path": results_path,
}
pipe = TargetAnnotation(**workflow_settings)

# Run the workflow; the returned mapping is inspected in the cells below.
res = pipe.run()

reading A2A files for model: default: 100%|██████████| 604/604 [00:01<00:00, 519.04it/s]
OT: target annotation...: 100%|██████████| 729/729 [01:10<00:00, 10.39it/s]
OT: disease annotation...: 100%|██████████| 729/729 [01:04<00:00, 11.39it/s]
Pharos: target annotation...: 100%|██████████| 729/729 [01:08<00:00, 10.70it/s]


## The flat simulation results with annotations and corrections

In [8]:
# Top-level sections stored for the entry keyed "TMT_P35237".
res["TMT_P35237"].keys()

dict_keys(['feature_type', 'ensg', 'all_to_all', 'OpenTargets', 'OpenTargets_disease_evidence', 'Pharos'])

### The `all_to_all` field

In [26]:
# All-to-all statistics for this entry under the "default" model, restricted
# to the "out_mmse" key (presumably an outcome variable — confirm).
res["TMT_P35237"]["all_to_all"]["default"]["out_mmse"]

{'tailProbabilities': 0.3984375,
 'effectSize': 0.10979897030479194,
 'cohensD': 0.01850407208919436,
 'effectStd': 5.933773375694432,
 'pathFrequency': 1.0}

### Fields retrieved from OpenTargets for each target

In [9]:
# Names of the OpenTargets fields stored for this entry.
res["TMT_P35237"]["OpenTargets"].keys()

dict_keys(['id', 'approvedSymbol', 'biotype', 'proteinIds', 'geneOntology', 'targetClass', 'functionDescriptions', 'tractability', 'geneticConstraint', 'pathways', 'expressions', 'associatedDiseases', 'isEssential', 'depMapEssentiality', 'chemicalProbes', 'knownDrugs', 'safetyLiabilities'])

### Examples of the OT fields

In [10]:
# Free-text function description(s) from the OpenTargets annotation.
res["TMT_P35237"]["OpenTargets"]["functionDescriptions"]

['May be involved in the regulation of serine proteinases present in the brain or extravasated from the blood (By similarity). Inhibitor of cathepsin G, kallikrein-8 and thrombin. May play an important role in the inner ear in the protection against leakage of lysosomal content during stress and loss of this protection results in cell death and sensorineural hearing loss. {ECO:0000250, ECO:0000269|PubMed:10068683, ECO:0000269|PubMed:17761692, ECO:0000269|PubMed:20451170, ECO:0000269|PubMed:8136380, ECO:0000269|PubMed:8415716}.']

In [11]:
# Pull the "term" sub-record out of every geneOntology annotation.
go_annotations = res["TMT_P35237"]["OpenTargets"]["geneOntology"]
[annotation["term"] for annotation in go_annotations]

[{'id': 'GO:0002020', 'name': 'protease binding'},
 {'id': 'GO:0062023', 'name': 'collagen-containing extracellular matrix'},
 {'id': 'GO:0005737', 'name': 'cytoplasm'},
 {'id': 'GO:0005829', 'name': 'cytosol'},
 {'id': 'GO:0005886', 'name': 'plasma membrane'},
 {'id': 'GO:0101003', 'name': 'ficolin-1-rich granule membrane'},
 {'id': 'GO:0071470', 'name': 'cellular response to osmotic stress'},
 {'id': 'GO:0005886', 'name': 'plasma membrane'},
 {'id': 'GO:0070062', 'name': 'extracellular exosome'},
 {'id': 'GO:0030667', 'name': 'secretory granule membrane'},
 {'id': 'GO:0005576', 'name': 'extracellular region'},
 {'id': 'GO:0005634', 'name': 'nucleus'},
 {'id': 'GO:0005737', 'name': 'cytoplasm'},
 {'id': 'GO:0005886', 'name': 'plasma membrane'},
 {'id': 'GO:0004867', 'name': 'serine-type endopeptidase inhibitor activity'},
 {'id': 'GO:0005737', 'name': 'cytoplasm'},
 {'id': 'GO:0005737', 'name': 'cytoplasm'},
 {'id': 'GO:0002020', 'name': 'protease binding'},
 {'id': 'GO:0005615', 'nam

In [12]:
# Pathway annotations (pathway name and top-level term) from OpenTargets.
res["TMT_P35237"]["OpenTargets"]["pathways"]

[{'pathway': 'Neutrophil degranulation', 'topLevelTerm': 'Immune System'},
 {'pathway': 'Dissolution of Fibrin Clot', 'topLevelTerm': 'Hemostasis'}]

### Here we show a single evidence record for TMT_P35237 and MONDO_0004975

In [13]:
# First row of the OpenTargets disease-evidence rows stored for this entry.
evidence_rows = res["TMT_P35237"]["OpenTargets_disease_evidence"]["evidences"]["rows"]
evidence_rows[0]

{'disease': {'id': 'MONDO_0004975', 'name': 'Alzheimer disease'},
 'target': {'id': 'ENSG00000124570', 'approvedSymbol': 'SERPINB6'},
 'diseaseFromSource': None,
 'literature': ['35416570'],
 'publicationYear': 2022,
 'datasourceId': 'europepmc',
 'datatypeId': 'literature',
 'score': 0.09,
 'resourceScore': 9,
 'textMiningSentences': [{'section': 'concl',
   'text': 'Concerning humans, SERPINA3, SERPINB1, SERPINB6, SERPING1, SERPINH1, and SERPINI1 were dysregulated in sCJD patients, whereas only SERPINA3 and SERPINB1 members were differentially expressed in patients at early stages of AD–related pathology.'},
  {'section': 'abstract',
   'text': 'Our analysis revealed that, besides the already observed upregulation of SERPINA3 in patients with prion disease and AD, SERPINB1, SERPINB6, SERPING1, SERPINH1, and SERPINI1 were dysregulated in sCJD individuals compared to controls, while only SERPINB1 was upregulated in AD patients.'},
  {'section': 'results',
   'text': 'SERPINB6, SERPING1

### Fields retrieved from Pharos for each target

In [14]:
# Names of the Pharos fields stored for this entry.
res["TMT_P35237"]["Pharos"].keys()

dict_keys(['name', 'preferredSymbol', 'tdl', 'fam', 'sym', 'description', 'novelty', 'pantherClasses', 'dto', 'gwas', 'gwasAnalytics', 'pathways', 'diseaseCounts', 'diseases', 'tissueSpecificity', 'gtex', 'ppis', 'tinx'])

### Examples of Pharos fields

In [15]:
# Pharos "tdl" value (presumably the target development level — confirm).
res["TMT_P35237"]["Pharos"]["tdl"]

'Tbio'

In [16]:
# Pharos "name" field for this entry.
res["TMT_P35237"]["Pharos"]["name"]

'Serpin B6'

In [17]:
# PANTHER class annotations; each record carries a name and a pcid.
res["TMT_P35237"]["Pharos"]["pantherClasses"]

[{'name': 'serine protease inhibitor', 'pcid': 'PC00204'},
 {'name': 'protease inhibitor', 'pcid': 'PC00191'},
 {'name': 'enzyme modulator', 'pcid': 'PC00095'}]

In [18]:
# "dto" records (name and dtoid); presumably Drug Target Ontology terms — confirm.
res["TMT_P35237"]["Pharos"]["dto"]

[{'name': 'Serpin B6', 'dtoid': 'DTO:05002706'},
 {'name': 'Serine protease inhibitor', 'dtoid': 'DTO:05007588'},
 {'name': 'Protease inhibitor', 'dtoid': 'DTO:05007587'},
 {'name': 'Enzyme modulator', 'dtoid': 'DTO:05007584'},
 {'name': 'protein', 'dtoid': 'PR:000000001'}]

In [19]:
# Free-text description stored by Pharos; the text shown is attributed to RefSeq.
res["TMT_P35237"]["Pharos"]["description"]

'The protein encoded by this gene is a member of the serpin (serine proteinase inhibitor) superfamily, and ovalbumin(ov)-serpin subfamily. It was originally discovered as a placental thrombin inhibitor. The mouse homolog was found to be expressed in the hair cells of the inner ear. Mutations in this gene are associated with nonsyndromic progressive hearing loss, suggesting that this serpin plays an important role in the inner ear in the protection against leakage of lysosomal content during stress, and that loss of this protection results in cell death and sensorineural hearing loss. Alternatively spliced transcript variants have been found for this gene. [provided by RefSeq, Sep 2010]'

In [20]:
# Pharos pathway records; each has a name, a pwid, a source type
# (e.g. KEGG, Reactome) and a list of targetCounts.
res["TMT_P35237"]["Pharos"]["pathways"]

[{'name': 'Amoebiasis',
  'pwid': 14348,
  'targetCounts': [{'name': 'Tbio', 'value': 67},
   {'name': 'Tchem', 'value': 24},
   {'name': 'Tclin', 'value': 12},
   {'name': 'Tdark', 'value': 1}],
  'type': 'KEGG'},
 {'name': 'Dissolution of Fibrin Clot',
  'pwid': 82996,
  'targetCounts': [{'name': 'Tbio', 'value': 8},
   {'name': 'Tchem', 'value': 3},
   {'name': 'Tclin', 'value': 2}],
  'type': 'Reactome'},
 {'name': 'Hemostasis',
  'pwid': 99548,
  'targetCounts': [{'name': 'Tbio', 'value': 391},
   {'name': 'Tchem', 'value': 134},
   {'name': 'Tdark', 'value': 82},
   {'name': 'Tclin', 'value': 59}],
  'type': 'Reactome'},
 {'name': 'Immune System',
  'pwid': 102587,
  'targetCounts': [{'name': 'Tbio', 'value': 1485},
   {'name': 'Tchem', 'value': 384},
   {'name': 'Tdark', 'value': 213},
   {'name': 'Tclin', 'value': 164}],
  'type': 'Reactome'},
 {'name': 'Innate Immune System',
  'pwid': 105438,
  'targetCounts': [{'name': 'Tbio', 'value': 703},
   {'name': 'Tchem', 'value': 253

In [21]:
# "ppis" records (presumably protein-protein interactions — confirm); each
# lists name/value props and the partner target's preferred symbol.
res["TMT_P35237"]["Pharos"]["ppis"]

[{'nid': 2976643,
  'props': [{'name': 'tdl', 'value': 'Tbio'},
   {'name': 'Novelty', 'value': '0.03308357'},
   {'name': 'p_int', 'value': '0.999999888'},
   {'name': 'p_ni', 'value': '1.6e-8'},
   {'name': 'p_wrong', 'value': '9.6e-8'},
   {'name': 'Score', 'value': '0.346'},
   {'name': 'Data Source', 'value': 'BioPlex,STRINGDB'}],
  'type': 'BioPlex,STRINGDB',
  'target': {'preferredSymbol': 'SERPINB8'}},
 {'nid': 2976630,
  'props': [{'name': 'tdl', 'value': 'Tclin'},
   {'name': 'Novelty', 'value': '0.00017503'},
   {'name': 'fam', 'value': 'Enzyme'},
   {'name': 'p_int', 'value': '0.838717049'},
   {'name': 'p_ni', 'value': '0.161279445'},
   {'name': 'p_wrong', 'value': '0.000003507'},
   {'name': 'Score', 'value': '0.436'},
   {'name': 'Data Source', 'value': 'BioPlex,STRINGDB'}],
  'type': 'BioPlex,STRINGDB',
  'target': {'preferredSymbol': 'PLAT'}},
 {'nid': 2976830,
  'props': [{'name': 'tdl', 'value': 'Tchem'},
   {'name': 'Novelty', 'value': '0.0005322'},
   {'name': 'fa

In [22]:
# Tissue-specificity indices stored by Pharos, as name/value pairs.
res["TMT_P35237"]["Pharos"]["tissueSpecificity"]

[{'name': 'HPA Protein Tissue Specificity Index', 'value': 0.385621},
 {'name': 'HPA RNA Tissue Specificity Index', 'value': 0.640257},
 {'name': 'HPM Protein Tissue Specificity Index', 'value': 0.553754},
 {'name': 'GTEx Tissue Specificity Index', 'value': 0.62912},
 {'name': 'GTEx Tissue Specificity Index - Male', 'value': 0.651631},
 {'name': 'GTEx Tissue Specificity Index - Female', 'value': 0.525407}]

In [23]:
# GWAS records stored by Pharos for this entry (gwasid, p-value, SNPs, trait).
res["TMT_P35237"]["Pharos"]["gwas"]

[{'gwasid': 96334,
  'pvalue': 2e-08,
  'snps': [{'name': '5_prime_UTR_variant', 'value': 'rs73718779'}],
  'trait': 'Chronic lymphocytic leukemia'}]

## Export Results

In [24]:
# Export the workflow results (presumably written under results_path — confirm).
pipe.export()