# Human Phenotype Ontology (HPO)

In [1]:
import pandas as pd
from urllib.request import urlopen
from rdflib import Graph, URIRef

## Download

In [2]:
# List of files to download, with their URLs and save names
files = [
    {'url': 'http://purl.obolibrary.org/obo/hp.owl', 'name': 'hp.owl'},
    {'url': 'https://github.com/obophenotype/human-phenotype-ontology/releases/latest/download/phenotype.hpoa', 'name': 'phenotype.hpoa'}
]

# Download each file
for file in files:
    url = file['url']
    download_file = file['name']

    try:
        # Open the URL and write the contents to a file with the specified name
        with urlopen(url) as response, open(download_file, 'wb') as out_file:
            out_file.write(response.read())
        print(f"File downloaded and saved as '{download_file}'")
    except Exception as e:
        print(f"Failed to download {url}: {e}")


File downloaded and saved as 'hp.owl'
File downloaded and saved as 'phenotype.hpoa'


## Annotation File

In [3]:
ann_file_path = 'phenotype.hpoa'
df_ann = pd.read_csv(ann_file_path, sep='\t', comment='#')
df_ann.head()

  df_ann = pd.read_csv(ann_file_path, sep='\t', comment='#')


Unnamed: 0,database_id,disease_name,qualifier,hpo_id,reference,evidence,onset,frequency,sex,modifier,aspect,biocuration
0,OMIM:619340,Developmental and epileptic encephalopathy 96,,HP:0011097,PMID:31675180,PCS,,1/2,,,P,HPO:probinson[2021-06-21]
1,OMIM:619340,Developmental and epileptic encephalopathy 96,,HP:0002187,PMID:31675180,PCS,,1/1,,,P,HPO:probinson[2021-06-21]
2,OMIM:619340,Developmental and epileptic encephalopathy 96,,HP:0001518,PMID:31675180,PCS,,1/2,,,P,HPO:probinson[2021-06-21]
3,OMIM:619340,Developmental and epileptic encephalopathy 96,,HP:0032792,PMID:31675180,PCS,,1/2,,,P,HPO:probinson[2021-06-21]
4,OMIM:619340,Developmental and epileptic encephalopathy 96,,HP:0011451,PMID:31675180,PCS,,1/2,,,P,HPO:probinson[2021-06-21]


In [6]:
# Rows on Diabetes mellitus, insulin-dependent 1
df_ann[df_ann['database_id'] == "OMIM:222100"]

Unnamed: 0,database_id,disease_name,qualifier,hpo_id,reference,evidence,onset,frequency,sex,modifier,aspect,biocuration
117307,OMIM:222100,"Diabetes mellitus, insulin-dependent-1",,HP:0000819,OMIM:222100,IEA,,,,,P,HPO:iea[2009-02-17]
117308,OMIM:222100,"Diabetes mellitus, insulin-dependent-1",,HP:0002960,OMIM:222100,TAS,,,,,P,HPO:skoehler[2018-10-08]
117309,OMIM:222100,"Diabetes mellitus, insulin-dependent-1",,HP:0001993,OMIM:222100,IEA,,,,,P,HPO:iea[2009-02-17]
117310,OMIM:222100,"Diabetes mellitus, insulin-dependent-1",,HP:0001959,OMIM:222100,IEA,,,,,P,HPO:iea[2009-02-17]
117311,OMIM:222100,"Diabetes mellitus, insulin-dependent-1",,HP:0002591,OMIM:222100,IEA,,,,,P,HPO:iea[2009-02-17]
117312,OMIM:222100,"Diabetes mellitus, insulin-dependent-1",,HP:0003074,OMIM:222100,IEA,,,,,P,HPO:iea[2009-02-17]
117313,OMIM:222100,"Diabetes mellitus, insulin-dependent-1",,HP:0410050,PMID:9357814;PMID:17659063;PMID:16731998,PCS,,30/30,,,P,HPO:NicoleVasilevsky[2018-02-23];HPO:NicoleVas...
117314,OMIM:222100,"Diabetes mellitus, insulin-dependent-1",,HP:0000103,OMIM:222100,IEA,,,,,P,HPO:iea[2009-02-17]


## Ontology File

In [4]:
g = Graph()
g.parse("hp.owl", format="xml")
print(f"Graph has {len(g)} statements.")

Graph has 896939 statements.


In [5]:
index = 0
for s, p, o in g:
    print(f"Subject: {s}, Predicate: {p}, Object: {o}")
    index+=1
    if index == 10:
        break

Subject: N76bab01e7a75421a886b563960f0db71, Predicate: http://www.w3.org/2002/07/owl#annotatedProperty, Object: http://purl.obolibrary.org/obo/IAO_0000115
Subject: Nb324a8db42ad4e25a357ca59b154b031, Predicate: http://www.geneontology.org/formats/oboInOwl#hasDbXref, Object: https://orcid.org/0000-0003-0986-4123
Subject: N1a12fd0758a544c7883a027c337ec37c, Predicate: http://www.w3.org/2002/07/owl#annotatedSource, Object: http://purl.obolibrary.org/obo/HP_0034399
Subject: N7d3a081ad32742cb899de1ad44e4252a, Predicate: http://www.w3.org/1999/02/22-rdf-syntax-ns#first, Object: N748726fbd6b64e4789b771fa218ed08b
Subject: http://purl.obolibrary.org/obo/HP_0040212, Predicate: http://www.w3.org/2000/01/rdf-schema#subClassOf, Object: http://purl.obolibrary.org/obo/HP_0005324
Subject: N78a05da8e7f64abdb4f14c66d5103250, Predicate: http://www.w3.org/2002/07/owl#intersectionOf, Object: N469de40abf394bc7b0ad192eee8500f9
Subject: http://purl.obolibrary.org/obo/UBERON_0015870, Predicate: http://www.w3.org

In [None]:
# Statements on Decreased level of 1,5 anhydroglucitol in serum
subject_uri = URIRef("http://purl.obolibrary.org/obo/HP_0410050")

# Filter statements where the subject matches the specified URI
filtered_statements = g.triples((subject_uri, None, None))

# Print the filtered statements
for subject, predicate, obj in filtered_statements:
    print(f"Subject: {subject}, Predicate: {predicate}, Object: {obj}")