This notebook builds a lookup table that maps each Gene Ontology (GO) term ID to its name/label, together with any alternative IDs of that term, and saves the table as a TSV file.

In [1]:
import rdflib
import pandas as pd
import gzip
import requests

In [2]:
# Download the current GO release (OWL, RDF/XML) from the OBO PURL and save it
# next to the notebook as "go.owl".
url = "http://purl.obolibrary.org/obo/go.owl"

# requests has no default timeout: without one, a stalled server would block
# this cell forever. (connect timeout, read timeout) in seconds; the read
# timeout bounds the wait between received chunks, not the whole transfer.
response = requests.get(url, timeout=(10, 300))

if response.status_code == 200:
    # Binary mode: the payload is written exactly as received.
    with open("go.owl", "wb") as file:
        file.write(response.content)
    print("GO .owl file downloaded successfully!")
else:
    # Best-effort: report and continue; an existing go.owl is left untouched.
    print(f"Failed to download the file. Status code: {response.status_code}")

GO .owl file downloaded successfully!


In [2]:
# Build an empty RDF graph, then load the downloaded ontology into it.
# "xml" selects rdflib's RDF/XML parser, the serialization used by go.owl.
g = rdflib.Graph()
g.parse(source="go.owl", format="xml")

<Graph identifier=Nc0263a7b8ee54d0b9b22cb31d318dced (<class 'rdflib.graph.Graph'>)>

In [7]:
# SPARQL query: one result row per (class, label) pair. The class's alternative
# IDs are folded into a single ", "-separated string by GROUP_CONCAT.
#
# Only patterns that feed the SELECT are matched. An extra OPTIONAL on
# oboInOwl:id would bind a variable nobody reads and, for a class with several
# ids, multiply the solution rows and duplicate entries inside GROUP_CONCAT.
query = """
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX obo: <http://purl.obolibrary.org/obo/>
PREFIX oboInOwl: <http://www.geneontology.org/formats/oboInOwl#>

SELECT ?go_term ?label (GROUP_CONCAT(?alt_id; separator=", ") AS ?alt_ids)
WHERE {
  ?go_term rdf:type owl:Class .
  ?go_term rdfs:label ?label .
  OPTIONAL {
    ?go_term oboInOwl:hasAlternativeId ?alt_id .
  }
}
GROUP BY ?go_term ?label
"""

# OPTIONAL keeps classes that have no alternative ID in the result set.
results = g.query(query)

In [16]:
# Flatten the SPARQL result rows into plain-string records.
# A falsy alt_ids value (the class has no alternative IDs) is stored as None.
data = [
    {
        "GO Term": str(row.go_term),
        "Label": str(row.label),
        "Alternative IDs": str(row.alt_ids) if row.alt_ids else None,
    }
    for row in results
]

# Explicit columns: with an empty result set, pd.DataFrame([]) would have no
# columns at all and the column lookups below would raise KeyError.
go_df = pd.DataFrame(data, columns=["GO Term", "Label", "Alternative IDs"])

go_df = (
    go_df
    .assign(**{
        # Keep the last path segment of the class IRI and turn "_" into ":"
        # (e.g. ".../obo/GO_0000001" -> "GO:0000001").
        "GO Term": go_df["GO Term"].map(
            lambda iri: iri.rsplit("/", 1)[-1].replace("_", ":")
        ),
        # Undo the ", " join done by the query; missing -> empty list so that
        # explode() still emits one row (with a missing value) for the term.
        "Alternative IDs": go_df["Alternative IDs"].map(
            lambda ids: ids.split(", ") if pd.notnull(ids) else []
        ),
    })
    # One output row per (term, alternative ID) pair.
    .explode("Alternative IDs", ignore_index=True)
    .rename(columns={"Label": "GO Label", "Alternative IDs": "Alternative ID"})
)

go_df.to_csv("go_term_label.tsv", sep="\t", index=False)
go_df

Unnamed: 0,GO Term,GO Label,Alternative ID
0,GO:0000001,mitochondrion inheritance,
1,GO:0000002,mitochondrial genome maintenance,
2,GO:0000003,obsolete reproduction,GO:0019952
3,GO:0000003,obsolete reproduction,GO:0050876
4,GO:0000005,obsolete ribosomal chaperone activity,
...,...,...,...
49126,GO:2001313,UDP-4-deoxy-4-formamido-beta-L-arabinopyranose...,
49127,GO:2001314,UDP-4-deoxy-4-formamido-beta-L-arabinopyranose...,
49128,GO:2001315,UDP-4-deoxy-4-formamido-beta-L-arabinopyranose...,
49129,GO:2001316,kojic acid metabolic process,
