In [75]:
from rdflib import Graph, Namespace, URIRef, BNode, Literal
from rdflib.namespace import RDF, RDFS, OWL
import argparse, pathlib, textwrap

In [78]:
# oboInOwl vocabulary namespace; the synonym and xref properties below are minted from it
OIO = Namespace("http://www.geneontology.org/formats/oboInOwl#")
# Annotation property used for textual definitions (IAO_0000115)
IAO_DEF = URIRef("http://purl.obolibrary.org/obo/IAO_0000115")
# oboInOwl annotation properties used by the migration
HAS_SYN = OIO.hasSynonym
HAS_XREF = OIO.hasDbXref

In [85]:
def graft_axiom(g: Graph, subj: URIRef, prop: URIRef,
                tgt_literal: Literal, xref_literal: Literal):
    """
    Assert an annotation on ``subj`` and attach a cross-reference to it.

    Two things are added to ``g`` (the graph is mutated in place, nothing
    is returned):

    1. the plain assertion::

           subj  prop  tgt_literal .

    2. a fresh blank node describing that assertion::

           [ rdf:type               owl:Axiom ;
             owl:annotatedSource    subj ;
             owl:annotatedProperty  prop ;
             owl:annotatedTarget    tgt_literal ;
             oio:hasDbXref          xref_literal ] .
    """
    # One new blank node per call: it stands for the annotated axiom.
    axiom_node = BNode()

    new_triples = (
        # the assertion itself
        (subj, prop, tgt_literal),
        # the owl:Axiom node pointing back at the assertion
        (axiom_node, RDF.type, OWL.Axiom),
        (axiom_node, OWL.annotatedSource, subj),
        (axiom_node, OWL.annotatedProperty, prop),
        (axiom_node, OWL.annotatedTarget, tgt_literal),
        # the cross-reference carried by the axiom
        (axiom_node, HAS_XREF, xref_literal),
    )
    for triple in new_triples:
        g.add(triple)

In [86]:
# ─────────────────────────────────────────────────────────────────────────────
# Main conversion routine
# ─────────────────────────────────────────────────────────────────────────────
def migrate(in_path: pathlib.Path, out_path: pathlib.Path):
    """
    Rewrite rdfs:seeAlso / rdfs:comment annotations as axiom-annotated
    definitions and synonyms, then save the result as RDF/XML.

    For every subject that has an ``rdfs:seeAlso`` value and at least two
    ``rdfs:comment`` values:

    * the seeAlso value becomes an ``NCIT:``-prefixed xref literal;
    * the longest comment is re-asserted as ``IAO_DEF`` and the shortest as
      ``HAS_SYN``, each wrapped in an ``owl:Axiom`` carrying that xref
      (see ``graft_axiom``);
    * the two consumed comments and the consumed seeAlso triple are removed.

    Subjects with fewer than two comments are left untouched.

    Parameters
    ----------
    in_path : pathlib.Path
        Ontology file to read; no explicit format is passed to ``Graph.parse``.
    out_path : pathlib.Path
        Destination file, written with ``format="xml"``.
    """
    g = Graph()
    g.parse(in_path)                        # no format= given; rdflib picks the parser

    # Snapshot the subjects first: the loop body adds and removes triples, and
    # mutating the graph while a live query generator is being consumed is fragile.
    for cls in list(g.subjects(RDFS.seeAlso, None)):
        # ---- 1) NCIT ID -----------------------------------------------------
        see_also = next(g.objects(cls, RDFS.seeAlso), None)
        if see_also is None:
            continue        # nothing left to consume for this subject
        ncit_raw = str(see_also)
        ncit_id  = ncit_raw if ncit_raw.startswith("NCIT:") else f"NCIT:{ncit_raw}"
        xref_lit = Literal(ncit_id)

        # ---- 2) Split comments into name vs definition ----------------------
        coms = list(g.objects(cls, RDFS.comment))
        if len(coms) < 2:
            continue        # skip if we don't have both pieces
        # Heuristic: the definition is usually longer than the name.
        coms_sorted = sorted(coms, key=lambda lit: len(str(lit)))
        name_lit, def_lit = coms_sorted[0], coms_sorted[-1]

        # ---- 3) Add new annotations wrapped in owl:Axiom --------------------
        graft_axiom(g, cls, IAO_DEF, def_lit,  xref_lit)
        graft_axiom(g, cls, HAS_SYN, name_lit, xref_lit)

        # ---- 4) Cleanup -----------------------------------------------------
        g.remove((cls, RDFS.comment, name_lit))
        g.remove((cls, RDFS.comment, def_lit))
        # Remove the node exactly as it was read. A plain Literal rebuilt from
        # its string form does not match a typed or language-tagged literal
        # (or a URIRef), so the old triple would silently survive.
        g.remove((cls, RDFS.seeAlso, see_also))

    g.serialize(out_path, format="xml")     # RDF/XML output
    print(f"✔  Saved enriched ontology to {out_path}")


In [84]:
# Input ontology and output path handed to migrate()
SRC_FILE = "_hpvco.rdf"
DST_FILE = "hpvco.rdf"

In [87]:
# Run the conversion: read SRC_FILE, write the enriched graph to DST_FILE
migrate(in_path=pathlib.Path(SRC_FILE), out_path=pathlib.Path(DST_FILE))

✔  Saved enriched ontology to hpvco.rdf


In [88]:
import pandas as pd
import os

# Load the competency-question workbook into a DataFrame
excel_file = 'CQ_Eval.xlsx'
df = pd.read_excel(excel_file)

# Preview: first rows, then the list of column names
print("Excel file structure:", df.head(), sep="\n")
print("\nColumn names:", df.columns.tolist(), sep="\n")

Excel file structure:
              Domain    ID                                             SPARQL   
0  General Knowledge  CQ_1  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-sc...  \
1                NaN  CQ_2  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-...   
2                NaN  CQ_3  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-sc...   
3                NaN  CQ_4  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-sc...   
4                NaN  CQ_5  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-sc...   

                                  Coverage Questions   
0         What cancers are related to HPV infection?  \
1  What cancers can be potentially be prevented b...   
2  What are the known risk factors  causally rela...   
3  What anatomical sites are associated with HPV ...   
4                     What does HPV infection cause?   

                                     Expected Result   
0  "cervical cancer"\n"penile cancer"\n"anal canc...  \
1  "cervical cancer"\n"penile cancer"\n"an

In [89]:
# Narrow the preview to the ID and SPARQL columns, located by position
print("Shape of dataframe:", df.shape)
print("\nFirst 5 rows of ID and SPARQL columns:")
if len(df.columns) < 3:
    # Too few columns for the positional lookup; list what is there instead
    print("Available columns:", df.columns.tolist())
else:
    # Positional assumption: 2nd column (index 1) is the ID, 3rd (index 2) the SPARQL
    id_col, sparql_col = df.columns[1], df.columns[2]
    print(f"ID column: '{id_col}'")
    print(f"SPARQL column: '{sparql_col}'")
    print("\nData preview:")
    preview = df[[id_col, sparql_col]].head()
    print(preview)

Shape of dataframe: (20, 6)

First 5 rows of ID and SPARQL columns:
ID column: 'ID'
SPARQL column: 'SPARQL'

Data preview:
     ID                                             SPARQL
0  CQ_1  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-sc...
1  CQ_2  PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-...
2  CQ_3  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-sc...
3  CQ_4  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-sc...
4  CQ_5  PREFIX rdfs: <http://www.w3.org/2000/01/rdf-sc...


In [90]:
# Directory that receives one .sparql file per competency question
output_dir = 'evaluation/competency_questions'
os.makedirs(output_dir, exist_ok=True)

# Write each row's query to <ID>.sparql; rows missing an ID or a query are skipped
files_written = 0
for index, row in df.iterrows():
    if pd.notna(row['ID']) and pd.notna(row['SPARQL']):
        query_id = row['ID']
        sparql_query = row['SPARQL']

        # Create filename
        filename = f"{query_id}.sparql"
        filepath = os.path.join(output_dir, filename)

        # Write SPARQL query to file
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(sparql_query)

        files_written += 1
        print(f"✔ Created {filepath}")

# Report the number of files actually written: len(df) would also count the
# rows skipped above for a missing ID or query.
print(f"\n✔ Successfully generated {files_written} SPARQL query files in {output_dir}")

✔ Created evaluation/competency_questions/CQ_1.sparql
✔ Created evaluation/competency_questions/CQ_2.sparql
✔ Created evaluation/competency_questions/CQ_3.sparql
✔ Created evaluation/competency_questions/CQ_4.sparql
✔ Created evaluation/competency_questions/CQ_5.sparql
✔ Created evaluation/competency_questions/CQ_6.sparql
✔ Created evaluation/competency_questions/CQ_7.sparql
✔ Created evaluation/competency_questions/CQ_8.sparql
✔ Created evaluation/competency_questions/CQ_9.sparql
✔ Created evaluation/competency_questions/CQ_10.sparql
✔ Created evaluation/competency_questions/CQ_11.sparql
✔ Created evaluation/competency_questions/CQ_12.sparql
✔ Created evaluation/competency_questions/CQ_13.sparql
✔ Created evaluation/competency_questions/CQ_14.sparql
✔ Created evaluation/competency_questions/CQ_15.sparql
✔ Created evaluation/competency_questions/CQ_16.sparql
✔ Created evaluation/competency_questions/CQ_17.sparql
✔ Created evaluation/competency_questions/CQ_18.sparql
✔ Created evaluatio

In [1]:
# Rebinds SRC_FILE / DST_FILE (set earlier to the .rdf paths) to the .owl files
SRC_FILE = "hpvco.owl"
DST_FILE = "hpvco_1.owl"