In [None]:
from urllib.parse import quote

import pandas as pd
from rdflib import Graph, Namespace, Literal, RDF, RDFS, URIRef

In [3]:
# Read final_mimic.csv into a DataFrame; the trailing expression displays its row count.
final = pd.read_csv("final_mimic.csv")
final.shape[0]

220429

In [4]:
# Preview the first ten rows to check the columns and their values.
final.head(n=10)


Unnamed: 0.1,Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,anchor_age,note_id,note_seq,text,stay_id,gender,race,name,gsn,ndc,etc_rn,etccode,etcdescription
0,0,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,albuterol sulfate,5039,12280024125,1,5970.0,Asthma/COPD Therapy - Beta 2-Adrenergic Agents...
1,1,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,albuterol sulfate [ProAir HFA],28090,21695042308,1,5970.0,Asthma/COPD Therapy - Beta 2-Adrenergic Agents...
2,2,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,aspirin,4381,10135012601,1,575.0,Salicylate Analgesics
3,3,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,aspirin,4381,10135012601,2,5843.0,Platelet Aggregation Inhibitors - Salicylates
4,4,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,atorvastatin,45772,10135065305,1,2747.0,Antihyperlipidemic - HMG CoA Reductase Inhibit...
5,5,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,blood-glucose meter,19549,10585086506,1,1158.0,Medical Supplies and DME - Glucose Monitoring ...
6,6,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,blood-glucose meter,19549,10585086506,2,5904.0,"Medical Supply, FDB Superset"
7,7,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,cholecalciferol (vitamin D3),28465,10432023701,1,670.0,Vitamins - D Derivatives
8,8,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,fluticasone [Flovent HFA],21251,49999061401,1,371.0,Asthma Therapy - Inhaled Corticosteroids (Gluc...
9,9,10002013,21763296,18,F329,10,53,10002013-DS-12,12,\nName: ___ Unit No: ___\n...,34931809,F,WHITE,furosemide,8208,10544013130,1,250.0,Diuretic - Loop


In [5]:
# Keep only the rows whose ETC description mentions "Antidepressant".
# Rows with a missing description are treated as non-matches (na=False).
is_antidepressant = final["etcdescription"].str.contains("Antidepressant", na=False)
only_antidepressant = final[is_antidepressant]

# Displayed tuple: (row count, unique subject_id values, unique hadm_id values).
(
    len(only_antidepressant),
    only_antidepressant["subject_id"].nunique(),
    only_antidepressant["hadm_id"].nunique(),
)

(16461, 7184, 11903)

In [None]:
# Build an RDF graph of the antidepressant drugs found in `only_antidepressant`
# and serialize it to depression_treatment_ontology.owl as RDF/XML.

# Ontology namespaces
EX = Namespace("http://example.org/ontology#")
DRUG = Namespace("http://example.org/drug#")

# Characters that may appear unescaped in an IRI fragment (RFC 3987 sub-delims
# plus ":", "@", "/", "?"). Anything else found in a free-text label
# (e.g. "[", "]", '"', "#", "%", tabs) is percent-encoded.
_IRI_SAFE = "!$&'()*+,;=:@/?"


def _iri_local_name(text):
    """Turn a free-text label into a local name that is safe inside an IRI.

    Spaces become underscores; every remaining character that is not legal in
    an IRI fragment is percent-encoded, so names such as
    "albuterol sulfate [ProAir HFA]" no longer yield malformed IRIs.
    """
    return quote(str(text).replace(" ", "_"), safe=_IRI_SAFE)


# Create the RDF graph and register the prefixes used when serializing
g = Graph()
g.bind("ex", EX)
g.bind("drug", DRUG)

# Top-level ontology class
g.add((EX.Antidepressant, RDF.type, RDFS.Class))

# Only these columns are read below. The graph is a set of triples, so repeated
# rows add nothing: de-duplicate first to avoid iterating every source row.
# Rows without a name or description cannot be turned into an IRI (the old code
# raised AttributeError on a NaN name), so they are dropped.
drug_columns = ["name", "gsn", "ndc", "etccode", "etcdescription"]
drug_rows = (
    only_antidepressant[drug_columns]
    .dropna(subset=["name", "etcdescription"])
    .drop_duplicates()
)

for _, row in drug_rows.iterrows():
    # Namespace[...] already returns a URIRef; no extra wrapping needed.
    drug_uri = DRUG[_iri_local_name(row["name"])]

    # Describe the drug itself
    g.add((drug_uri, RDF.type, EX.Antidepressant))
    g.add((drug_uri, EX.gsn, Literal(row["gsn"])))
    g.add((drug_uri, EX.ndc, Literal(row["ndc"])))
    g.add((drug_uri, EX.etccode, Literal(row["etccode"])))
    g.add((drug_uri, EX.etcdescription, Literal(row["etcdescription"])))

    # Derive a class from the ETC description and link the drug to it
    class_uri = EX[_iri_local_name(row["etcdescription"])]
    g.add((class_uri, RDF.type, RDFS.Class))
    # NOTE(review): the drug is typed as an instance of ex:Antidepressant above
    # but linked here with rdfs:subClassOf, which treats it as a class. Confirm
    # the intended model (probably drug rdf:type class_uri, with class_uri
    # rdfs:subClassOf ex:Antidepressant). Triple kept as-is for compatibility.
    g.add((drug_uri, RDFS.subClassOf, class_uri))

# Save the graph
ontology_file = "depression_treatment_ontology.owl"
g.serialize(destination=ontology_file, format="xml")

print(f"Ontology saved as {ontology_file}")


Ontology saved as depression_treatment_ontology.owl
