# Installing needed libraries

##### This installs rdflib, which is used to create the RDF version of the IEEE Thesaurus

In [None]:
!pip install rdflib==7.0.0

# Importing

In [1]:
import csv
from pathlib import Path
from typing import Tuple

from rdflib import Graph, Namespace, URIRef, Literal
from rdflib.namespace import RDF, SKOS, RDFS

# Functions

In [2]:
def lower_case(subject:str, object:str) -> Tuple[str, str]:
    """Lower-case both terms of a thesaurus relation.

    Args:
        subject: Subject term.
        object: Object term. The name shadows the built-in ``object`` inside
            this function; it is kept so keyword callers keep working.

    Returns:
        A ``(subject, object)`` tuple with each string lower-cased.
    """
    return subject.lower(), object.lower()

def replace_space(subject:str, object:str) -> Tuple[str, str]:
    """Replace every space character in both terms with an underscore.

    Args:
        subject: Subject term.
        object: Object term. The name shadows the built-in ``object`` inside
            this function; it is kept so keyword callers keep working.

    Returns:
        A ``(subject, object)`` tuple in which each ``' '`` has become ``'_'``.
    """
    return subject.replace(' ', '_'), object.replace(' ', '_')

def replace_double_space(subject:str, object:str) -> Tuple[str, str]:
    """Collapse every run of consecutive spaces in both terms to one space.

    Args:
        subject: Subject term.
        object: Object term. The name shadows the built-in ``object`` inside
            this function; it is kept so keyword callers keep working.

    Returns:
        A ``(subject, object)`` tuple in which no term contains two adjacent
        space characters.
    """
    def _collapse(text: str) -> str:
        # One replace('  ', ' ') pass turns three spaces into two, so repeat
        # until no double space is left.
        while '  ' in text:
            text = text.replace('  ', ' ')
        return text

    return _collapse(subject), _collapse(object)
    
def eliminate_comma(subject:str, object:str) -> Tuple[str, str]:
    """Replace every comma in both terms with an underscore.

    Despite the name, commas are substituted by ``'_'`` rather than removed.

    Args:
        subject: Subject term.
        object: Object term. The name shadows the built-in ``object`` inside
            this function; it is kept so keyword callers keep working.

    Returns:
        A ``(subject, object)`` tuple in which each ``','`` has become ``'_'``.
    """
    return subject.replace(',', '_'), object.replace(',', '_')

def eliminate_dot(subject:str, object:str) -> Tuple[str, str]:
    """Replace every dot in both terms with an underscore.

    Despite the name, dots are substituted by ``'_'`` rather than removed.

    Args:
        subject: Subject term.
        object: Object term. The name shadows the built-in ``object`` inside
            this function; it is kept so keyword callers keep working.

    Returns:
        A ``(subject, object)`` tuple in which each ``'.'`` has become ``'_'``.
    """
    return subject.replace('.', '_'), object.replace('.', '_')

# Assigning Schema URI

In [3]:
# Base namespace; term URIs are built by appending the normalised term to it.
topics_uri = Namespace('https://ieee-thesaurus.org/')

# Creating RDF

In [4]:
def ieee_thesaurus_rdf(thesaurus_csv_file):
    """Build an RDF graph of SKOS concepts from an IEEE Thesaurus CSV file.

    The file is read with ``csv.DictReader`` using the columns ``subject``,
    ``predicate`` and ``object``. Every non-empty term becomes a
    ``skos:Concept`` whose URI is ``topics_uri`` followed by the normalised
    term (lower-cased; spaces, commas and dots replaced by underscores) and
    whose ``rdfs:label`` is the term as returned by ``replace_double_space``.

    Predicates are mapped as follows; any other value adds no relation:

    * ``BT``  -> ``subject skos:broader object``
    * ``NT``  -> ``subject skos:narrower object``
    * ``RT``  -> ``subject skos:related object``
    * ``USE`` -> ``subject skos:prefLabel "object label"``
    * ``UF``  -> ``subject skos:altLabel "object label"`` and
      ``object skos:prefLabel "subject label"``

    Args:
        thesaurus_csv_file: Path of the UTF-8 encoded CSV file to read.

    Returns:
        The populated ``rdflib.Graph``.
    """
    # Relations that link two concepts, keyed by their thesaurus code.
    concept_relations = {
        'BT': SKOS.broader,
        'NT': SKOS.narrower,
        'RT': SKOS.related,
    }

    g = Graph()

    with open(thesaurus_csv_file, 'r', newline='', encoding='utf-8') as f:
        for row in csv.DictReader(f):
            # DictReader fills the missing cells of a short row with None, so
            # fall back to '' before any string method is applied.
            subject = row.get('subject') or ''
            predicate = row.get('predicate') or ''
            obj = row.get('object') or ''

            subject_label, obj_label = replace_double_space(subject, obj)

            subject_trim, obj_trim = eliminate_dot(*eliminate_comma(*replace_space(*lower_case(subject_label, obj_label))))

            subject_uri = URIRef(f"{topics_uri}{subject_trim}")
            object_uri = URIRef(f"{topics_uri}{obj_trim}")

            # An empty term would otherwise be minted as the bare namespace
            # URI carrying an empty label, so only real terms are declared.
            for term_uri, term_label in ((subject_uri, subject_label), (object_uri, obj_label)):
                if term_label:
                    g.add((term_uri, RDF.type, SKOS.Concept))
                    g.add((term_uri, RDFS.label, Literal(term_label)))

            # A relation needs both of its ends.
            if not (subject_label and obj_label):
                continue

            if predicate in concept_relations:
                g.add((subject_uri, concept_relations[predicate], object_uri))
            elif predicate == 'USE':
                # SKOS label properties range over literals, not resources.
                g.add((subject_uri, SKOS.prefLabel, Literal(obj_label)))
            elif predicate == 'UF':
                g.add((subject_uri, SKOS.altLabel, Literal(obj_label)))
                g.add((object_uri, SKOS.prefLabel, Literal(subject_label)))

    return g

In [5]:
# Location of the thesaurus CSV that is converted to RDF below.
thesaurus_csv_path = './source/cleaned_ieee_thesaurus_2023.csv'

In [6]:
# Parse the CSV and build the in-memory RDF graph.
graph = ieee_thesaurus_rdf(thesaurus_csv_path)

In [2]:
print('--- Printing RDF Versions ---')

# The output directory must exist before serialize() can write into it.
Path('./rdf').mkdir(parents=True, exist_ok=True)

# (file extension, rdflib serializer name) for every exported version.
for extension, rdf_format in (('xml', 'xml'), ('ttl', 'turtle'), ('nt', 'nt')):
    graph.serialize(destination=f'./rdf/ieee-thesaurus.{extension}', format=rdf_format)

--- Printing RDF Versions ---


In [9]:
# graph.serialize(destination='./rdf/ieee-thesaurus.jsonld', format='json-ld', indent=4)