In [2]:
import pandas as pd
from rdflib import Graph, Literal, RDF, RDFS, URIRef, Namespace, BNode
from rdflib.namespace import XSD
import urllib.parse
from SPARQLWrapper import SPARQLWrapper, JSON

In [3]:
# Input CSV files, relative to the notebook's working directory.
coursecsv = "CSVs/data.csv"
studentcsv = "CSVs/students.csv"
lecturescsv = "CSVs/lecture.csv"
topicscsv = "CSVs/topic.csv"

# Output Turtle files; each one is written by a serialize() call further down.
uniPath = "Data/universities.ttl"
coursePath = "Data/courses.ttl"
studentPath = "Data/students.ttl"
lecturesPath = "Data/lectures.ttl"
topicsPath = "Data/topics.ttl"

# Universities

In [4]:
# Empty graph that the university cell below fills and serializes.
g = Graph()

# Vocabularies referenced by the university triples.
ex = Namespace("http://example.org/vocab/")
foaf = Namespace("http://xmlns.com/foaf/0.1/")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
dbo = Namespace("http://dbpedia.org/ontology/")
dbr = Namespace("http://dbpedia.org/resource/")

# Register each prefix on the graph, in the same order as the definitions above.
for prefix, namespace in (
    ("ex", ex),
    ("foaf", foaf),
    ("rdf", rdf),
    ("rdfs", rdfs),
    ("xsd", xsd),
    ("dbo", dbo),
    ("dbr", dbr),
):
    g.bind(prefix, namespace)

In [14]:
import os

# Show the kernel's working directory; the relative CSVs/ and Data/ paths
# used in this notebook are resolved against it.
working_dir = os.getcwd()
print(working_dir)

/Users/aryansaxena/Desktop/Intelligent Systems/IS_AK_G_04


In [24]:
# Load the course triples so Concordia can be linked to every ex:Course.
# NOTE: the file at coursePath is produced by the "Course Data" cell further
# down, so that cell must have been run at least once before this one.
g.parse(coursePath, format="turtle")

sparql = SPARQLWrapper("http://dbpedia.org/sparql")

# English-labelled universities located in Canada. The prefixes are declared
# explicitly so the query does not depend on the endpoint's predefined
# namespace table.
sparql.setQuery("""
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbo: <http://dbpedia.org/ontology/>
    PREFIX dbr: <http://dbpedia.org/resource/>
    SELECT ?university ?universityName WHERE {
        ?university rdf:type dbo:University .
        ?university dbo:country dbr:Canada .
        ?university rdfs:label ?universityName .
        FILTER (LANG(?universityName) = 'en')
    } LIMIT 100
""")
sparql.setReturnFormat(JSON)
results = sparql.query().convert()

# Concordia is added by hand so it is present regardless of what the
# LIMIT-ed query returns.
concordia_uri = URIRef("http://dbpedia.org/resource/Concordia_University")
concordia_name = Literal("Concordia University", lang="en")
if (concordia_uri, RDF.type, dbo.University) not in g:
    g.add((concordia_uri, RDF.type, dbo.University))
    g.add((concordia_uri, RDFS.label, concordia_name))

# Materialize the course subjects first: adding triples to the graph while
# lazily iterating over it is not safe.
for course_uri in list(g.subjects(RDF.type, ex.Course)):
    g.add((concordia_uri, dbo.offersCourse, course_uri))

# One type triple and one English label per university returned by DBpedia.
for result in results["results"]["bindings"]:
    university_uri = URIRef(result["university"]["value"])
    university_name = Literal(result["universityName"]["value"], lang="en")
    g.add((university_uri, RDF.type, dbo.University))
    g.add((university_uri, RDFS.label, university_name))

g.serialize(destination=uniPath, format='turtle')

<Graph identifier=N5e362dace8d544bda9cfc74150b62057 (<class 'rdflib.graph.Graph'>)>

# Course Data (From Concordia Open Data):

In [21]:
# Reset to an empty graph for the course data.
g = Graph()

# Namespace objects for the vocabularies used in this section.
ex = Namespace("http://example.org/vocab/")
foaf = Namespace("http://xmlns.com/foaf/0.1/")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")

# Bind every prefix to its namespace on the new graph.
prefix_bindings = {"ex": ex, "foaf": foaf, "rdf": rdf, "rdfs": rdfs, "xsd": xsd}
for prefix, namespace in prefix_bindings.items():
    g.bind(prefix, namespace)

In [22]:
# Read the course catalogue; the file is UTF-16 encoded.
catalog_df = pd.read_csv(coursecsv, encoding='utf-16')

# (predicate, CSV column, literal datatype) for every literal attached to a course.
course_literal_columns = [
    (ex.subject, 'Subject', XSD.string),
    (ex.number, 'Catalog', XSD.string),
    (ex.description, 'Long Title', XSD.string),
    (ex.credits, 'Class Units', XSD.decimal),
    (ex.preRequisiteDescription, 'Pre Requisite Description', XSD.string),
    (ex.equivalentCourses, 'Equivalent Courses', XSD.string),
]

for _, row in catalog_df.iterrows():
    # The course URI is derived from the percent-encoded Course ID.
    course_id_str = str(row['Course ID'])
    course_uri = URIRef(f"http://example.org/vocab/course/{urllib.parse.quote(course_id_str)}")
    g.add((course_uri, RDF.type, ex.Course))

    for predicate, column, datatype in course_literal_columns:
        value = row[column]
        # Empty CSV cells are read as NaN; skip them instead of emitting a
        # "nan" literal (which is not even a valid xsd:decimal for credits).
        if pd.isnull(value):
            continue
        g.add((course_uri, predicate, Literal(value, datatype=datatype)))


g.serialize(destination=coursePath, format='turtle')

<Graph identifier=N5b878fbb1efc400ebb4eb3e480c17caa (<class 'rdflib.graph.Graph'>)>

# Students

In [25]:
# New, empty graph dedicated to the student data.
g = Graph()

# Vocabulary namespaces used by the student triples.
ex = Namespace("http://example.org/vocab/")
foaf = Namespace("http://xmlns.com/foaf/0.1/")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")

# Pair each prefix label with its namespace and bind them in order.
prefix_labels = ("ex", "foaf", "rdf", "rdfs", "xsd")
prefix_targets = (ex, foaf, rdf, rdfs, xsd)
for label, target in zip(prefix_labels, prefix_targets):
    g.bind(label, target)

In [26]:
students_df = pd.read_csv(studentcsv)
for _, row in students_df.iterrows():
    # pandas may parse id_number as a number, and urllib.parse.quote() only
    # accepts str/bytes, so coerce to str first (as the other cells do).
    student_id = str(row["id_number"])
    student_uri = ex[f'student/{urllib.parse.quote(student_id)}']
    g.add((student_uri, RDF.type, ex.Student))
    g.add((student_uri, foaf.name, Literal(f"{row['first_name']} {row['last_name']}", datatype=XSD.string)))
    # NOTE(review): FOAF describes foaf:mbox as a mailto: resource; here it is
    # stored as a plain string literal. Confirm what downstream queries expect.
    g.add((student_uri, foaf.mbox, Literal(row['email'], datatype=XSD.string)))
    g.add((student_uri, ex.studentID, Literal(student_id, datatype=XSD.string)))

    # Reads the columns course_1_id/course_1_grade ... course_4_id/course_4_grade.
    for i in range(1, 5):
        # Percent-encode the id exactly like the course cell does, so both
        # cells produce the same URI for the same course.
        course_id = urllib.parse.quote(str(row[f"course_{i}_id"]))
        course_uri = ex[f'course/{course_id}']
        grade = row[f"course_{i}_grade"]
        # Each completion is a blank node carrying the course and its grade.
        course_completion = BNode()
        g.add((student_uri, ex.completedCourse, course_completion))
        g.add((course_completion, RDF.type, URIRef("http://example.org/vocab/CompletedCourse")))
        g.add((course_completion, ex.course, course_uri))
        g.add((course_completion, ex.courseGrade, Literal(grade, datatype=XSD.string)))

    # Link the student directly to the courses listed in the competency columns.
    for comp_field in ['competent_in_course_id1', 'competent_in_course_id2']:
        competency_course_id = urllib.parse.quote(str(row[comp_field]))
        competency_course_uri = ex[f'course/{competency_course_id}']
        g.add((student_uri, ex.hasCompetency, competency_course_uri))


g.serialize(destination=studentPath, format='turtle')

<Graph identifier=N7c58b839ee6c49c6953885173a3af39b (<class 'rdflib.graph.Graph'>)>

# Lectures

In [5]:
# Begin the lecture section with an empty graph.
g = Graph()

# Namespaces for the vocabularies used by the lecture triples.
ex = Namespace("http://example.org/vocab/")
foaf = Namespace("http://xmlns.com/foaf/0.1/")
rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
xsd = Namespace("http://www.w3.org/2001/XMLSchema#")

# (prefix, namespace) pairs, bound in listing order.
bindings = [
    ("ex", ex),
    ("foaf", foaf),
    ("rdf", rdf),
    ("rdfs", rdfs),
    ("xsd", xsd),
]
for binding in bindings:
    g.bind(*binding)

In [6]:
lectures_df = pd.read_csv(lecturescsv)
for _, row in lectures_df.iterrows():
    # The lecture URI is derived from the percent-encoded Identifier column.
    lecture_uri = URIRef(f"http://example.org/vocab/lecture/{urllib.parse.quote(str(row['Identifier']))}")
    g.add((lecture_uri, RDF.type, ex.Lecture))

    # Link the lecture to its course through ex:lectureOfCourse.
    course_uri = URIRef(f"http://example.org/vocab/course/{urllib.parse.quote(str(row['CourseId']))}")
    g.add((lecture_uri, ex.lectureOfCourse, course_uri))

    # Lecture title.
    g.add((lecture_uri, ex.lectureName, Literal(row['Title'], datatype=XSD.string)))

    # External link via rdfs:seeAlso. An empty CSV cell is read as NaN and
    # would otherwise be turned into the bogus URI "nan", so skip it.
    if not pd.isnull(row['seeAlso']):
        dbpedia_uri = URIRef(str(row['seeAlso']))
        g.add((lecture_uri, RDFS.seeAlso, dbpedia_uri))

    # Topic link via ex:lectureTopic. str() keeps quote() from raising
    # TypeError on non-string cells; missing topics are skipped.
    if not pd.isnull(row['Topic']):
        topic_uri = URIRef(f"http://example.org/vocab/topic/{urllib.parse.quote(str(row['Topic']))}")
        g.add((lecture_uri, ex.lectureTopic, topic_uri))

# Write the lecture graph to lecturesPath as Turtle.
g.serialize(destination=lecturesPath, format='turtle')

<Graph identifier=N934b42a56ba44723b5df6a6fe62642e2 (<class 'rdflib.graph.Graph'>)>

# Topics

In [62]:
# Fresh graph for the topic data; every namespace is bound right after it is created.
g = Graph()

ex = Namespace("http://example.org/vocab/")
g.bind("ex", ex)

foaf = Namespace("http://xmlns.com/foaf/0.1/")
g.bind("foaf", foaf)

rdf = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
g.bind("rdf", rdf)

rdfs = Namespace("http://www.w3.org/2000/01/rdf-schema#")
g.bind("rdfs", rdfs)

xsd = Namespace("http://www.w3.org/2001/XMLSchema#")
g.bind("xsd", xsd)

In [65]:
topics_df = pd.read_csv(topicscsv)
for _, row in topics_df.iterrows():
    # The topic is identified directly by the URI in the 'Topic Link' column.
    topic_uri = URIRef(row['Topic Link'])
    # Course and lecture URIs are percent-encoded the same way as in the
    # lectures cell, so both files refer to identical URIs even when an
    # identifier contains spaces or reserved characters.
    course_uri = URIRef(f"http://example.org/vocab/course/{urllib.parse.quote(str(row['CourseId']))}")
    lecture_uri = URIRef(f"http://example.org/vocab/lecture/{urllib.parse.quote(str(row['Identifier']))}")

    # Type the topic and link it to its course and lecture.
    g.add((topic_uri, RDF.type, ex.Topic))
    g.add((topic_uri, ex.isTopicOfCourse, course_uri))
    g.add((topic_uri, ex.isTopicOfLecture, lecture_uri))

    # Descriptive literals: entity type, topic name, and the kind of material
    # (taken from the 'Type' column).
    g.add((topic_uri, ex.entityType, Literal(row['Entity Type'], datatype=XSD.string)))
    g.add((topic_uri, ex.topicName, Literal(row['Topic Name'], datatype=XSD.string)))
    g.add((topic_uri, ex.materialType, Literal(row['Type'], datatype=XSD.string)))

g.serialize(destination=topicsPath, format='turtle')

<Graph identifier=N865cb0e7f8fb4230a81a65982a89f4c8 (<class 'rdflib.graph.Graph'>)>