In [63]:
#installing rdflib and pandas
#!pip install rdflib
#!pip install pandas

In [64]:
#importing libraries
from rdflib import Graph, Literal, Namespace, URIRef
from rdflib.namespace import DCTERMS, RDF, RDFS, SKOS, XSD
import pandas as pd
import urllib

In [65]:
# Graph that collects every triple produced by this notebook.
result_graph = Graph()

# Vocabularies used below. DCTERMS, RDFS, SKOS and XSD are intentionally NOT
# re-declared here: they are already imported from rdflib.namespace in the
# imports cell, and shadowing them with hand-typed IRIs only duplicates them
# and gives typos a place to hide.
VOID = Namespace("http://rdfs.org/ns/void#")
DC = Namespace("http://purl.org/dc/elements/1.1/")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
DBO = Namespace("http://dbpedia.org/ontology/")
DBP = Namespace("http://dbpedia.org/property/")

# Project namespaces: o: is the root, ot:/oa:/oc: hold teams, athletes, cities.
O = Namespace("http://wallscope.co.uk/resource/olympics/")
OT = Namespace("http://wallscope.co.uk/resource/olympics/team/")
OA = Namespace("http://wallscope.co.uk/resource/olympics/athlete/")
OC = Namespace("http://wallscope.co.uk/resource/olympics/city/")

# Register the prefixes so the serialised output uses short names
# (ot:Netherlands) instead of full IRIs. Order matches the original cell.
for prefix, namespace in (
    ("void", VOID),
    ("dcterms", DCTERMS),
    ("dc", DC),
    ("ot", OT),
    ("dbo", DBO),
    ("dbp", DBP),
    ("oa", OA),
    ("oc", OC),
    ("o", O),
    ("foaf", FOAF),
    ("rdfs", RDFS),
    ("xsd", XSD),
    ("skos", SKOS),
):
    result_graph.bind(prefix, namespace)

# Base IRI for athlete resources; derived from oa: so the two cannot drift apart.
BASE_URL = str(OA)

In [66]:
# Load the athlete-events CSV (UTF-8 encoded) into a DataFrame; nothing is displayed here.
df = pd.read_csv(
    filepath_or_buffer="data/athlete_events_smaller.csv",
    encoding="utf8",
)

In [67]:
def normalizeAndEncodeString(string):
    """Turn an arbitrary cell value into a string usable as the tail of an IRI.

    The value is converted with ``str()``, every space character is removed
    ("Squaw Valley" -> "SquawValley") and the result is percent-encoded with
    ``urllib.parse.quote`` ("Men's" -> "Men%27s").

    Note that ``quote`` is called with its default ``safe='/'``, so a slash in
    the input is kept as-is ("Denmark/Sweden" stays "Denmark/Sweden").

    Args:
        string: Any value; non-strings are stringified first.

    Returns:
        The space-free, percent-encoded string.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import quote

    without_spaces = str(string).replace(" ", "")
    return quote(without_spaces)

In [68]:
# Create one resource per distinct team, host city, sport, season and event,
# plus the two gender concepts. IRIs are built from the declared namespaces
# (ot:, oc:, o:) and normalizeAndEncodeString so that they match the IRIs the
# athlete cell links to. .dropna() keeps empty CSV cells from turning into
# bogus ".../nan" resources.

# Teams, e.g. ot:Netherlands a dbo:SportsTeam ; rdfs:label "Netherlands"@en .
for team in df['Team'].dropna().unique():
    try:
        team_uri = OT[normalizeAndEncodeString(team)]
        result_graph.add((team_uri, RDF.type, DBO.SportsTeam))
        result_graph.add((team_uri, RDFS.label, Literal(team, lang='en')))
    except Exception as error:
        # Best effort: one bad team name must not abort the run, but report
        # which value failed and why instead of hiding it behind a bare except.
        print(f"Log: failed to create a team_uri for {team!r}: {error}")

# Cities, e.g. oc:SquawValley a dbo:City ; rdfs:label "Squaw Valley"@en .
# Spaces are stripped and the rest percent-encoded to keep the IRI valid.
for city in df['City'].dropna().unique():
    city_uri = OC[normalizeAndEncodeString(city)]
    result_graph.add((city_uri, RDF.type, DBO.City))
    result_graph.add((city_uri, RDFS.label, Literal(city, lang='en')))

# Sports, e.g. o:SpeedSkating a dbo:Sport (no label is attached to sports).
for sport in df['Sport'].dropna().unique():
    sport_uri = O[normalizeAndEncodeString(sport)]
    result_graph.add((sport_uri, RDF.type, DBO.Sport))

# Seasons, e.g. o:Summer a dbo:TimePeriod ; rdfs:label "Summer"@en .
for season in df['Season'].dropna().unique():
    season_uri = O[normalizeAndEncodeString(season)]
    result_graph.add((season_uri, RDF.type, DBO.TimePeriod))
    result_graph.add((season_uri, RDFS.label, Literal(season, lang='en')))

# Disciplines, e.g. o:SpeedSkatingWomen%27s500metres a o:Discipline with an
# English label. The IRI is simply the encoded event name.
for discipline in df['Event'].dropna().unique():
    discipline_uri = O[normalizeAndEncodeString(discipline)]
    result_graph.add((discipline_uri, RDF.type, O.Discipline))
    result_graph.add((discipline_uri, RDFS.label, Literal(discipline, lang='en')))

# Gender concepts o:F and o:M as skos:Concept with English and German labels.
# The names `female` and `male` are reused by the athlete cell further down.
female = O["F"]
male = O["M"]
result_graph.add((female, RDF.type, SKOS.Concept))
result_graph.add((female, RDFS.label, Literal("female", lang='en')))
result_graph.add((female, RDFS.label, Literal("weiblich", lang='de')))
result_graph.add((male, RDF.type, SKOS.Concept))
result_graph.add((male, RDFS.label, Literal("male", lang='en')))
result_graph.add((male, RDFS.label, Literal("männlich", lang='de')))

<Graph identifier=Nca0fa051254b4c599fb99fa3b28efa6e (<class 'rdflib.graph.Graph'>)>

In [69]:
# Intentionally left without code.
# This cell was a verbatim copy of the two preceding cells: it re-defined
# normalizeAndEncodeString and re-added the team, city, sport, season,
# discipline and gender triples that the graph already contains (a graph is a
# set, so the second pass added nothing). Everything later cells rely on
# (normalizeAndEncodeString, female, male) is defined by the cells above.

<Graph identifier=Nca0fa051254b4c599fb99fa3b28efa6e (<class 'rdflib.graph.Graph'>)>

In [70]:
def _has_value(value):
    """Return True when a CSV cell holds real content (not NaN/None, not empty)."""
    return bool(pd.notna(value)) and str(value) != ""


def _whole_number(value):
    """Return ``value`` as an ``int`` if it is a whole number, otherwise ``None``.

    Missing cells (NaN/None), non-numeric values, infinities and numbers with a
    fractional part all yield ``None``. Works for both int- and float-typed
    columns, unlike calling ``.is_integer()`` on the raw cell.
    """
    if pd.isna(value):
        return None
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return int(number) if number.is_integer() else None


# Maps the CSV's Sex codes to the gender concepts created earlier.
_GENDER_CONCEPTS = {"F": female, "M": male}

# One foaf:Person per distinct athlete ID. The IRI is built from the numeric ID
# (<.../athlete/24>) rather than from the name (oa:NilsEgilAaness).
for ID, df_group in df.groupby('ID'):
    athlete_uri = URIRef(BASE_URL + str(ID))
    result_graph.add((athlete_uri, RDF.type, FOAF.Person))

    # Every CSV row of this athlete is one participation (event at one Games).
    for row_index, row in df_group.iterrows():
        # Name: rdfs:label and foaf:name, e.g. foaf:name "Neil Agius".
        if _has_value(row["Name"]):
            name_literal = Literal(row["Name"])
            result_graph.add((athlete_uri, RDFS.label, name_literal))
            result_graph.add((athlete_uri, FOAF.name, name_literal))

        # Age / height / weight are emitted as integers only. The value is cast
        # to int first: Literal(24.0, datatype=XSD.integer) would serialise the
        # ill-typed lexical form "24.0"^^xsd:integer.
        age = _whole_number(row["Age"])
        if age is not None and age > 1:  # original threshold kept: ages <= 1 are skipped
            result_graph.add((athlete_uri, FOAF.age, Literal(age, datatype=XSD.integer)))
        height = _whole_number(row["Height"])
        if height is not None:
            result_graph.add((athlete_uri, DBP.height, Literal(height)))
        weight = _whole_number(row["Weight"])
        if weight is not None:
            result_graph.add((athlete_uri, DBP.weight, Literal(weight)))

        # Sex: foaf:gender o:F / o:M; any other value adds nothing.
        gender_uri = _GENDER_CONCEPTS.get(row["Sex"])
        if gender_uri is not None:
            result_graph.add((athlete_uri, FOAF.gender, gender_uri))

        # Team: dbp:team ot:Netherlands.
        if _has_value(row["Team"]):
            result_graph.add((athlete_uri, DBP.team, OT[normalizeAndEncodeString(row["Team"])]))

        # Games: o:2004Summer a dbo:Olympics, linked to its city and season.
        # City/season IRIs go through normalizeAndEncodeString so they match
        # the resources created for them (e.g. oc:Cortinad%27Ampezzo); building
        # them with a plain replace(" ", "") left such links dangling.
        game_uri = None
        if _has_value(row["Games"]):
            game_uri = O[normalizeAndEncodeString(row["Games"])]
            result_graph.add((game_uri, RDF.type, DBO.Olympics))
            result_graph.add((game_uri, RDFS.label, Literal(row["Games"], lang='en')))
            if _has_value(row["City"]):
                result_graph.add((game_uri, O.city, OC[normalizeAndEncodeString(row["City"])]))
            if _has_value(row["Season"]):
                result_graph.add((game_uri, O.season, O[normalizeAndEncodeString(row["Season"])]))

        # Discipline -> sport link, e.g. o:NordicCombinedMen%27sTeam o:sport o:NordicCombined.
        discipline_uri = None
        if _has_value(row["Event"]):
            discipline_uri = O[normalizeAndEncodeString(row["Event"])]
            if _has_value(row["Sport"]):
                result_graph.add((discipline_uri, O.sport, O[normalizeAndEncodeString(row["Sport"])]))

        # Result: one o:Result per CSV row, tying athlete, discipline and game
        # together. The IRI uses the row index, not the athlete ID; keying it on
        # the ID merged all of an athlete's participations into a single node,
        # so it was impossible to tell which discipline belonged to which game.
        result_uri = O[f"result.{row_index}"]
        result_graph.add((result_uri, RDF.type, O.Result))
        result_graph.add((result_uri, O.athlete, athlete_uri))
        if discipline_uri is not None:
            result_graph.add((result_uri, O.discipline, discipline_uri))
        # Only link a game that belongs to this row; previously game_uri could
        # be unbound or left over from the previous row.
        if game_uri is not None:
            result_graph.add((result_uri, O.game, game_uri))

In [71]:
# Write the assembled graph to "olympia_output.ttl" (relative to the working directory) as Turtle.
result_graph.serialize(format="turtle", destination="olympia_output.ttl")

<Graph identifier=Nca0fa051254b4c599fb99fa3b28efa6e (<class 'rdflib.graph.Graph'>)>