# HowLongToBeat RDF Creator

We load the generated CSV files and serialize all the data into ***Turtle format (TTL)*** using the ***RDFLib*** Python library.

## Setup

We import all the necessary libraries and we set the paths to the input/output files. In particular, we create a TTL file for each type of data.

In [34]:
# Imports
import os
from datetime import datetime
from pathlib import Path

import pandas as pd
# Load the required libraries
from rdflib import Graph, Literal, RDF, URIRef, Namespace, RDFS
# RDFLib knows about some namespaces, like XSD
from rdflib.namespace import XSD

In [35]:
# Every input/output location is resolved against the current working directory
absPath = os.path.abspath(os.getcwd())
datasetsPath = os.path.join(absPath, "cleaned_datasets")
rawdatasetsPath = os.path.join(absPath, "raw_datasets")
rdfPath = os.path.join(absPath, "rdf")

# Create the cleaned-dataset and RDF directories when they are missing.
# exist_ok=True replaces the racy "check os.path.exists, then os.mkdir" pair.
os.makedirs(datasetsPath, exist_ok=True)
os.makedirs(rdfPath, exist_ok=True)

# Setup datasets paths
gamesPath = os.path.join(datasetsPath, "games_cleaned.csv")
vgchartzPath = os.path.join(datasetsPath, "vgchartz_cleaned.csv")
indiegamesdevelopersPath = os.path.join(datasetsPath, "indiegamesdevelopers_cleaned_seriesExplode.csv")
platformsPath = os.path.join(datasetsPath, "platforms.csv")
videoGameDevelopersPath = os.path.join(datasetsPath, "videogamesdevelopers_cleaned_seriesexplode.csv")
completionTimePath = os.path.join(datasetsPath, "completion_time.csv")

# Setup raw datasets
rawVGChartsPath = os.path.join(rawdatasetsPath, "vgchartz-7_7_2020.csv")
rawGamesPath = os.path.join(rawdatasetsPath, "games.csv")
rawCountriesRegionsPath = os.path.join(rawdatasetsPath, 'countries-regions.csv')

# Setup Turtle paths (one output file per kind of data)
genresTTLPath = os.path.join(rdfPath, "genres.ttl")
gamesTTLPath = os.path.join(rdfPath, "games.ttl")
companiesTTLPath = os.path.join(rdfPath, "companies.ttl")
platformsTTLPath = os.path.join(rdfPath, "platforms.ttl")
platformsSalesTTLPath = os.path.join(rdfPath, "platformsSales.ttl")

videoGameDevelopersTTLPath = os.path.join(rdfPath, "videoGameDevelopers.ttl")
statsTTLPath = os.path.join(rdfPath, "stats.ttl")
gameSalesTTLPath = os.path.join(rdfPath, "gameSales.ttl")
regionsTTLPath = os.path.join(rdfPath, "regions.ttl")
countriesTTLPath = os.path.join(rdfPath, "countries.ttl")


In [36]:
# Country Ontology: namespace under which country resources are addressed.
# Elsewhere in this file country URIs are built as CNS[<two-letter code>].
CNS = Namespace("http://eulersharp.sourceforge.net/2003/03swap/countries#")

# HLTB Ontology: namespace for every class, property and individual that
# this notebook creates (games, genres, platforms, companies, stats, sales).
HLTB = Namespace("http://www.dei.unipd.it/database2/HLTB-db2unipd#")

In [37]:
def createGraph():
    """Return a new, empty graph with the xsd, countries and hltb prefixes bound."""
    graph = Graph()

    # Bound prefixes keep the serialized Turtle output readable
    for prefix, namespace in (("xsd", XSD), ("countries", CNS), ("hltb", HLTB)):
        graph.bind(prefix, namespace)

    return graph


# Build the identifier used in a game's URI
def createGameID(title):
    """Return the lower-case, dash-separated identifier of a game title.

    Alphanumeric runs of ``title`` are kept; every run of other characters
    between them becomes a single "-", and none is left at either end.
    """
    words = []
    current = []
    for symbol in title:
        if symbol.isalnum():
            current.append(symbol)
        elif current:
            # A separator closes the word collected so far
            words.append("".join(current))
            current = []
    if current:
        words.append("".join(current))
    return "-".join(words).lower()


#Create genre URI
def setGenreID(genre):
    """Split a raw genre cell into ``[name, id]`` pairs.

    The cell is converted to text, "/" is treated like the ", " separator and
    the result is split on ", ". For every piece the returned list holds
    ``[original name, identifier]``, where the identifier is the name with
    each non-alphanumeric character replaced by "-", lower-cased and stripped
    of leading/trailing dashes. A missing cell (NaN, whose text is "nan")
    yields ``[["", ""]]`` so callers can skip it by testing for an empty string.
    """
    text = str(genre).replace("/", ", ")
    # Only blank out whole "nan" pieces: removing the substring everywhere
    # would corrupt real names that merely contain it ("Finance" -> "Fice").
    names = ["" if name == "nan" else name for name in text.split(", ")]

    pairs = []
    for name in names:
        genreID = "".join(char if char.isalnum() else "-" for char in name)
        pairs.append([name, genreID.lower().strip("-")])
    return pairs


def setPlatformID(platform):
    """Return the lower-case, dash-separated identifier of a platform name.

    Alphanumeric runs are kept and joined with a single "-"; every other
    character only acts as a separator.
    """
    # Turn each separator into a blank, then keep the non-empty pieces
    spaced = "".join(char if char.isalnum() else " " for char in platform)
    pieces = [piece for piece in spaced.split(" ") if piece]
    return "-".join(pieces).lower()


def setCompanyID(company):
    """Return the lower-case, dash-separated identifier of a company name.

    Alphanumeric characters are kept; each run of other characters found
    between two alphanumeric runs is written as one "-".
    """
    pieces = []
    pendingDash = False
    for char in company:
        if not char.isalnum():
            # Remember the separator; it is emitted only if more text follows
            pendingDash = True
            continue
        if pendingDash and pieces:
            pieces.append("-")
        pieces.append(char)
        pendingDash = False
    return "".join(pieces).lower()


# Holds the parsed countries/regions table so the CSV is read only once
_countriesRegionsCache = {}


def _getCountriesRegions():
    """Return the countries/regions table indexed by country name, loading it on first use."""
    if 'table' not in _countriesRegionsCache:
        _countriesRegionsCache['table'] = pd.read_csv(rawCountriesRegionsPath, sep=",", index_col='name')
    return _countriesRegionsCache['table']


def getCountry2Digits(country):
    """Return the lower-case two-letter code of ``country``, or '' if unknown.

    ``country`` may be a country name as listed in the countries/regions CSV,
    one of the aliases handled explicitly below, or already a two-letter code
    (any case). Anything that is not a string (e.g. NaN from an empty CSV
    cell) yields ''.
    """
    if not isinstance(country, str):
        return ''

    lowered = country.lower()
    # Aliases resolved without the table.
    # NOTE(review): Namibia is presumably special-cased because pandas parses
    # its "NA" code as a missing value - confirm against the CSV.
    if 'namibia' == lowered:
        return 'na'
    if 'united states' == lowered:
        return 'us'
    if 'united kingdom' in lowered or 'england' in lowered:
        return 'gb'

    table = _getCountriesRegions()
    if country in table.index:
        code = table.loc[table.index == country, 'alpha-2'].values[0]
        # A missing code is not a string; report the country as unknown
        return code.lower() if isinstance(code, str) else ''

    # "x in Series" tests the index labels, so compare with the values instead
    knownCodes = table['alpha-2'].dropna().str.lower().values
    if lowered in knownCodes:
        return lowered
    return ''

## Serialization

We serialize the data according to the following workflow:

1. Load the CSV files, merging them if needed, and iterate through their rows.
2. Create our own unique ID for each node, based on the name of the entity (e.g. the game title).
3. Add the node to the graph using the unique ID.
4. Add all the data properties.
5. Add all the object properties.
6. Serialize the data and save them into a TTL file.

## Games

Now serializing the Game class

In [38]:
# Create a fresh graph (with the xsd/countries/hltb prefixes bound) that
# collects the Game triples
g = createGraph()

In [39]:
# Read the cleaned tables (comma is already read_csv's default separator);
# games are indexed by their title
games = pd.read_csv(gamesPath, index_col="title")
vgchartz = pd.read_csv(vgchartzPath)
platforms = pd.read_csv(platformsPath)

# Attach the platform details to every sales row through the console acronym
merged = vgchartz.merge(platforms, how='left', left_on='console', right_on='Acronym')

In [40]:
# Region suffix of a sale URI paired with the sales column holding its figure
salesColumns = (("eu", "pal_sales"), ("na", "na_sales"), ("jp", "jp_sales"),
                ("other", "other_sales"), ("global", "total_shipped"))

# Index the sales rows by (title, platform name) once, keeping the first row of
# each pair, instead of scanning the whole merged frame for every game/platform
salesByGamePlatform = {}
for salesIndex, salesRow in merged.iterrows():
    salesByGamePlatform.setdefault((salesRow["title"], salesRow["Platform"]), salesRow)

# Iterate over the games
for title, row in games.iterrows():
    # Create gameID from its title and the node to add to the Graph
    gameID = createGameID(title)
    Game = URIRef(HLTB[gameID])

    # Add triples using store's add() method.
    g.add((Game, RDF.type, HLTB.Game))

    # Add the title of the game
    g.add((Game, HLTB["officialName"], Literal(title, datatype=XSD.string)))

    # Add multiplayer focus: true as soon as a coop or versus value is present
    g.add((Game, HLTB["multiplayerFocus"],
           Literal(pd.notnull(row["coop"]) or pd.notnull(row["versus"]), datatype=XSD.boolean)))

    #Add hltb id
    g.add((Game, HLTB["id"], Literal(row["id"], datatype=XSD.int)))

    #Add hasGenre object property (a missing genre comes back with an empty id)
    for genreName, genreID in setGenreID(row["genres"]):
        if genreID != '':
            g.add((Game, HLTB["hasGenre"], URIRef(HLTB[genreID])))

    #Add developers and publishers
    for column, predicate in (('developers', 'developedBy'), ('publishers', 'publishedBy')):
        if pd.notna(row[column]):
            for company in row[column].split(', '):
                # Only the text before any parenthesised suffix names the company
                g.add((Game, HLTB[predicate], HLTB[setCompanyID(company.split('(')[0])]))

    #Add platform availability
    if pd.notna(row["platforms"]):
        for platform in row["platforms"].split(", "):
            platformID = setPlatformID(platform)
            g.add((Game, HLTB["releasedOn"], URIRef(HLTB[platformID])))

            #Add Stats object property
            g.add((Game, HLTB["hasStats"], URIRef(HLTB["stats-" + gameID + "___" + platformID])))

            # Add Sales object property
            salesRow = salesByGamePlatform.get((title, platform))
            if salesRow is None:
                continue
            for region, column in salesColumns:
                units = salesRow[column]
                # Sale nodes are only created for figures above zero (see the
                # game-sales section), so link only those to avoid dangling URIs
                if pd.notna(units) and units > 0:
                    GameSalesID = URIRef(HLTB["sales-" + gameID + "___" + platformID + "___" + region])
                    g.add((Game, HLTB["sold"], GameSalesID))

In [41]:
# Render the graph as Turtle text and store it UTF-8 encoded
turtleText = g.serialize(format="turtle")
Path(gamesTTLPath).write_text(turtleText, encoding="utf-8")

print("Saved games TTL file.")

Saved games TTL file.


## Genre

Now serializing the Genre class

In [42]:
# Start a fresh graph for the Genre triples
g = createGraph()

# Read the games table again, this time indexed by its genres column
# (comma is already read_csv's default separator)
genres = pd.read_csv(gamesPath, index_col="genres")

In [43]:
# Only the index (the raw genre cell) is needed, not the row contents
for genre in genres.index:
    for genreName, genreID in setGenreID(genre):
        # Skip the blank entries produced by missing or empty genre cells
        if genreName not in (" ", ""):
            Genre = URIRef(HLTB[genreID])
            # Declare the node as a Genre
            g.add((Genre, RDF.type, HLTB.Genre))
            # Keep the genre name as written in the dataset
            g.add((Genre, HLTB["name"], Literal(genreName, datatype=XSD.string)))

In [44]:
# Render the genre graph as Turtle text and store it UTF-8 encoded
turtleText = g.serialize(format="turtle")
Path(genresTTLPath).write_text(turtleText, encoding="utf-8")

## Platforms

In [45]:
# Start a fresh graph for the Platform triples
g = createGraph()

# Read the platforms table indexed by platform name
# (comma is already read_csv's default separator)
platforms = pd.read_csv(platformsPath, index_col="Platform")

In [46]:
# Manufacturer name -> country name ('' when unknown)
manufacturerDict = {}

for platform, row in platforms.iterrows():
    Platform = URIRef(HLTB[setPlatformID(platform)])
    #Add triples using store's add() method.
    g.add((Platform, RDF.type, HLTB.Platform))
    # Add the name of the platform
    g.add((Platform, HLTB["officialName"], Literal(platform, datatype=XSD.string)))

    #Add popularity if platform is popular
    if pd.notna(row["Popular"]):
        g.add((Platform, HLTB["popular"], Literal(True, datatype=XSD.boolean)))

    #Add release date if present
    if pd.notna(row["Release date"]):
        # "%m" is the month directive; "%M" means minutes and would leave
        # every release date in January. strptime already yields midnight.
        releaseDate = datetime.strptime(row["Release date"], '%Y-%m-%d')
        g.add((Platform, HLTB["releaseDate"], Literal(releaseDate, datatype=XSD.dateTime)))

    #Add CPU information
    if pd.notna(row["CPU"]):
        g.add((Platform, HLTB["cpu"], Literal(row["CPU"], datatype=XSD.string)))

    #Add CPU bit information (text before the first "-", e.g. "64" from "64-bit")
    if pd.notna(row["\"Bits\""]):
        bits = row["\"Bits\""].split("-")[0]
        g.add((Platform, HLTB["bits"], Literal(bits, datatype=XSD.int)))

    #Add acronym information
    if pd.notna(row["Acronym"]):
        g.add((Platform, HLTB["acronym"], Literal(row["Acronym"], datatype=XSD.string)))

    # add the manufacturer name
    if pd.notna(row["Manufacturer"]):
        # Manufacturers are separated by "," or "/"; normalise to "/"
        manufacturerStr = row["Manufacturer"].strip().replace(',', '/')
        for elem in manufacturerStr.split('/'):
            elem = elem.strip()
            manufacturerName = ''
            manufacturerCountry = ''
            if '(' in elem:
                # "Name (Country)": the country follows the first parenthesis
                elemSplit = elem.split('(')
                manufacturerName = elemSplit[0].strip()
                manufacturerCountry = elemSplit[1].strip()
                # Guard the [-1] look-ups against empty pieces such as "(Japan)"
                if manufacturerName and not manufacturerName[-1].isalnum():
                    manufacturerName = manufacturerName[:-1]
                if manufacturerCountry.endswith(')'):
                    manufacturerCountry = manufacturerCountry[:-1]
                manufacturerCountry = ''.join(c for c in manufacturerCountry if c.isalnum() or c == ' ')
                # Map the short forms to the names used by the countries table
                if manufacturerCountry.lower() == 'south korea':
                    manufacturerCountry = 'Korea, Republic of'
                elif manufacturerCountry.lower() == 'us':
                    manufacturerCountry = 'United States of America'
            else:
                manufacturerName = elem
                if manufacturerName == 'Panasonic' or manufacturerName == 'Sega':
                    manufacturerCountry = 'Japan'

            if manufacturerName != '':
                # Record the manufacturer, never replacing a known country
                # with an empty one coming from a later platform
                if manufacturerCountry != '' or manufacturerName not in manufacturerDict:
                    manufacturerDict[manufacturerName] = manufacturerCountry

                # get manufacturer URI
                manufacturerURI = URIRef(HLTB[setCompanyID(manufacturerName)])

                # Add platform's manufacturer
                g.add((Platform, HLTB["createdBy"], manufacturerURI))

In [47]:
# Render the platform graph as Turtle text and store it UTF-8 encoded
turtleText = g.serialize(format="turtle")
Path(platformsTTLPath).write_text(turtleText, encoding="utf-8")

## Stats


In [48]:
# Start a fresh graph for the Stats triples
g = createGraph()

# Read the cleaned tables (comma is already read_csv's default separator)
games = pd.read_csv(gamesPath)
vgchartz = pd.read_csv(vgchartzPath)
completionTime = pd.read_csv(completionTimePath)
platforms = pd.read_csv(platformsPath)

# Games joined with their completion times (inner join on the game id)
merged_temp = games.merge(completionTime, left_on='id', right_on='gameID')
# Sales rows enriched with the platform details via the console acronym
merged_temp1 = vgchartz.merge(platforms, how='left', left_on='console', right_on='Acronym')
# One row per game/platform, with the sales data where a match exists
merged = merged_temp.merge(merged_temp1, how='left', left_on=["title", "platform"],
                           right_on=["title", "Platform"])

In [49]:
# Completion-time predicate paired with the column holding its value
statsTimeColumns = (("mainTime", "comp_main"), ("mainPlusTime", "comp_plus"),
                    ("completionistTime", "comp_100"), ("slowestTime", "comp_high"),
                    ("fastestTime", "comp_low"))

for rowIndex, row in merged.iterrows():
    # A stats node needs both the game title and the platform
    if pd.isna(row["title"]) or pd.isna(row["platform"]):
        continue

    platformID = setPlatformID(row["platform"])
    # Same URI scheme as the hasStats link written in the Games section
    StatsID = URIRef(HLTB["stats-" + createGameID(row["title"]) + "___" + platformID])

    #Adding node type (rdf:type, like every other class in this notebook)
    g.add((StatsID, RDF.type, HLTB.Stats))

    #Add Time information
    if pd.notna(row["count_comp"]) and int(row["count_comp"]) > 1:
        g.add((StatsID, HLTB["polledTime"], Literal(int(row["count_comp"]), datatype=XSD.int)))
        for predicate, column in statsTimeColumns:
            # int() cannot convert NaN, so skip the times that are missing
            if pd.notna(row[column]):
                g.add((StatsID, HLTB[predicate], Literal(int(row[column]), datatype=XSD.int)))

    #Add remaining stats
    if pd.notna(row["critic_score"]) and (float(row["critic_score"]) > 0.1):
        g.add((StatsID, HLTB["criticScore"], Literal(row["critic_score"], datatype=XSD.float)))
    if pd.notna(row["user_score"]) and (float(row["user_score"]) > 0.1):
        g.add((StatsID, HLTB["userScore"], Literal(row["user_score"], datatype=XSD.float)))

    if pd.notna(row["release_date"]):
        # "%m" is the month directive; "%M" means minutes and would leave
        # every release date in January. strptime already yields midnight.
        releaseDate = datetime.strptime(row["release_date"], '%Y-%m-%d')
        g.add((StatsID, HLTB["releaseDate"], Literal(releaseDate, datatype=XSD.dateTime)))

    # Add "onPlatform" object property
    g.add((StatsID, HLTB["onPlatform"], URIRef(HLTB[platformID])))

In [50]:
# Render the stats graph as Turtle text and store it UTF-8 encoded
turtleText = g.serialize(format="turtle")
Path(statsTTLPath).write_text(turtleText, encoding="utf-8")

print("Saved stats TTL file.")

Saved stats TTL file.


## Game sales

In [51]:
# Start a fresh graph for the game Sale triples
g = createGraph()

# Read the sales and platform tables (comma is already read_csv's default separator)
vgchartz = pd.read_csv(vgchartzPath)
platforms = pd.read_csv(platformsPath)

# Attach the platform details to every sales row through the console acronym
merged = vgchartz.merge(platforms, how='left', left_on='console', right_on='Acronym')

In [52]:
# Region identifier paired with the sales column holding its figure
gameSalesColumns = (("eu", "pal_sales"), ("jp", "jp_sales"), ("na", "na_sales"),
                    ("other", "other_sales"), ("global", "total_shipped"))

for rowIndex, row in merged.iterrows():
    # A sale URI needs both the platform name and the game title
    if pd.isna(row["Platform"]) or pd.isna(row["title"]):
        continue

    platformID = setPlatformID(row["Platform"])
    salePrefix = "sales-" + createGameID(row["title"]) + "___" + platformID + "___"

    for region, column in gameSalesColumns:
        units = row[column]
        # Only figures above zero become Sale nodes
        if pd.notna(units) and units > 0:
            GameSalesID = URIRef(HLTB[salePrefix + region])
            # rdf:type, like every other class in this notebook
            g.add((GameSalesID, RDF.type, HLTB.Sale))
            g.add((GameSalesID, HLTB["unitsSold"], Literal(units, datatype=XSD.float)))
            g.add((GameSalesID, HLTB["locatedIn"], URIRef(HLTB[region])))
            g.add((GameSalesID, HLTB["onPlatform"], URIRef(HLTB[platformID])))

In [53]:
# Render the game-sales graph as Turtle text and store it UTF-8 encoded
turtleText = g.serialize(format="turtle")
Path(gameSalesTTLPath).write_text(turtleText, encoding="utf-8")

print("Saved game-sales TTL file.")

Saved game-sales TTL file.


## Companies

### Indie-game-developers

In [54]:
# Start a fresh graph shared by all the Company sections below
g = createGraph()

# Read the indie developers table (comma is already read_csv's default separator)
indiegamesdevelopers = pd.read_csv(indiegamesdevelopersPath)

In [55]:
for index, row in indiegamesdevelopers.iterrows():
    developer = row['Developer']
    # Without a developer name there is no URI to build
    if pd.isna(developer):
        continue

    # Create the node to add to the Graph
    Company = URIRef(HLTB[setCompanyID(developer)])

    # Add triples using store's add() method.
    g.add((Company, RDF.type, HLTB.Company))

    # Add the Company: every row of this table is an indie developer
    g.add((Company, HLTB["indieDeveloper"], Literal(True, datatype=XSD.boolean)))
    g.add((Company, HLTB['officialName'], Literal(developer, datatype=XSD.string)))

    # Add the company's country when it is present and can be resolved
    country = row['Country']
    if pd.notna(country):
        code = getCountry2Digits(country)
        if code != '':
            g.add((Company, HLTB['basedIn'], CNS[code]))

    # Add the notable games
    notableGame = row['selected_titles_series']
    if pd.notna(notableGame):
        g.add((Company, HLTB['hasNotableGame'], HLTB[createGameID(notableGame)]))

### Video-game-developers

In [56]:
# Read the video game developers table (comma is already read_csv's default separator)
videoGameDevelopers = pd.read_csv(videoGameDevelopersPath)

In [57]:
for index, row in videoGameDevelopers.iterrows():
    developer = row['Developer']
    # Without a developer name there is no URI to build
    if pd.isna(developer):
        continue

    # Create the node to add to the Graph
    Company = URIRef(HLTB[setCompanyID(developer)])

    # Add triples using store's add() method.
    g.add((Company, RDF.type, HLTB.Company))

    # Add the Company
    g.add((Company, HLTB['officialName'], Literal(developer, datatype=XSD.string)))

    # Add the country when it is present and can be resolved
    country = row['Country']
    if pd.notna(country):
        code = getCountry2Digits(country)
        if code != '':
            g.add((Company, HLTB['basedIn'], CNS[code]))

    # Add the notable games
    notableGame = row['selected_titles_series']
    if pd.notna(notableGame):
        g.add((Company, HLTB['hasNotableGame'], HLTB[createGameID(notableGame)]))

### Games

In [58]:
# Type every developer and publisher named in the games table as a Company
for column in ('developers', 'publishers'):
    # dropna() leaves out the games without a value in this column
    for names in games[column].dropna():
        for name in names.split(', '):
            # Only the text before any parenthesised suffix names the company
            Company = URIRef(HLTB[setCompanyID(name.split('(')[0])])
            g.add((Company, RDF.type, HLTB.Company))

### Manufacturers

In [59]:
for manufacturer, country in manufacturerDict.items():
    # An empty name would produce a triple on the bare namespace URI
    if manufacturer == '':
        continue

    manufacturerURI = URIRef(HLTB[setCompanyID(manufacturer)])
    g.add((manufacturerURI, RDF.type, HLTB.Company))
    g.add((manufacturerURI, HLTB['officialName'], Literal(manufacturer, datatype=XSD.string)))

    # Add manufacturer's country, only when it resolves to a code
    code = getCountry2Digits(country)
    if code != '':
        g.add((manufacturerURI, HLTB['basedIn'], CNS[code]))

In [60]:
# Render the company graph as Turtle text and store it UTF-8 encoded
turtleText = g.serialize(format="turtle")
Path(companiesTTLPath).write_text(turtleText, encoding="utf-8")

print("Saved company TTL file.")

Saved company TTL file.


## Country

In [61]:
# Start a fresh graph for the country triples
g = createGraph()

# Read the raw countries/regions table indexed by country name
# (comma is already read_csv's default separator)
rawCountriesRegions = pd.read_csv(rawCountriesRegionsPath, index_col='name')

In [62]:
# Collapse the continent names into the sales regions used by the ontology
countriesRegions = rawCountriesRegions.replace(to_replace=['Asia', 'Europe', 'Africa', 'Oceania', 'Americas', 'NaN'],
                                               value=['other', 'eu', 'other', 'other', 'other', 'other'])
# Country codes assigned to the "na" region (a set gives constant-time membership tests)
northAmericaCountries = {'ai', 'ag', 'aw', 'bs', 'bb', 'bz', 'bm', 'bq', 'vg', 'ca', 'ky', 'cr', 'cu', 'cw',
                         'dm', 'do', 'sv', 've', 'gl', 'gd', 'gp', 'gt', 'ht', 'hn', 'jm', 'mq', 'mx', 'ms', 'ni', 'pa',
                         'pr', 'bl', 'kn', 'lc', 'mf', 'pm', 'vc', 'sx', 'tt', 'tc', 'us', 'vi'}
for index, row in countriesRegions.iterrows():
    code = getCountry2Digits(index)
    # An unresolved country would produce a triple on the bare namespace URI
    if code == '':
        continue

    region = row['region']
    if code == 'jp':
        region = 'jp'
    elif code in northAmericaCountries:
        region = 'na'
    elif code == 'aq' or pd.isna(region):
        # A missing region is a real NaN, which the string 'NaN' in the
        # replace() above never matches; file such countries under "other"
        region = 'other'
    g.add((CNS[code], HLTB['locatedIn'], HLTB[region]))

In [63]:
# Render the country graph as Turtle text and store it UTF-8 encoded
turtleText = g.serialize(format="turtle")
Path(countriesTTLPath).write_text(turtleText, encoding="utf-8")

print("Saved Countries TTL file.")

Saved Countries TTL file.


## Platform sales

In [64]:
# Start a fresh graph for the platform Sale triples
g = createGraph()
# Read the platforms table (comma is already read_csv's default separator)
platforms = pd.read_csv(platformsPath)

In [65]:
# Region identifier paired with the platforms column holding its sales figure
platformSalesColumns = (("eu", "Europe"), ("jp", "Japan"), ("na", "North America"),
                        ("other", "Rest of World"), ("global", "Global"))

for rowIndex, row in platforms.iterrows():
    if pd.isna(row["Platform"]):
        continue

    platformID = setPlatformID(row["Platform"])

    for region, column in platformSalesColumns:
        units = row[column]
        # Each region is tested against its own figure; the North America
        # branch used to test the Japan column by mistake
        if pd.notna(units) and units > 0:
            PlatformSalesID = URIRef(HLTB["sales-" + platformID + "___" + region])
            # rdf:type, like every other class in this notebook
            g.add((PlatformSalesID, RDF.type, HLTB.Sale))
            g.add((PlatformSalesID, HLTB["unitsSold"], Literal(units, datatype=XSD.float)))
            g.add((PlatformSalesID, HLTB["locatedIn"], URIRef(HLTB[region])))
            g.add((PlatformSalesID, HLTB["onPlatform"], URIRef(HLTB[platformID])))

In [66]:
# Render the platform-sales graph as Turtle text and store it UTF-8 encoded
turtleText = g.serialize(format="turtle")
Path(platformsSalesTTLPath).write_text(turtleText, encoding="utf-8")

print("Saved platforms Sales TTL file.")

Saved platforms Sales TTL file.
