### Import Libraries

In [78]:
import pandas as pd
import rdflib
from rdflib import FOAF
from rdflib import Namespace, Literal, Graph
from rdflib.namespace import RDF, XSD

### Loading the Dataset

In [79]:
# Load the player dataset from a path relative to the notebook's working directory.
df = pd.read_csv('src/nba2k_full.csv')

In [80]:
# Number of missing values in each column.
df.isna().sum()

full_name       0
rating          0
jersey          0
team           23
position        0
b_day           0
height          0
weight          0
salary          0
country         0
draft_year      0
draft_round     0
draft_peak      0
college        76
version         0
dtype: int64

In [81]:
# Show the dtype pandas inferred for each column.
df.dtypes

full_name      object
rating          int64
jersey         object
team           object
position       object
b_day          object
height         object
weight         object
salary         object
country        object
draft_year      int64
draft_round    object
draft_peak     object
college        object
version        object
dtype: object

### Cleaning Data

In [82]:
# Per-column count of missing values.
# NOTE(review): this repeats the earlier null check; no cleaning step is
# performed in this section. Missing values are handled later, while the
# graph is being built.
df.isna().sum()

full_name       0
rating          0
jersey          0
team           23
position        0
b_day           0
height          0
weight          0
salary          0
country         0
draft_year      0
draft_round     0
draft_peak      0
college        76
version         0
dtype: int64

### Building and Exporting the RDF Graph

In [83]:
# Empty RDF graph that the player triples are added to below.
graph = rdflib.Graph()

In [84]:
# NOTE(review): `example_namespace` is not referenced in the visible cells;
# confirm whether it is still needed.
example_namespace = Namespace("https://nba-players.org/")
# Namespace used for the player resources, the NBAPlayer class and all properties.
nba_namespace = Namespace("https://nba-players.org/player-dataset#")

In [85]:
# Register prefixes for these namespaces on the graph.
graph.bind("nba_namespace", nba_namespace)
# NOTE(review): no FOAF terms are added to the graph in the visible cells;
# confirm this binding is needed.
graph.bind("foaf", FOAF)

In [86]:
# Build one RDF resource per CSV row, attach each column as a property in
# `nba_namespace`, then serialize the whole graph to Turtle.
for index, row in df.iterrows():
    # The DataFrame index is used as the player identifier in the URI.
    player_uri = nba_namespace['player_' + str(index)]
    graph.add((player_uri, RDF.type, nba_namespace['NBAPlayer']))

    graph.add((player_uri, nba_namespace['full_name'], Literal(row['full_name'])))
    graph.add((player_uri, nba_namespace['rating'], Literal(row['rating'], datatype=XSD.integer)))
    # Strip any leading '#' from the jersey value before storing it.
    graph.add((player_uri, nba_namespace['jersey'], Literal(row['jersey'].lstrip('#'))))

    # `team` contains missing values (see the null counts above). Passing NaN
    # straight to Literal would store a numeric NaN instead of text, so use a
    # placeholder string, mirroring how `college` is handled below.
    team_data = row['team']
    if pd.isna(team_data):
        team_data = 'No Team Data'
    graph.add((player_uri, nba_namespace['team'], Literal(team_data)))

    graph.add((player_uri, nba_namespace['position'], Literal(row['position'])))
    graph.add((player_uri, nba_namespace['b_day'], Literal(row['b_day'])))

    # The part after '/' is parsed as the height in meters
    # (assumes an "<imperial> / <metric>" layout; TODO confirm against the CSV).
    height_data = row['height']
    height_value_meters = float(height_data.split('/')[1].strip())
    graph.add((player_uri, nba_namespace['height'], Literal(height_value_meters, datatype=XSD.float)))

    # The first token after '/' is parsed as the weight in kilograms;
    # anything following it (presumably a unit suffix) is discarded.
    weight_data = row['weight']
    weight_value_kg = float(weight_data.split('/')[1].strip().split()[0])
    graph.add((player_uri, nba_namespace['weight'], Literal(weight_value_kg, datatype=XSD.float)))

    # Normalize the salary to "$1,234,567" form; it is stored as a plain
    # string literal, not as a number.
    salary_data = row['salary']
    salary_value = float(salary_data.strip('$').replace(',', ''))
    salary_formatted = '${:,.0f}'.format(salary_value)
    graph.add((player_uri, nba_namespace['salary'], Literal(salary_formatted)))

    graph.add((player_uri, nba_namespace['country'], Literal(row['country'])))
    graph.add((player_uri, nba_namespace['draft_year'], Literal(row['draft_year'], datatype=XSD.integer)))
    graph.add((player_uri, nba_namespace['draft_round'], Literal(row['draft_round'])))
    graph.add((player_uri, nba_namespace['draft_peak'], Literal(row['draft_peak'])))

    # `college` also contains missing values; store a placeholder string.
    college_data = row['college']
    if pd.isna(college_data):
        college_data = 'No College Data'
    graph.add((player_uri, nba_namespace['college'], Literal(college_data)))

    graph.add((player_uri, nba_namespace['version'], Literal(row['version'])))

# Write the populated graph to disk in Turtle format.
graph.serialize('nba-players-2.ttl', format='turtle')

<Graph identifier=N9c8c4a9b0ad94718b9fc41995af717e5 (<class 'rdflib.graph.Graph'>)>

In [87]:
# Parse the Turtle file written above back into `graph`.
# NOTE(review): `graph` already holds the triples that were serialized, so
# this looks like a round-trip check of the file; confirm the intent.
url = 'nba-players-2.ttl'
result = graph.parse(url, format='turtle')

In [88]:
def _dot_quote(value):
    """Return `value` as a double-quoted DOT identifier with backslashes and quotes escaped."""
    return '"' + str(value).replace('\\', '\\\\').replace('"', '\\"') + '"'


def turtle_to_dot(turtle_file, dot_file, min_rating=80):
    """Write a Graphviz DOT file describing the highly rated players in a Turtle file.

    For every resource typed as `nba_namespace.NBAPlayer` whose rating is
    strictly greater than `min_rating`, one edge is written from the player's
    full name to each of: team, height, weight, rating, salary and country.

    Parameters
    ----------
    turtle_file : str
        Path of the Turtle file to read.
    dot_file : str
        Path of the DOT file to write; its parent directory must already exist.
    min_rating : int, optional
        Exclusive lower bound on the rating of players to include. Defaults
        to 80.

    Notes
    -----
    A property missing from the graph is rendered as the text "None", and a
    rating that is not a plain run of digits is treated as 0.
    """
    # Use the fully-qualified rdflib class: a bare `Graph` may have been
    # rebound by a later `from graphviz import Graph` in this notebook.
    rdf_graph = rdflib.Graph()
    rdf_graph.parse(turtle_file, format='turtle')

    def text_of(subject, prop):
        # str() keeps the "None" text for absent properties.
        return str(rdf_graph.value(subject, prop))

    # Graphviz reads DOT input as UTF-8 by default, so write it explicitly
    # instead of relying on the platform's default encoding.
    with open(dot_file, "w", encoding="utf-8") as f:
        f.write("digraph G {\n")
        f.write('    node [shape=box];\n')

        for player in rdf_graph.subjects(RDF.type, nba_namespace.NBAPlayer):
            rating = text_of(player, nba_namespace.rating)
            rating_number = int(rating) if rating.isdigit() else 0
            if rating_number <= min_rating:
                continue

            name_id = _dot_quote(text_of(player, nba_namespace.full_name))
            edges = [
                (text_of(player, nba_namespace.team), "currentTeam"),
                (text_of(player, nba_namespace.height) + "m", "hasHeight"),
                (text_of(player, nba_namespace.weight) + "kg", "hasWeight"),
                (rating, "ratingNumber"),
                (text_of(player, nba_namespace.salary), "annualSalary"),
                (text_of(player, nba_namespace.country), "from"),
            ]
            for target, label in edges:
                f.write(f'    {name_id} -> {_dot_quote(target)} [label="{label}"];\n')

        f.write("}")
# Export the serialized Turtle graph to a DOT file for Graphviz.
# NOTE(review): the "docs/" directory must already exist; opening the output
# file for writing does not create missing directories.
turtle_to_dot("nba-players-2.ttl", "docs/output_final_rating_system.dot")

In [89]:
# NOTE(review): this rebinds the name `Graph`, which the first cell imported
# from rdflib; after this cell runs, a bare `Graph()` refers to graphviz's
# class. `Graph` is not used again in the visible cells below.
from graphviz import Graph

In [91]:
from graphviz import Source

# Read the DOT description from disk.
# NOTE(review): this renders 'docs/dummy_player.dot', not the
# 'docs/output_final_rating_system.dot' file written by turtle_to_dot;
# confirm this is the intended input.
dot_file_path = 'docs/dummy_player.dot'
with open(dot_file_path, 'r') as dot_file:
    dot_data = dot_file.read()

output = 'docs/output/final_output_test'

# NOTE(review): `graph` previously referred to the rdflib graph; from here on
# it refers to a graphviz Source object.
graph = Source(dot_data, format='png')

# Render to PNG and open the result in the default viewer.
graph.render(output, view=True)

'docs\\output\\final_output_test.png'