In [114]:
import pandas as pd

In [115]:
# Load the source CSV into a DataFrame.
df = pd.read_csv('../../src/data/nba2k_data.csv')

In [116]:
# Remove the two draft columns and save the trimmed table as a new CSV.
# Reassigning `df` (instead of inplace=True) together with errors='ignore'
# keeps this cell re-runnable: a second execution no longer raises KeyError
# because the columns were already removed by the first one.
df = df.drop(columns=['draft_peak', 'draft_round'], errors='ignore')
df.to_csv('../../src/data/nba2k_new.csv', index=False)

In [117]:
# Read the trimmed CSV (written to this same path above) into `nd`.
nd = pd.read_csv('../../src/data/nba2k_new.csv')

______

In [118]:
# Preview the first five rows of `nd`.
nd.head(5)

Unnamed: 0,full_name,rating,jersey,team,position,b_day,height,weight,salary,country,draft_year,college,version
0,LeBron James,97,#23,Los Angeles Lakers,F,12/30/84,6-9 / 2.06,250 lbs. / 113.4 kg.,$37436858,USA,2003,,NBA2k20
1,Kawhi Leonard,97,#2,Los Angeles Clippers,F,06/29/91,6-7 / 2.01,225 lbs. / 102.1 kg.,$32742000,USA,2011,San Diego State,NBA2k20
2,Giannis Antetokounmpo,96,#34,Milwaukee Bucks,F-G,12/06/94,6-11 / 2.11,242 lbs. / 109.8 kg.,$25842697,Greece,2013,,NBA2k20
3,Kevin Durant,96,#7,Brooklyn Nets,F,09/29/88,6-10 / 2.08,230 lbs. / 104.3 kg.,$37199000,USA,2007,Texas,NBA2k20
4,James Harden,96,#13,Houston Rockets,G,08/26/89,6-5 / 1.96,220 lbs. / 99.8 kg.,$38199000,USA,2009,Arizona State,NBA2k20


In [119]:
from rdflib import Graph, Literal, Namespace, RDF, URIRef, FOAF, XSD
import urllib.parse
import re

In [120]:
def create_uri(name):
    """Turn a display name into a compact identifier string.

    The name is lower-cased and every run of non-word characters (spaces,
    punctuation, ...) is removed, e.g. ``"LeBron James"`` -> ``"lebronjames"``.

    Args:
        name: The string to normalise.

    Returns:
        The lower-cased string with all non-word characters stripped.
    """
    return re.sub(r'\W+', '', name.lower())

In [121]:
# Empty RDF graph that the player triples are added to.
graph = Graph()
# Base namespace used to build both the player URIs and the property URIs.
ns = Namespace("http://nba-players.com/profile/")
# Lower-case alias for rdflib's RDF namespace object.
rdf = RDF

In [122]:
# Bind the prefixes "nba_players" and "foaf" to their namespaces on the graph.
graph.bind("nba_players", ns)
graph.bind("foaf", FOAF)

In [123]:
# Normalised player names that have already been added to the graph; the
# conversion loop uses it to skip rows whose name was seen before.
processed_uri = set()

In [129]:
# Convert each unique player row of `nd` into RDF triples, then write the
# graph to disk as RDF/XML.

# Start from an empty de-duplication set so this cell can be re-run safely.
# `processed_uri` is created in a different cell and outlives a single run;
# without the reset, a second execution finds every name already present and
# silently skips every row.
processed_uri.clear()

for _, row in nd.iterrows():
    full_name_uri = create_uri(row['full_name'])

    # Keep only the first row seen for each normalised name.
    if full_name_uri in processed_uri:
        continue
    processed_uri.add(full_name_uri)

    player_uri = URIRef(ns[full_name_uri])
    graph.add((player_uri, RDF.type, ns['object']))

    # NOTE(review): only `college` is checked for missing values below. If any
    # other column can be NaN, `.lstrip`/`.split` will raise or a NaN literal
    # will be written -- confirm against the data.
    graph.add((player_uri, ns['full_name'], Literal(row['full_name'])))
    graph.add((player_uri, ns['rating'], Literal(row['rating'], datatype=XSD.integer)))
    # Jersey values look like "#23"; store them without the leading "#".
    graph.add((player_uri, ns['jersey'], Literal(row['jersey'].lstrip('#'))))
    graph.add((player_uri, ns['team'], Literal(row['team'])))
    graph.add((player_uri, ns['position'], Literal(row['position'])))
    graph.add((player_uri, ns['b_day'], Literal(row['b_day'])))

    # Height looks like "6-9 / 2.06"; keep the number after the slash.
    height_value_meters = float(row['height'].split('/')[1].strip())
    graph.add((player_uri, ns['height'], Literal(height_value_meters, datatype=XSD.float)))

    # Weight looks like "250 lbs. / 113.4 kg."; keep the number after the slash.
    weight_value_kg = float(row['weight'].split('/')[1].strip().split()[0])
    graph.add((player_uri, ns['weight'], Literal(weight_value_kg, datatype=XSD.float)))

    # Salary looks like "$37436858"; re-format it with thousands separators
    # ("$37,436,858") and store it as a plain string literal.
    salary_value = float(row['salary'].strip('$').replace(',', ''))
    salary_formatted = '${:,.0f}'.format(salary_value)
    graph.add((player_uri, ns['salary'], Literal(salary_formatted)))

    graph.add((player_uri, ns['country'], Literal(row['country'])))
    graph.add((player_uri, ns['draft_year'], Literal(row['draft_year'], datatype=XSD.integer)))

    # Missing college values are replaced with a fixed placeholder string.
    college_data = row['college']
    if pd.isna(college_data):
        college_data = 'No College Data'
    graph.add((player_uri, ns['college'], Literal(college_data)))

    graph.add((player_uri, ns['version'], Literal(row['version'])))

rdf_file = '../../output/rdf/output.rdf'

# Only the write can raise an I/O error, so only the write is guarded; errors
# in the conversion loop above surface with their own traceback.
try:
    graph.serialize(destination=rdf_file, format='xml')
except IOError as e:
    print(f'File is not written successfully due to {e}')
else:
    print('File written success.')

File written success.


In [125]:
import graphviz

In [128]:
# Render the RDF graph as an SVG drawing with Graphviz.
rdf_file = '../../output/rdf/output.rdf'


def _node_label(term):
    """Return the Graphviz node label for one RDF term.

    URI references are shortened to their local name (the text after the last
    "/" or "#"), falling back to the full URI when that would be empty. Every
    other term (e.g. a literal) is used verbatim, so values that themselves
    contain "/" -- such as a "12/30/84" date -- are not truncated.
    """
    text = str(term)
    if not isinstance(term, URIRef):
        return text
    local_name = text.split("/")[-1].split("#")[-1]
    return local_name if local_name else text


try:
    # Parsing adds the file's triples to the existing in-memory `graph`.
    graph.parse(rdf_file, format='xml')

    gv_graph = graphviz.Graph(strict=True, format='svg', engine='neato')
    node_labels = {}  # not used in this cell; kept in case other cells read it

    for subject, predicate, obj in graph:
        subject_label = _node_label(subject)
        obj_label = _node_label(obj)

        # Add nodes and edges to the Graphviz graph
        gv_graph.node(subject_label)
        gv_graph.node(obj_label)
        gv_graph.edge(subject_label, obj_label, label=str(predicate))

    # render() appends ".<format>" to the name it is given, so pass the bare
    # base name (passing "output_graph.svg" produced "output_graph.svg.svg")
    # and report the path that render() actually returns.
    output_file = gv_graph.render('output_graph', view=True)

    print(f"Visualization saved as {output_file}")
except FileNotFoundError:
    print(f'Existing file with path of {rdf_file} is not found.')

Visualization saved as output_graph.svg
