### Import Libraries

### PC BRANCH

In [1]:
import pandas as pd
from rdflib import FOAF
from rdflib import Namespace, Literal, Graph
from rdflib.namespace import RDF, XSD

### Scanning Files

In [2]:
df = pd.read_csv('src/nba2k_full.csv')

In [3]:
graph = Graph()

In [4]:
example_namespace = Namespace("https://nba-players.org/")
nba_namespace = Namespace("https://nba-players.org/player-dataset#")

In [5]:
graph.bind("nba_namespace", nba_namespace)
graph.bind("foaf", FOAF)

In [6]:
for index, row in df.iterrows():
    player_uri = nba_namespace['player_' + str(index)]
    graph.add((player_uri, RDF.type, nba_namespace['NBAPlayer']))

    graph.add((player_uri, nba_namespace['full_name'], Literal(row['full_name'])))
    graph.add((player_uri, nba_namespace['rating'], Literal(row['rating'], datatype=XSD.integer)))
    graph.add((player_uri, nba_namespace['jersey'], Literal(row['jersey'].lstrip('#'))))
    graph.add((player_uri, nba_namespace['team'], Literal(row['team'])))
    graph.add((player_uri, nba_namespace['position'], Literal(row['position'])))
    graph.add((player_uri, nba_namespace['b_day'], Literal(row['b_day'])))

    height_data = row['height']
    height_value_meters = float(height_data.split('/')[1].strip())
    graph.add((player_uri, nba_namespace['height'], Literal(height_value_meters, datatype=XSD.float)))

    weight_data = row['weight']
    weight_value_kg = float(weight_data.split('/')[1].strip().split()[0])  # Extract weight in kilograms
    graph.add((player_uri, nba_namespace['weight'], Literal(weight_value_kg, datatype=XSD.float)))

    salary_data = row['salary']
    salary_value = float(salary_data.strip('$').replace(',', ''))
    salary_formatted = '${:,.0f}'.format(salary_value)
    graph.add((player_uri, nba_namespace['salary'], Literal(salary_formatted)))

    graph.add((player_uri, nba_namespace['country'], Literal(row['country'])))
    graph.add((player_uri, nba_namespace['draft_year'], Literal(row['draft_year'], datatype=XSD.integer)))
    graph.add((player_uri, nba_namespace['draft_round'], Literal(row['draft_round'])))
    graph.add((player_uri, nba_namespace['draft_peak'], Literal(row['draft_peak'])))

    college_data = row['college']
    if pd.isna(college_data):
        college_data = 'No College Data'
    graph.add((player_uri, nba_namespace['college'], Literal(college_data)))

    graph.add((player_uri, nba_namespace['version'], Literal(row['version'])))

graph.serialize('nba-players-2.ttl', format='turtle')

<Graph identifier=Nefca38cdfcf243a695970087006be5f7 (<class 'rdflib.graph.Graph'>)>

In [7]:
url = 'nba-players-2.ttl'
result = graph.parse(url, format='turtle')

In [8]:
def turtle_to_dot(turtle_file, dot_file):
    graph.parse(turtle_file, format='turtle')

    with open(dot_file, "w") as f:
        f.write("digraph G {\n")
        f.write('    node [shape=box];\n')

        for s, p, o in graph:
            if p == RDF.type and o == nba_namespace.NBAPlayer:
                full_name = str(graph.value(s, nba_namespace.full_name))
                team = str(graph.value(s, nba_namespace.team))
                height = str(graph.value(s, nba_namespace.height))
                weight = str(graph.value(s, nba_namespace.weight))
                salary = str(graph.value(s, nba_namespace.salary))
                country = str(graph.value(s, nba_namespace.country))
                rating = str(graph.value(s, nba_namespace.rating))
                
                rating_number = int(rating) if rating.isdigit() else 0
                if rating_number > 80:
                    f.write(f'    "{full_name}" -> "{team}" [label="currentTeam"];\n')
                    f.write(f'    "{full_name}" -> "{height}m" [label="hasHeight"];\n')
                    f.write(f'    "{full_name}" -> "{weight}kg" [label="hasWeight"];\n')
                    f.write(f'    "{full_name}" -> "{rating}" [label="ratingNumber"];\n')
                    f.write(f'    "{full_name}" -> "{salary}" [label="annualSalary"];\n')
                    f.write(f'    "{full_name}" -> "{country}" [label="from"];\n')

        f.write("}")
turtle_to_dot("nba-players-2.ttl", "docs/output_final_rating_system.dot")

In [9]:
graph.parse('nba-players-2.ttl', format='turtle')

<Graph identifier=Nefca38cdfcf243a695970087006be5f7 (<class 'rdflib.graph.Graph'>)>

In [10]:
from rdflib.plugins.sparql import prepareQuery

num_players = 20
query = prepareQuery('''
    PREFIX nba: <https://nba-players.org/player-dataset#>
    SELECT ?player ?name ?team ?height ?weight ?salary
    WHERE {
        ?player a nba:NBAPlayer ;
            nba:full_name ?name ;
            nba:team ?team ;
            nba:height ?height ;
            nba:weight ?weight ;
            nba:salary ?salary .
    }
''', initNs={'nba_namespace': nba_namespace})

In [11]:
# players_data = {}
# results = graph.query(query)
# for row in results.bindings:
#     player_uri = row['player']
#     name = row['name']
#     team = row['team']
#     if team != "":
#         team = row['team']
#     else:
#         team = "Free Agent"
#     height = row['height']
#     weight = row['weight']
#     salary = row['salary']
#     player_index = int(player_uri.split('_')[-1])  # Extract the player index from the URI
#     player_uri_name = name.replace(" ", "")
#     players_data[player_index] = {
#         'Player URI': player_uri,
#         'Player URI Name': nba_namespace[player_uri_name],
#         'Name': name,
#         'Current Team': team,
#         'Height': f'{height} m',
#         'Weight': f'{weight} kg ',
#         'Annual Salary': salary,
#     }
    
players_data = {}
data = []
results = graph.query(query)
for row in results.bindings:
    player_uri = row['player']
    name = row['name'].value
    team = row['team'].value
    height = row['height'].value
    weight = row['weight'].value
    salary = row['salary'].value

    player_uri_name = name.replace(" ", "")

    data.append({
        'Player URI': player_uri,
        'Name': name,
        'Team': team,
        'Height': f'{height} m',
        'Weight': f'{weight} kg',
        'Annual Salary': salary,
    })
df = pd.DataFrame(data)
print(df)

                                            Player URI                   Name  \
0      https://nba-players.org/player-dataset#player_0           LeBron James   
1      https://nba-players.org/player-dataset#player_1          Kawhi Leonard   
2      https://nba-players.org/player-dataset#player_2  Giannis Antetokounmpo   
3      https://nba-players.org/player-dataset#player_3           Kevin Durant   
4      https://nba-players.org/player-dataset#player_4           James Harden   
..                                                 ...                    ...   
459  https://nba-players.org/player-dataset#player_459             Chris Paul   
460  https://nba-players.org/player-dataset#player_460           Bradley Beal   
461  https://nba-players.org/player-dataset#player_461            Rudy Gobert   
462  https://nba-players.org/player-dataset#player_462             Kyle Lowry   
463  https://nba-players.org/player-dataset#player_463     Kristaps Porzingis   

                      Team 

In [12]:
# for player_index in range(21):  # Loop from player_0 to player_10
#     if player_index in players_data:
#         player_info = players_data[player_index]
#         print(f"Player Index: {player_index}")
#         for key, value in player_info.items():
#             print(f"{key}: {value}")
#         print("-------------------")

In [13]:
# from graphviz import Source
# dot_file_path = 'docs/dummy_player.dot'
# 
# with open(dot_file_path, 'r') as dot_file:
#     dot_data = dot_file.read()
# 
# graph_source = Source(dot_data)
# output = 'docs/output/final_output_test'
# graph_source.format = 'png'
# graph_source.render(output, view=True)

In [14]:
from graphviz import Digraph
from rdflib import Namespace

# Define the namespace for players
nba_namespace = Namespace("https://nba-players.org/player-dataset#")

# Create a Graphviz Digraph object
graph = Digraph(strict=True)

# Add nodes for players and teams
player_uris = set()  # To keep track of player URIs
for row in data:
    player_uri = row['Player URI']
    team = row['Team']

    # Check if the player URI belongs to a player (in the namespace)
    if player_uri.startswith(nba_namespace):
        graph.node(player_uri, label=row['Name'], shape='box')
        player_uris.add(player_uri)

    # Add nodes for teams (not in the namespace)
    if isinstance(team, str):
        # Fix the team URI by replacing the double slash with a single slash
        team = team.replace("//", "/")
        graph.node(team, shape='ellipse')

# Add edges between players and their teams
for row in data:
    player_uri = row['Player URI']
    team = row['Team']

    # Check if the player URI belongs to a player (in the namespace)
    if player_uri.startswith(nba_namespace) and isinstance(team, str):
        graph.edge(player_uri, team)  # Connect player to their team

# Optionally, you can print the player information
print("Player Information:")
for row in data:
    for key, value in row.items():
        print(f"{key}: {value}")
    print("------------------")

# Render and save the graph as a PNG file
output_path = 'docs/output/final_output_test'
graph.format = 'png'
graph.render(output_path, view=True)


Player Information:
Player URI: https://nba-players.org/player-dataset#player_0
Name: LeBron James
Team: Los Angeles Lakers
Height: 2.06 m
Weight: 113.4 kg
Annual Salary: $37,436,858
------------------
Player URI: https://nba-players.org/player-dataset#player_1
Name: Kawhi Leonard
Team: Los Angeles Clippers
Height: 2.01 m
Weight: 102.1 kg
Annual Salary: $32,742,000
------------------
Player URI: https://nba-players.org/player-dataset#player_2
Name: Giannis Antetokounmpo
Team: Milwaukee Bucks
Height: 2.11 m
Weight: 109.8 kg
Annual Salary: $25,842,697
------------------
Player URI: https://nba-players.org/player-dataset#player_3
Name: Kevin Durant
Team: Brooklyn Nets
Height: 2.08 m
Weight: 104.3 kg
Annual Salary: $37,199,000
------------------
Player URI: https://nba-players.org/player-dataset#player_4
Name: James Harden
Team: Houston Rockets
Height: 1.96 m
Weight: 99.8 kg
Annual Salary: $38,199,000
------------------
Player URI: https://nba-players.org/player-dataset#player_5
Name: Step

dot: graph is too large for cairo-renderer bitmaps. Scaling by 0.436998 to fit


'docs\\output\\final_output_test.png'