In [5]:
import pandas as pd
import unicodedata
import re
import duckdb
from io import StringIO
from google.cloud import storage
import wget

def remove_accents(text):
    """
    Remove accent marks from input text while preserving the base characters.
    Also handles special characters like Đ/đ that have no Unicode decomposition.

    Parameters:
        text: The string to clean. Any non-string value (e.g. the NaN/None that
            pandas uses for a missing name) is returned unchanged so the function
            is safe to use with ``Series.apply`` on columns containing gaps.

    Returns:
        The input with stroke letters/ligatures replaced by plain letters and all
        combining accent marks removed, or the original object if it is not a str.

    Example:
    "Nikola Đurišić" -> "Nikola Durisic"
    """
    # Missing values are not strings; calling str methods on them would raise
    if not isinstance(text, str):
        return text

    # Characters that NFKD cannot split into "base letter + accent", so they
    # need explicit replacements
    special_chars = {
        'Đ': 'D', 'đ': 'd',  # Serbian/Croatian D with stroke
        'Ł': 'L', 'ł': 'l',  # Polish L with stroke
        'Ø': 'O', 'ø': 'o',  # Danish/Norwegian O with stroke
        'Ŧ': 'T', 'ŧ': 't',  # Sami T with stroke
        'Æ': 'AE', 'æ': 'ae',  # Æ/æ ligature
        'Œ': 'OE', 'œ': 'oe',  # Œ/œ ligature
        'ß': 'ss',  # German eszett
    }

    # Apply every replacement in a single pass over the string
    text = text.translate(str.maketrans(special_chars))

    # Decompose characters into base character + combining accent mark(s)
    normalized_text = unicodedata.normalize('NFKD', text)

    # Keep everything except the non-spacing marks (category "Mn"), i.e. the accents
    return ''.join(c for c in normalized_text if unicodedata.category(c) != 'Mn')

# Public bucket folder that holds every CSV used in this cell
NBA_DATA_URL = 'https://storage.googleapis.com/nba_award_predictor/nba_data'


def read_nba_csv(filename):
    """
    Load one CSV from the public nba_award_predictor bucket into a DataFrame.

    The file is read straight from its URL instead of being downloaded with
    wget first: wget.download() never overwrites an existing file (it saves
    "name (1).csv" instead), so re-running the cell would keep re-reading the
    stale first download and litter the working directory with copies.

    Parameters:
        filename: Name of the CSV inside the bucket's nba_data/ folder.

    Returns:
        pandas.DataFrame with the parsed contents of the CSV.
    """
    return pd.read_csv(f'{NBA_DATA_URL}/{filename}')


# Read in the playeroftheweek csv
playeroftheweek_df = read_nba_csv('playeroftheweek.csv')

# Clean each player's full name
playeroftheweek_df["player"] = playeroftheweek_df["player"].apply(remove_accents)

# Bring in name mapping table for names to help match all names to the format seen in the NBA API
name_mapping_df = read_nba_csv('name_mappings.csv')

# Bring in nba player lookup table to map the cleaned names to player IDs. Same player IDs from the NBA API.
nba_player_lookup_df = read_nba_csv('nba_player_lookup.csv')

# Clean each player's full name
nba_player_lookup_df["player_name"] = nba_player_lookup_df["player_name"].apply(remove_accents)

# CTE  : attach the manual name mapping (if any) to every player-of-the-week row.
# CTE2 : player_full_name = mapped name when one exists, otherwise the cleaned name.
# Final: (LEFT JOIN to the lookup) EXCEPT (INNER JOIN to the lookup) keeps only the
#        rows that found no player_id, i.e. the names that still need a mapping.
query = """
WITH CTE AS (
SELECT * FROM playeroftheweek_df
LEFT JOIN name_mapping_df
ON playeroftheweek_df.player = name_mapping_df.in_table_name
)
,CTE2 AS (
SELECT *,
CASE WHEN nba_lookup_name IS NULL THEN player
ELSE nba_lookup_name
END AS player_full_name
FROM CTE
)

SELECT CTE2.*
,nba_player_lookup_df.player_id
FROM CTE2
LEFT JOIN nba_player_lookup_df
ON CTE2.player_full_name = nba_player_lookup_df.player_name

EXCEPT 

SELECT CTE2.*
,nba_player_lookup_df.player_id
FROM CTE2
JOIN nba_player_lookup_df
ON CTE2.player_full_name = nba_player_lookup_df.player_name

"""

# DuckDB resolves the table names in the query from the DataFrames defined above
player_of_the_week_df = duckdb.query(query).df()
# Previously disabled follow-up, kept for reference:
# .drop(['person_id', 'player', 'display_last_comma_first', 'display_fi_last', 'player_slug', 'in_table_name', 'nba_lookup_name'], axis=1)
player_of_the_week_df

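# NOTE(review): the disabled code below operates on `common_player_info_df`, which this
# cell never defines - it looks like a leftover from another notebook; confirm before re-enabling.
# #Rearrange columns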
# cols = common_player_info_df.columns.tolist()
# new_cols = [cols[-1], cols[-2]] + cols[:-2]
# common_player_info_df = common_player_info_df[new_cols]
# common_player_info_df.to_csv('common-player-info.csv')


# # Path to your credentials file
# credentials_path = 'cis-5450-final-project-485661e2f371.json'

# # Set up the client with your credentials
# storage_client = storage.Client.from_service_account_json(credentials_path)

# # Specify your bucket name
# bucket_name = 'nba_award_predictor'
# bucket = storage_client.bucket(bucket_name)

# # Define blob (file in GCS) and upload from the local file
# blob = bucket.blob('nba_data/common-player-info.csv')
# blob.cache_control = "max-age=0"
# blob.upload_from_filename('common-player-info.csv')

# print(f"File uploaded to gs://{bucket_name}/nba_data/common-player-info.csv")


Unnamed: 0,season,player,conference,date,team,pos,height,weight,age,Pre-Draft Team,Draft Yr,yos,in_table_name,nba_lookup_name,player_full_name,player_id
0,1980-1981,Tiny Archibald,,1981-01-11,Boston Celtics,PG,6-1,150,32,UTEP,1970,10,,,Tiny Archibald,
1,1987-1988,Bobby Hansen,,1988-03-06,Utah Jazz,SG,6-6,190,27,Iowa,1983,4,,,Bobby Hansen,
2,1995-1996,Penny Hardaway,,1995-11-12,Orlando Magic,PG,6-7,195,24,Memphis,1993,2,,,Penny Hardaway,
3,1979-1980,Billy Ray Bates,,1980-03-23,Portland Trail Blazers,SG,6-4,210,23,Kentucky State,1978,1,,,Billy Ray Bates,
4,1987-1988,Fat Lever,,1988-04-17,Denver Nuggets,PG,6-3,170,27,Arizona State,1982,5,,,Fat Lever,
5,1986-1987,Fat Lever,,1987-02-01,Denver Nuggets,PG,6-3,170,26,Arizona State,1982,4,,,Fat Lever,
6,1997-1998,Steve Smith,,1998-01-11,Atlanta Hawks,SG,6-7,200,28,Michigan State,1991,6,,,Steve Smith,
7,1995-1996,Cliff Robinson,,1996-01-07,Portland Trail Blazers,PF,6-10,225,29,UConn,1989,6,,,Cliff Robinson,
8,2012-2013,J.R. Smith,East,2013-04-01,New York Knicks,SG,6-6,225,27,Saint Benedict's Preparatory School (New Jersey),2004,8,,,J.R. Smith,
9,2018-2019,C.J. McCollum,West,2018-11-12,Portland Trail Blazers,G,6-3,190,27,Lehigh,2013,5,,,C.J. McCollum,


In [6]:
# Inspect the column dtypes pandas inferred for the player-of-the-week table
playeroftheweek_df.dtypes

season            object
player            object
conference        object
date              object
team              object
pos               object
height            object
weight             int64
age                int64
Pre-Draft Team    object
Draft Yr           int64
yos                int64
dtype: object

In [7]:
# Inspect the column dtypes of the name-mapping table
name_mapping_df.dtypes

in_table_name      object
nba_lookup_name    object
dtype: object

In [8]:
# Display the name-mapping table (in_table_name -> nba_lookup_name)
name_mapping_df

Unnamed: 0,in_table_name,nba_lookup_name
0,Jimmy Butler,Jimmy Butler III
1,J.D. Davison,JD Davison
2,Ha Ha,Ha Seung-jin
3,"Bear, The Body Hoffman",Paul 'The Bear' Hoffman
4,Kenyon Martin Jr.,KJ Martin
5,"Ted, Hound Dog McClain",Ted 'Hound Dog' McClain
6,Tre Scott,Trevon Scott
7,Sun Sun,Sun Yue
8,O.G. Anunoby,OG Anunoby
9,Brandon Boston Jr.,Brandon Boston


In [15]:
# # Bring in nba player lookup table to map the cleaned names to player IDs. Same player IDs from the NBA API.
# url = 'https://storage.googleapis.com/nba_award_predictor/nba_data/nba_player_lookup.csv'
# response = requests.get(url)
# if response.status_code == 200:
#     # Read in the name_mapping csv
#     nba_player_lookup_df = pd.read_csv('nba_player_lookup.csv')
# else:
#     print(f"Failed to retrieve data: Status code {response.status_code}")
#     print(response.text)  # Print the response content for debugging

# Spot-check the lookup table: list every player whose name contains "World"
query = (
    "\n"
    "SELECT * FROM nba_player_lookup_df\n"
    "WHERE player_name LIKE '%World%'\n"
)

# The query reads the in-memory nba_player_lookup_df DataFrame by name
world_name_matches = duckdb.query(query)
testing2 = world_name_matches.df()
testing2

Unnamed: 0,player_id,player_name,first_name,last_name,is_active
0,76753,World Free,World,Free,False
1,1897,Metta World Peace,Metta,World Peace,False


In [14]:
# NOTE(review): this cell repeats the "World" lookup from the preceding cell and
# the two were executed out of order; one of them can probably be removed.
query = (
    "\n"
    "SELECT * FROM nba_player_lookup_df\n"
    "WHERE player_name LIKE '%World%'\n"
)

# The query reads the in-memory nba_player_lookup_df DataFrame by name
world_name_matches = duckdb.query(query)
testing2 = world_name_matches.df()
testing2

Unnamed: 0,player_id,player_name,first_name,last_name,is_active
0,76753,World Free,World,Free,False
1,1897,Metta World Peace,Metta,World Peace,False
