In [3]:
import pandas as pd
import numpy as np
import nba_api

In [8]:
import logging
from nba_api.stats.static import players
from nba_api.stats.endpoints import playercareerstats
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from flask import Flask, render_template_string
import plotly.express as px
import plotly.io as pio

# Set up logging
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')

def get_player_stats(player_id, season='2023-24'):
    try:
        career = playercareerstats.PlayerCareerStats(player_id=player_id)
        df = career.get_data_frames()[0]
        return df[df['SEASON_ID'] == season].iloc[0]
    except Exception as e:
        logging.error(f"Error getting stats for player {player_id}: {str(e)}")
        return None

def prepare_data():
    active_players = [player for player in players.get_active_players()]
    data = []
    for player in active_players[:50]:  # Limit to 50 players for this example
        try:
            stats = get_player_stats(player['id'])
            if stats is not None:
                data.append({
                    'PLAYER_ID': stats['PLAYER_ID'],
                    'PLAYER_NAME': player['full_name'],
                    'PTS': stats['PTS'],
                    'AST': stats['AST'],
                    'REB': stats['REB'],
                    'STL': stats['STL'],
                    'BLK': stats['BLK']
                })
        except Exception as e:
            logging.error(f"Error processing player {player['full_name']}: {str(e)}")
    return pd.DataFrame(data)

def cluster_players(df, n_clusters=5):
    features = ['PTS', 'AST', 'REB', 'STL', 'BLK']
    X = df[features]
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    df['Cluster'] = kmeans.fit_predict(X_scaled)
    return df

# Main execution
if __name__ == "__main__":
    df = prepare_data()
    clustered_df = cluster_players(df)
    print(clustered_df.head())

2024-09-24 21:20:13,626 - DEBUG - Starting new HTTPS connection (1): stats.nba.com:443
2024-09-24 21:20:14,225 - DEBUG - https://stats.nba.com:443 "GET /stats/playercareerstats?LeagueID=&PerMode=Totals&PlayerID=1630173 HTTP/11" 200 1439
2024-09-24 21:20:14,265 - DEBUG - Starting new HTTPS connection (1): stats.nba.com:443
2024-09-24 21:20:14,584 - DEBUG - https://stats.nba.com:443 "GET /stats/playercareerstats?LeagueID=&PerMode=Totals&PlayerID=1628389 HTTP/11" 200 1934
2024-09-24 21:20:14,650 - DEBUG - Starting new HTTPS connection (1): stats.nba.com:443
2024-09-24 21:20:15,145 - DEBUG - https://stats.nba.com:443 "GET /stats/playercareerstats?LeagueID=&PerMode=Totals&PlayerID=1630534 HTTP/11" 200 1190
2024-09-24 21:20:15,198 - DEBUG - Starting new HTTPS connection (1): stats.nba.com:443
2024-09-24 21:20:15,386 - DEBUG - https://stats.nba.com:443 "GET /stats/playercareerstats?LeagueID=&PerMode=Totals&PlayerID=1630583 HTTP/11" 200 1145
2024-09-24 21:20:15,415 - DEBUG - Starting new HTTPS

   PLAYER_ID               PLAYER_NAME   PTS  AST  REB  STL  BLK  Cluster
0    1630173          Precious Achiuwa   193   44  136   16   12        0
1    1628389               Bam Adebayo  1367  278  737   81   66        2
2    1630534              Ochai Agbaji   274   47  126   27   29        4
3    1630583              Santi Aldama   654  138  352   43   54        3
4    1629638  Nickeil Alexander-Walker   655  204  167   64   42        3




In [9]:
clustered_df[

Unnamed: 0,PLAYER_ID,PLAYER_NAME,PTS,AST,REB,STL,BLK,Cluster
0,1630173,Precious Achiuwa,193,44,136,16,12,0
1,1628389,Bam Adebayo,1367,278,737,81,66,2
2,1630534,Ochai Agbaji,274,47,126,27,29,4
3,1630583,Santi Aldama,654,138,352,43,54,3
4,1629638,Nickeil Alexander-Walker,655,204,167,64,42,3
