In [94]:
import requests
import pandas as pd

# Your API key
api_key = '4b55c535f3d69ddc9efb596afc4046f8'

# Define the base URL for The Odds API and the list of player prop markets
base_url = 'https://api.the-odds-api.com/v4/sports'
nba_player_prop_markets = [
    'player_points', 'player_rebounds', 'player_assists',
    'player_threes', 'player_blocks', 'player_steals',
    'player_blocks_steals', 'player_turnovers',
    'player_points_rebounds_assists', 'player_points_rebounds',
    'player_points_assists', 'player_rebounds_assists',
    'player_first_basket', 'player_double_double',
    'player_triple_double', 'player_points_alternate',
    'player_rebounds_alternate', 'player_assists_alternate',
    'player_blocks_alternate', 'player_steals_alternate',
    'player_threes_alternate', 'player_points_assists_alternate',
    'player_points_rebounds_alternate', 'player_rebounds_assists_alternate',
    'player_points_rebounds_assists_alternate'
]

# Initialize a list to store data for each game
all_games_data = []

# Fetch the list of NBA games
games_url = f"{base_url}/basketball_nba/events"
games_params = {'apiKey': api_key}
games_response = requests.get(games_url, params=games_params)

if games_response.status_code == 200:
    games_data = games_response.json()
    
    # Iterate over each game to fetch player props odds
    for game in games_data:
        game_id = game['id']
        
        # Initialize a dictionary to store odds data for this game
        game_odds_data = {
            'Game ID': game_id,
            'Home Team': game['home_team'],
            'Away Team': game['away_team'],
            'Player Props': []
        }
        
        # Fetch odds for player props for this game
        for market in nba_player_prop_markets:
            odds_url = f"{base_url}/basketball_nba/events/{game_id}/odds"
            odds_params = {
                'apiKey': api_key,
                'markets': market,  # Fetch odds for the current market
                'regions': 'us'  # Specify your desired regions
            }
            odds_response = requests.get(odds_url, params=odds_params)
            
            if odds_response.status_code == 200:
                odds_data = odds_response.json()
                # Append the odds data for this market to the game_odds_data
                game_odds_data['Player Props'].append(odds_data)
        
        # Append the collected odds data for this game to the all_games_data list
        all_games_data.append(game_odds_data)

# Convert the list of game data into a pandas DataFrame
df_games = pd.DataFrame(all_games_data)




In [95]:
print(df_games.head())

                            Game ID            Home Team         Away Team  \
0  9768de636bd9b326cee827df527c6b0f       Indiana Pacers   Toronto Raptors   
1  dada6a3a3971143c47a4386c2562db81      New York Knicks   Detroit Pistons   
2  66a9cfa921a31e1a9e58fdb41894bba8    Memphis Grizzlies     Brooklyn Nets   
3  410e8f25287ad1c8d96be214730b1a17     Sacramento Kings        Miami Heat   
4  709971c24dfc3bea07590c093fd5abc4  Cleveland Cavaliers  Dallas Mavericks   

  Player Props  
0           []  
1           []  
2           []  
3           []  
4           []  


In [71]:
import pandas as pd
import ast
import json

# Assuming `df_games` is a DataFrame loaded from your CSV or constructed with the provided data

# Convert the 'Player Props' column from string to actual Python objects
#df_games['Player Props'] = df_games['Player Props'].apply(lambda x: json.loads(x))


# Initialize an empty list to collect data
collected_data = []

# Iterate over each row in the DataFrame to process player props
for index, row in df_games.iterrows():
    game_id = row['Game ID']
    home_team = row['Home Team']
    away_team = row['Away Team']
    
    # Iterate over each player prop in 'Player Props' column
    for player_prop in row['Player Props']:
        # Assuming each 'player_prop' is a dictionary containing 'bookmakers' and other details
        for bookmaker in player_prop.get('bookmakers', []):
            for market in bookmaker.get('markets', []):
                market_key = market.get('key')
                last_update = market.get('last_update', '')
                for outcome in market.get('outcomes', []):
                    player_name = outcome.get('description', '')
                    over_under = outcome.get('name', '')
                    price = outcome.get('price')
                    point = outcome.get('point', '')

                    collected_data.append({
                        'Game ID': game_id,
                        'Home Team': home_team,
                        'Away Team': away_team,
                        'Player Name': player_name,
                        'Market': market_key,
                        'Over/Under': over_under,
                        'Price': price,
                        'Point': point,
                        'last_update': last_update
                    })

# Convert the collected data into a DataFrame
df_processed = pd.DataFrame(collected_data)


#filter for only Brandon Miller
#df_processed = df_processed[df_processed['Player Name'] == 'Caleb Martin']


# Convert 'last_update' (format: 2024-02-07T15:39:22Z) by replacing 'T' with a space and removing the 'Z' at the end
df_processed['last_update'] = df_processed['last_update'].str.replace('T', ' ').str.replace('Z', '')
df_processed['last_update'] = pd.to_datetime(df_processed['last_update'])
#print(df_processed['last_update'])
# Sort by 'Player Name', 'Market', 'Over/Under', and 'last_update' in descending order to bring the latest updates to the top
df_processed.sort_values(by=['Player Name', 'last_update'], ascending=[True, False], inplace=True) #'Market', 'Over/Under',
#print(df_processed)

# Drop duplicates, keeping only the first (most recent) entry for each combination of 'Player Name', 'Market', and 'Over/Under'
df_latest_odds = df_processed.drop_duplicates(subset=['Player Name', 'Market', 'Over/Under', 'last_update'], keep='first')

#filter for Over/Under as 'Yes'
#df_latest_odds = df_latest_odds[df_latest_odds['Over/Under'] == 'Yes']

# Create separate columns for Over Price and Under Price
df_latest_odds['Over Price'] = df_latest_odds.apply(lambda x: x['Price'] if x['Over/Under'] == 'Over' else None, axis=1)
df_latest_odds['Under Price'] = df_latest_odds.apply(lambda x: x['Price'] if x['Over/Under'] == 'Under' else None, axis=1)

# Pivot the table for points (points are the same for both Over and Under)
df_pivoted_points = df_latest_odds.pivot_table(
    index=['Game ID', 'Home Team', 'Away Team', 'Player Name'],
    columns='Market',
    values='Point',
    aggfunc='first'
).reset_index()

# Pivot the table for prices
df_pivoted_prices = df_latest_odds.pivot_table(
    index=['Game ID', 'Home Team', 'Away Team', 'Player Name'],
    columns='Market',
    values=['Over Price', 'Under Price'],
    aggfunc='first'
).reset_index()

# Flatten MultiIndex for prices
df_pivoted_prices.columns = [' '.join(col).strip() for col in df_pivoted_prices.columns.values]

# Merge points and prices DataFrames
odds_df = pd.merge(df_pivoted_points, df_pivoted_prices, on=['Game ID', 'Home Team', 'Away Team', 'Player Name'], how='left')

print(odds_df.head())


KeyError: 'last_update'

Pull out the season stats

In [86]:
#%%player_analysis/players_game_logs.py
from nba_api.stats.endpoints import commonallplayers, playergamelog, leaguedashplayerstats
import pandas as pd
from datetime import datetime
from nba_api.stats.static import teams
import time

# Function to get the current NBA season year
def get_current_nba_season_year():
    current_date = datetime.now()
    if current_date.month > 9:  # NBA season starts in October
        return str(current_date.year) + "-" + str(current_date.year + 1)[2:]
    else:
        return str(current_date.year - 1) + "-" + str(current_date.year)[2:]

# Input for minimum average playing minutes
min_avg_minutes = 30.0  # Set minimum average minutes here

# Retrieve all players
print("Retrieving all players...")
all_players = commonallplayers.CommonAllPlayers(is_only_current_season=0).get_data_frames()[0]

print(f"Total players retrieved: {len(all_players)}")

# Retrieve player stats to find averages
current_season = get_current_nba_season_year()
print(f"Retrieving player stats for the {current_season} season...")
player_stats = leaguedashplayerstats.LeagueDashPlayerStats(season=current_season).get_data_frames()[0]

# Calculate average minutes per game for each player
player_stats['AVG_MIN'] = player_stats['MIN'] / player_stats['GP']


# Filter for players who average 30 or more minutes per game
eligible_players = player_stats[player_stats['AVG_MIN'] >= min_avg_minutes]
print(f"Players averaging more than {min_avg_minutes} minutes per game: {len(eligible_players)}")

# Fetch team data and create a mapping from abbreviations to full names
teams_list = teams.get_teams()
team_abbrev_to_full_name = {team['abbreviation']: team['full_name'] for team in teams_list}
nba_team_abbreviations = set(team_abbrev_to_full_name.keys())  # Set of all NBA team abbreviations

# Initialize DataFrame for new player data
new_players_data = pd.DataFrame()

print("Fetching game logs for eligible players...")
# Loop over each eligible player and pull game logs
for index, player in eligible_players.iterrows():
    player_id = player['PLAYER_ID']
    player_name = player['PLAYER_NAME']
    
    print(f"Processing game logs for {player_name}...")
    player_log = playergamelog.PlayerGameLog(player_id=player_id, season=current_season)
    player_data = player_log.get_data_frames()[0]
    
    if not player_data.empty:
        player_data['PLAYER_NAME'] = player_name
        
        # Extract team and opponent abbreviations from MATCHUP
        player_data['TEAM_ABBREVIATION'] = player_data['MATCHUP'].str[:3]
        player_data['OPPONENT_ABBREVIATION'] = player_data['MATCHUP'].apply(lambda x: x.split(' ')[2] if 'vs.' in x else x.split(' ')[-1])
        
        # Map team and opponent abbreviations to full names
        player_data['TEAM_NAME'] = player_data['TEAM_ABBREVIATION'].map(team_abbrev_to_full_name)
        player_data['OPPONENT_NAME'] = player_data['OPPONENT_ABBREVIATION'].map(team_abbrev_to_full_name)
        
        # Ensure game logs are only for NBA teams
        player_data = player_data[player_data['TEAM_ABBREVIATION'].isin(nba_team_abbreviations)]
        
        new_players_data = pd.concat([new_players_data, player_data], ignore_index=True)
    
    # Include a sleep to respect rate limits
    time.sleep(0.6)  # Adjusted to optimize the rate limit adherence

print("Game log fetching complete.")
print(new_players_data[['PLAYER_NAME', 'TEAM_NAME', 'OPPONENT_NAME', 'MIN']].head())

#save to csv in player_analysis
new_players_data.to_csv('player_analysis/player_game_logs.csv', index=False)


Retrieving all players...
Total players retrieved: 4914
Retrieving player stats for the 2023-24 season...
Players averaging more than 30.0 minutes per game: 96
Fetching game logs for eligible players...
Processing game logs for Aaron Gordon...
Processing game logs for Alperen Sengun...
Processing game logs for Anfernee Simons...
Processing game logs for Anthony Davis...
Processing game logs for Anthony Edwards...
Processing game logs for Austin Reaves...
Processing game logs for Bam Adebayo...
Processing game logs for Bojan Bogdanovic...
Processing game logs for Bradley Beal...
Processing game logs for Brandon Ingram...
Processing game logs for Brandon Miller...
Processing game logs for Brook Lopez...
Processing game logs for CJ McCollum...
Processing game logs for Cade Cunningham...
Processing game logs for Chet Holmgren...
Processing game logs for Coby White...
Processing game logs for D'Angelo Russell...
Processing game logs for Damian Lillard...
Processing game logs for Darius Garl

In [88]:
from nba_api.stats.static import teams
from nba_api.stats.endpoints import leaguegamefinder
import pandas as pd
import datetime

# Assuming new_players_data is already defined and contains 'GAME_DATE', 'TEAM_NAME', and 'OPPONENT_NAME' columns

# Get all games using LeagueGameFinder for the current season
#probably includes preseason
all_games = leaguegamefinder.LeagueGameFinder(season_nullable='2023-24').get_data_frames()[0]

# Convert 'GAME_DATE' to datetime format for all_games
all_games['GAME_DATE'] = pd.to_datetime(all_games['GAME_DATE'])

#filter from the beginning of the season
all_games = all_games[all_games['GAME_DATE'] > pd.to_datetime('2023-10-24')]

# Sort games by date
all_games = all_games.sort_values('GAME_DATE')

# Calculate cumulative wins and games played for each team
all_games['WIN'] = all_games['WL'].apply(lambda x: 1 if x == 'W' else 0)
all_games['CUMULATIVE_WINS'] = all_games.groupby('TEAM_NAME')['WIN'].cumsum()
all_games['CUMULATIVE_GAMES'] = all_games.groupby('TEAM_NAME').cumcount() + 1  # +1 since cumcount starts at 0
all_games['CUMULATIVE_WIN_RATE'] = all_games['CUMULATIVE_WINS'] / all_games['CUMULATIVE_GAMES']

# For each game in new_players_data, find the team and opponent win rate up to that date
def get_win_rate(row, team_column):
    game_date = row['GAME_DATE']
    team_name = row[team_column]
    # Find the last game of the team up to the date
    team_games = all_games[(all_games['TEAM_NAME'] == team_name) & (all_games['GAME_DATE'] < game_date)]
    if not team_games.empty:
        return team_games.iloc[-1]['CUMULATIVE_WIN_RATE']
    else:
        return None

new_players_data['TEAM_WIN_RATE'] = new_players_data.apply(lambda row: get_win_rate(row, 'TEAM_NAME'), axis=1)
new_players_data['OPPONENT_WIN_RATE'] = new_players_data.apply(lambda row: get_win_rate(row, 'OPPONENT_NAME'), axis=1)

# Example to check the new columns added
print(new_players_data[['PLAYER_NAME', 'TEAM_NAME', 'OPPONENT_NAME', 'GAME_DATE', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE']].head())
print(new_players_data.columns)


    PLAYER_NAME       TEAM_NAME           OPPONENT_NAME     GAME_DATE  \
0  Aaron Gordon  Denver Nuggets   Golden State Warriors  FEB 25, 2024   
1  Aaron Gordon  Denver Nuggets  Portland Trail Blazers  FEB 23, 2024   
2  Aaron Gordon  Denver Nuggets      Washington Wizards  FEB 22, 2024   
3  Aaron Gordon  Denver Nuggets        Sacramento Kings  FEB 14, 2024   
4  Aaron Gordon  Denver Nuggets         Milwaukee Bucks  FEB 12, 2024   

   TEAM_WIN_RATE  OPPONENT_WIN_RATE  
0       0.660714           0.537037  
1       0.654545           0.277778  
2       0.648148           0.166667  
3       0.660377           0.566038  
4       0.673077           0.641509  
Index(['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
       'PTS', 'PLUS_MINUS', 'VIDEO_AVAILABLE', 'PLAYER_NAME',
       'TEAM_ABBREVIATION', 'OPPONENT_ABBREV

N Game Averages
* grouped by home/away, player, team

In [103]:
#%%writefile player_analysis/player_game_logs.py
import pandas as pd
from datetime import datetime
from nba_api.stats.endpoints import commonallplayers, playergamelog, leaguedashplayerstats, leaguegamefinder
from nba_api.stats.static import teams
import time

def get_current_nba_season_year():
    current_date = datetime.now()
    if current_date.month > 9:  # NBA season starts in October
        return str(current_date.year) + "-" + str(current_date.year + 1)[2:]
    else:
        return str(current_date.year - 1) + "-" + str(current_date.year)[2:]

def calculate_cumulative_win_rates(season):
    try:
        # Adjust the season start date based on the typical NBA season start dates
        season_start_date = season.split('-')[0] + "-10-01"  # Assuming October 1st as a generic start date
        all_games = leaguegamefinder.LeagueGameFinder(season_nullable=season).get_data_frames()[0]
        all_games['GAME_DATE'] = pd.to_datetime(all_games['GAME_DATE'])
        all_games = all_games[all_games['GAME_DATE'] > pd.to_datetime(season_start_date)]
        all_games = all_games.sort_values('GAME_DATE')
        all_games['WIN'] = all_games['WL'].apply(lambda x: 1 if x == 'W' else 0)
        all_games['CUMULATIVE_WINS'] = all_games.groupby('TEAM_NAME')['WIN'].cumsum()
        all_games['CUMULATIVE_GAMES'] = all_games.groupby('TEAM_NAME').cumcount() + 1
        all_games['CUMULATIVE_WIN_RATE'] = all_games['CUMULATIVE_WINS'] / all_games['CUMULATIVE_GAMES']
        return all_games
    except Exception as e:
        print(f"Error calculating cumulative win rates: {e}")
        return pd.DataFrame()
    

def get_win_rate(row, team_type, all_games):
    game_date = row['GAME_DATE']
    team_name = row[team_type]
    team_games = all_games[(all_games['TEAM_NAME'] == team_name) & (all_games['GAME_DATE'] < game_date)]
    if not team_games.empty:
        return team_games.iloc[-1]['CUMULATIVE_WIN_RATE']
    else:
        return 0.0


def load_nba_player_game_logs(seasons, min_avg_minutes=30.0, save_path='player_analysis/player_game_logs.csv'):
    if not isinstance(seasons, list):
        seasons = [seasons]

    new_players_data = pd.DataFrame()

    for season in seasons:
        print(f"Processing season {season}...")
        try:
            all_players = commonallplayers.CommonAllPlayers(is_only_current_season=0).get_data_frames()[0]
            player_stats = leaguedashplayerstats.LeagueDashPlayerStats(season=season).get_data_frames()[0]
        except Exception as e:
            print(f"Error fetching player stats for season {season}: {e}")
            continue

        player_stats['AVG_MIN'] = player_stats['MIN'] / player_stats['GP']
        eligible_players = player_stats[player_stats['AVG_MIN'] >= min_avg_minutes]
        teams_list = teams.get_teams()
        team_abbrev_to_full_name = {team['abbreviation']: team['full_name'] for team in teams_list}

        all_games = calculate_cumulative_win_rates(season)
        if all_games.empty:
            print("Skipping win rate calculation due to an error.")
            continue

        for index, player in eligible_players.iterrows():
            try:
                player_id = player['PLAYER_ID']
                player_name = player['PLAYER_NAME']
                player_log = playergamelog.PlayerGameLog(player_id=player_id, season=season)
                player_data = player_log.get_data_frames()[0]
                if player_data.empty:
                    continue
                player_data['PLAYER_NAME'] = player_name
                player_data['TEAM_ABBREVIATION'] = player_data['MATCHUP'].str[:3]
                player_data['OPPONENT_ABBREVIATION'] = player_data['MATCHUP'].apply(lambda x: x.split(' ')[2] if 'vs.' in x else x.split(' ')[-1])
                player_data['TEAM_NAME'] = player_data['TEAM_ABBREVIATION'].map(team_abbrev_to_full_name)
                player_data['OPPONENT_NAME'] = player_data['OPPONENT_ABBREVIATION'].map(team_abbrev_to_full_name)
                player_data = player_data[player_data['TEAM_ABBREVIATION'].isin(team_abbrev_to_full_name.keys())]
                new_players_data = pd.concat([new_players_data, player_data], ignore_index=True)
            except Exception as e:
                print(f"Error processing player {player_name}: {e}")
                continue
            time.sleep(0.6)  # To respect rate limits

        # Calculate team and opponent win rates
        new_players_data['GAME_DATE'] = pd.to_datetime(new_players_data['GAME_DATE'])
        new_players_data['TEAM_WIN_RATE'] = new_players_data.apply(lambda row: get_win_rate(row, 'TEAM_NAME', all_games), axis=1)
        new_players_data['OPPONENT_WIN_RATE'] = new_players_data.apply(lambda row: get_win_rate(row, 'OPPONENT_NAME', all_games), axis=1)

        new_players_data['home_away'] = new_players_data['MATCHUP'].str.split(' ').str[1]
        new_players_data['home_away'] = new_players_data['home_away'].apply(lambda x: 'Home' if '@' in x else 'Away')
        new_players_data.reset_index(drop=True)
    if not new_players_data.empty:
        new_players_data.to_csv(save_path, index=False)
        print(f"Player game logs saved to {save_path}")
    else:
        print("No player game logs to save.")



# Example usage
seasons = ['2022-23']  # You can adjust seasons as needed
load_nba_player_game_logs(seasons, min_avg_minutes=30.0, save_path='player_analysis/player_game_logs_winr.csv')


Processing season 2022-23...


  new_players_data['GAME_DATE'] = pd.to_datetime(new_players_data['GAME_DATE'])


Player game logs saved to player_analysis/player_game_logs_winr.csv


In [96]:
#%%writefile player_analysis/metrics_functions.py
import pandas as pd
import numpy as np

#load in data from player_game_logs_winr.csv
data = pd.read_csv('player_analysis/player_game_logs_winr.csv')

#filter for only Brandon Miller
data = data[data['PLAYER_NAME'] == 'Tyrese Haliburton']
#print(data.head())

def prepare_data(df, n_games=10):
    # Aggregate data with mean for the last n games
    agg_funcs_mean = {stat: 'mean' for stat in ['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB', 'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE']}
    agg_funcs_std = {stat: 'std' for stat in ['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB', 'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN']}
    
    # Filter last n_games for each player in each home/away category
    grouped = df.groupby(['home_away', 'PLAYER_NAME', 'TEAM_NAME'])
    
    # Initialize an empty DataFrame to store results
    result_df = pd.DataFrame()

    for name, group in grouped:
        last_n_games = group.tail(n_games)
        
        # Calculate mean and std for the group, ensuring at least 1 row to avoid NaN in std
        if len(last_n_games) > 1:
            stats_mean = last_n_games.agg(agg_funcs_mean).to_frame().T
            stats_std = last_n_games.agg(agg_funcs_std).to_frame().T
        else:
            stats_mean = last_n_games.agg(agg_funcs_mean).to_frame().T
            stats_std = pd.DataFrame(0, index=stats_mean.index, columns=stats_mean.columns)  # Fill with 0 if only 1 game
        
        # Combine mean and std data, prefixing std columns
        stats_std.columns = [f'{col}_std' for col in stats_std.columns]
        combined_stats = pd.concat([stats_mean, stats_std], axis=1)
        combined_stats['home_away'] = name[0]
        combined_stats['PLAYER_NAME'] = name[1]
        combined_stats['TEAM_NAME'] = name[2]
        combined_stats['games_count'] = len(last_n_games)
        
        result_df = pd.concat([result_df, combined_stats], ignore_index=True)

    # Flatten MultiIndex columns if needed and reset index
    result_df = result_df.reset_index(drop=True)
    
    return result_df

# Apply the updated function to your data
aggregated_data = prepare_data(data)

# Sort by player name
aggregated_data = aggregated_data.sort_values(by='PLAYER_NAME')

print(aggregated_data.head())
print(aggregated_data.columns)


    PTS  FGM   FGA  FG3M  FG3A  FTM  FTA   AST  OREB  DREB  ...  DREB_std  \
0  23.9  8.4  17.4   3.1   7.9  4.0  4.3  10.9   0.4   2.9  ...  1.286684   
1  25.9  9.1  17.7   4.0   9.3  3.7  4.6  13.4   0.8   3.8  ...  1.988858   

    REB_std   TOV_std   STL_std   BLK_std   MIN_std  home_away  \
0  1.702939  1.032796  0.823273  0.674949  4.532598       Away   
1  2.366432  2.540779  1.286684  0.674949  4.001389       Home   

         PLAYER_NAME       TEAM_NAME  games_count  
0  Tyrese Haliburton  Indiana Pacers           10  
1  Tyrese Haliburton  Indiana Pacers           10  

[2 rows x 36 columns]
Index(['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB',
       'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN', 'TEAM_WIN_RATE',
       'OPPONENT_WIN_RATE', 'PTS_std', 'FGM_std', 'FGA_std', 'FG3M_std',
       'FG3A_std', 'FTM_std', 'FTA_std', 'AST_std', 'OREB_std', 'DREB_std',
       'REB_std', 'TOV_std', 'STL_std', 'BLK_std', 'MIN_std', 'home_away',
       'PLAYER_NAME', 'TEA

In [100]:
import pandas as pd
import numpy as np

#filter for only Brandon Miller
data = data[data['PLAYER_NAME'] == 'Tyrese Haliburton']
#print(data.head())

def opp_team_averages(df):
    # Aggregate data with mean and std for the entire season
    agg_funcs_mean = {stat: 'mean' for stat in ['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB', 'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE']}
    agg_funcs_std = {stat: 'std' for stat in ['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB', 'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN']}
    
    # Group by home/away, player name, team name, and opponent name
    grouped = df.groupby(['home_away', 'PLAYER_NAME', 'TEAM_NAME', 'OPPONENT_NAME'])
    
    # Initialize an empty DataFrame to store results
    result_df = pd.DataFrame()

    for name, group in grouped:
        # Calculate mean and std for the entire season for the group
        stats_mean = group.agg(agg_funcs_mean).to_frame().T
        stats_std = group.agg(agg_funcs_std).to_frame().T
        
        # Combine mean and std data, prefixing std columns
        stats_std.columns = [f'{col}_std' for col in stats_std.columns]
        combined_stats = pd.concat([stats_mean, stats_std], axis=1)
        combined_stats['home_away'] = name[0]
        combined_stats['PLAYER_NAME'] = name[1]
        combined_stats['TEAM_NAME'] = name[2]
        combined_stats['OPPONENT_NAME'] = name[3]
        combined_stats['games_count'] = len(group)
        
        result_df = pd.concat([result_df, combined_stats], ignore_index=True)

    # Flatten MultiIndex columns if needed and reset index
    result_df = result_df.reset_index(drop=True)
    
    return result_df


# Apply the updated function to your data
opp_team_avgs = opp_team_averages(data)

# Sort by player name
opp_team_avgs = opp_team_avgs.sort_values(by='PLAYER_NAME')

print(opp_team_avgs.head())
print(opp_team_avgs.columns)


     PTS   FGM   FGA      FG3M       FG3A       FTM  FTA   AST      OREB  \
0   21.5   7.5  17.5  3.500000  10.000000  3.000000  3.0  10.0  2.000000   
28  21.0   6.0  14.0  2.000000   7.000000  7.000000  9.0  17.0  0.000000   
27  29.0  10.0  16.0  5.000000   9.000000  4.000000  4.5  16.0  0.500000   
26  18.5   6.5  12.0  3.000000   7.500000  2.500000  3.0   8.5  0.000000   
25  25.0  10.0  19.0  1.666667   6.666667  3.333333  4.0  11.0  1.666667   

        DREB  ...   REB_std   TOV_std   STL_std   BLK_std   MIN_std  \
0   6.000000  ...  2.828427  2.828427  2.828427  0.000000  3.535534   
28  2.000000  ...       NaN       NaN       NaN       NaN       NaN   
27  3.500000  ...  4.242641  0.000000  0.000000  0.000000  0.707107   
26  3.500000  ...  0.707107  0.000000  0.000000  0.707107  5.656854   
25  5.333333  ...  2.000000  1.527525  0.577350  0.577350  3.464102   

    home_away        PLAYER_NAME       TEAM_NAME           OPPONENT_NAME  \
0        Away  Tyrese Haliburton  Indian

In [106]:
#%%writefile player_analysis/metrics_functions.py
import pandas as pd
import numpy as np

def aggregate_stats(group, stats, n_games):
    """
    Calculate mean and standard deviation for the specified statistics over the last n games.
    
    Parameters:
    - group (DataFrame): The player's game log data.
    - stats (list): The list of statistics to aggregate.
    - n_games (int): Number of recent games to consider.

    Returns:
    - DataFrame: Aggregated statistics with mean and standard deviation.
    """
    last_n_games = group.tail(n_games)
    agg_funcs_mean = {stat: 'mean' for stat in stats}
    agg_funcs_std = {stat: 'std' for stat in stats}

    if len(last_n_games) > 1:
        stats_mean = last_n_games.agg(agg_funcs_mean).to_frame().T
        stats_std = last_n_games.agg(agg_funcs_std).to_frame().T
    else:
        stats_mean = last_n_games.agg(agg_funcs_mean).to_frame().T
        stats_std = pd.DataFrame(0, index=stats_mean.index, columns=stats_mean.columns)

    stats_std.columns = [f'{col}_std' for col in stats_std.columns]
    combined_stats = pd.concat([stats_mean, stats_std], axis=1)

    return combined_stats

def prepare_data(df, n_games=10, current_date=None, current_season=None):
    """
    Prepare aggregated data for players over the last n games up to the current date and within the current season,
    considering home/away context.

    Parameters:
    - df (DataFrame): The dataset containing player game logs.
    - n_games (int): Number of recent games to aggregate statistics for.
    - current_date (str): The current date in format 'YYYY-MM-DD' to filter games up to this date.
    - current_season (str): The current season identifier to filter games within the current season.

    Returns:
    - DataFrame: The aggregated data.
    """
    stats = ['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB', 'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE']
    
    # Filter based on current date and season, if specified
    if current_date:
        df = df[df['GAME_DATE'] <= current_date]
    if current_season:
        df = df[df['SEASON'] == current_season]

    grouped = df.groupby(['home_away', 'PLAYER_NAME', 'TEAM_NAME'])
    result_df = pd.DataFrame()

    for name, group in grouped:
        combined_stats = aggregate_stats(group, stats, n_games)
        combined_stats['home_away'] = name[0]
        combined_stats['PLAYER_NAME'] = name[1]
        combined_stats['TEAM_NAME'] = name[2]
        combined_stats['games_count'] = len(group.tail(n_games))
        result_df = pd.concat([result_df, combined_stats], ignore_index=True)

    result_df = result_df.reset_index(drop=True)
    return result_df


#load in data from player_game_logs_winr.csv
data = pd.read_csv('player_analysis/player_game_logs_winr.csv')

# Assuming 'data' is your DataFrame loaded from 'player_game_logs_winr.csv'
aggregated_data = prepare_data(data, n_games=10)
print(aggregated_data.columns)





Index(['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB',
       'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN', 'TEAM_WIN_RATE',
       'OPPONENT_WIN_RATE', 'PTS_std', 'FGM_std', 'FGA_std', 'FG3M_std',
       'FG3A_std', 'FTM_std', 'FTA_std', 'AST_std', 'OREB_std', 'DREB_std',
       'REB_std', 'TOV_std', 'STL_std', 'BLK_std', 'MIN_std',
       'TEAM_WIN_RATE_std', 'OPPONENT_WIN_RATE_std', 'home_away',
       'PLAYER_NAME', 'TEAM_NAME', 'games_count'],
      dtype='object')


ODDs join


In [91]:

#Can we just get unique player names and team_name
uniques_player_teams = aggregated_data[['PLAYER_NAME', 'TEAM_NAME']].drop_duplicates()
#print(uniques_player_teams)

#filter for only Brandon Miller
#uniques_player_teams = uniques_player_teams[uniques_player_teams['PLAYER_NAME'] == 'Brandon Miller']
#print(uniques_player_teams.head())
#filter for odds_df
#odds_df = odds_df[odds_df['Player Name'] == 'Brandon Miller']
#print(odds_df.head())

# Merge the odds data with the player data
merged_data = pd.merge(odds_df, uniques_player_teams, left_on=['Player Name', 'Home Team'],  right_on=['PLAYER_NAME', 'TEAM_NAME'], how='left')
print(merged_data.head())
#print(merged_data.columns)

NameError: name 'odds_df' is not defined

In [None]:
#streamlit app that adjusts these to different values
# Configuration Variables
n_games_overall = 10  # Last 10 games for overall average
n_games_home = 5  # Last 5 home games for home average
n_games_away = 5  # Last 5 away games for away average
weight_overall = 1  # Weight for overall average
weight_home = 1.5  # Weight for home game average
weight_away = 1.5  # Weight for away game average
player = 'Brandon Miller'  # Player name to analyze



In [None]:
#%%writefile player_analysis/main.py

import streamlit as st


#load data
