Pull out the season stats

In [142]:
%%writefile modular/odds_api_pull.py
import requests
import pandas as pd
import datetime
import json
#********************odds api pull EXAMPLE********************************
# This is an example of how to use the odds API to fetch odds data for a specific market for a specific game to get columns as needed
# Define your API key and base URL
# api_key = ''
# Access your secret
# api_key = st.secrets["api_key"]
# base_url = 'https://api.the-odds-api.com/v4/sports/basketball_nba/events'

# # Set up parameters for the API call
# games_params = {
#     'apiKey': api_key,
#     'regions': 'us'
# }

# # Make the API call to fetch games
# games_response = requests.get(base_url, params=games_params)
# games_data = games_response.json()

# # Assuming you have games to work with, take the first game's ID
# if games_data:
#     game_id = games_data[0]['id']

#     # Now fetch odds for a specific market for this game
#     market = 'player_points'  # Example market
#     odds_url = f"{base_url}/{game_id}/odds"
#     odds_params = {
#         'apiKey': api_key,
#         'markets': market,
#         'regions': 'us'
#     }
#     odds_response = requests.get(odds_url, params=odds_params)
#     odds_data = odds_response.json()

#     # Print a formatted sample of the odds data
#     print(json.dumps(odds_data, indent=4))

#     # Now you can visually inspect the structure of the odds data
#     # and determine where the player names are located


#********************odds api pull EXAMPLE********************************


# Load the combined player data for today's date
df_combined = pd.read_csv('data/combined_data.csv')
df_combined['GAME_DATE'] = pd.to_datetime(df_combined['GAME_DATE']).dt.date
today = datetime.datetime.now().date()
df_filtered_combined = df_combined[df_combined['GAME_DATE'] == today]
players_today = df_filtered_combined['PLAYER_NAME'].unique()

#example for players_today
#players_today = ['Brandon Miller', 'Miles Bridges']

# Your API key
api_key = ''

# Define the base URL for The Odds API
base_url = 'https://api.the-odds-api.com/v4/sports'

# Example of the target markets
#nba_player_prop_markets = ['player_points']  # Simplified for demonstration
# Define the target markets
nba_player_prop_markets = [
    'player_points', 'player_rebounds', 'player_assists',
    'player_threes', 'player_blocks', 'player_steals',
    'player_blocks_steals', 'player_turnovers',
    'player_points_rebounds_assists', 'player_points_rebounds',
    'player_points_assists', 'player_rebounds_assists',
    'player_first_basket', 'player_double_double',
    'player_triple_double', 'player_points_alternate',
    'player_rebounds_alternate', 'player_assists_alternate',
    'player_blocks_alternate', 'player_steals_alternate',
    'player_threes_alternate', 'player_points_assists_alternate',
    'player_points_rebounds_alternate', 'player_rebounds_assists_alternate',
    'player_points_rebounds_assists_alternate'
]

# Initialize a list to store data for today's players across all games
betting_data_list = []

# Fetch the list of NBA games for today
games_url = f"{base_url}/basketball_nba/events"
games_params = {'apiKey': api_key, 'regions': 'us'}
games_response = requests.get(games_url, params=games_params)

if games_response.status_code == 200:
    games_data = games_response.json()

    # Loop through each game
    for game in games_data:
        game_id = game['id']
        commence_time = game['commence_time']
        
        # Loop through the target markets for each game
        for market in nba_player_prop_markets:
            odds_url = f"{base_url}/basketball_nba/events/{game_id}/odds"
            odds_params = {'apiKey': api_key, 'markets': market, 'regions': 'us'}
            odds_response = requests.get(odds_url, params=odds_params)

            if odds_response.status_code == 200:
                odds_data = odds_response.json()
                
                # Loop through each bookmaker's markets
                for bookmaker in odds_data.get('bookmakers', []):
                    for market_data in bookmaker.get('markets', []):
                        # Filter outcomes for players playing today
                        for outcome in market_data.get('outcomes', []):
                            if outcome.get('description') in players_today:
                                betting_data_list.append({
                                    'GAME_ID': game_id,
                                    'COMMENCE_TIME': pd.to_datetime(commence_time),
                                    'HOME_TEAM': game['home_team'],
                                    'AWAY_TEAM': game['away_team'],
                                    'PLAYER_NAME': outcome.get('description'),
                                    'MARKET': market_data.get('key'),
                                    'OVER_UNDER': outcome.get('name'),
                                    'PRICE': outcome.get('price'),
                                    'POINT': outcome.get('point'),
                                    'LAST_UPDATE': pd.to_datetime(market_data.get('last_update'))
                                })


# Convert the betting data list to a DataFrame
df_betting = pd.DataFrame(betting_data_list)

# Convert 'COMMENCE_TIME' and 'LAST_UPDATE' to datetime format explicitly
df_betting['COMMENCE_TIME'] = pd.to_datetime(df_betting['COMMENCE_TIME'], utc=True)
df_betting['LAST_UPDATE'] = pd.to_datetime(df_betting['LAST_UPDATE'], utc=True)

# Now attempt to remove timezone information
df_betting['COMMENCE_TIME'] = df_betting['COMMENCE_TIME'].apply(lambda x: x.replace(tzinfo=None))
df_betting['LAST_UPDATE'] = df_betting['LAST_UPDATE'].apply(lambda x: x.replace(tzinfo=None))

# Extract GAME_DATE from COMMENCE_TIME
df_betting['GAME_DATE'] = df_betting['COMMENCE_TIME'].dt.date

# Sort by 'Last Update' to ensure the most recent entries are first
df_betting.sort_values(by=['PLAYER_NAME', 'MARKET', 'OVER_UNDER', 'LAST_UPDATE'], ascending=[True, True, True, False], inplace=True)

# Drop duplicates to keep only the latest entry for each type of market per player
df_betting.drop_duplicates(subset=['PLAYER_NAME', 'MARKET', 'OVER_UNDER'], keep='first', inplace=True)


# Generate 'OVER_PRICE' and 'UNDER_PRICE' columns
df_betting['OVER_PRICE'] = df_betting.apply(lambda x: x['PRICE'] if x['OVER_UNDER'] == 'Over' else pd.NA, axis=1)
df_betting['UNDER_PRICE'] = df_betting.apply(lambda x: x['PRICE'] if x['OVER_UNDER'] == 'Under' else pd.NA, axis=1)

# Prepare separate DataFrames for Over and Under prices
df_over = df_betting[df_betting['OVER_UNDER'] == 'Over'][['PLAYER_NAME', 'GAME_DATE', 'MARKET', 'OVER_PRICE', 'POINT', 'HOME_TEAM', 'AWAY_TEAM']].copy()
df_under = df_betting[df_betting['OVER_UNDER'] == 'Under'][['PLAYER_NAME', 'GAME_DATE', 'MARKET', 'UNDER_PRICE', 'POINT', 'HOME_TEAM', 'AWAY_TEAM']].copy()


# Merge Over and Under DataFrames to have a single row per player prop
df_over_under = pd.merge(df_over, df_under, on=['PLAYER_NAME', 'GAME_DATE', 'MARKET', 'POINT', 'HOME_TEAM', 'AWAY_TEAM'], how='outer')


# Merge with player data to include 'TEAM_NAME'
df_final = pd.merge(df_over_under, df_filtered_combined[['PLAYER_NAME', 'TEAM_NAME']], on='PLAYER_NAME', how='left')

# Determine if the player is playing at home or away and who the opponent is
df_final['HOME_AWAY'] = df_final.apply(lambda x: 'Home' if x['TEAM_NAME'] == x['HOME_TEAM'] else 'Away', axis=1)
df_final['OPPONENT_NAME'] = df_final.apply(lambda x: x['AWAY_TEAM'] if x['HOME_AWAY'] == 'Home' else x['HOME_TEAM'], axis=1)

# Drop any unnecessary columns if needed and reset index
df_final = df_final.reset_index(drop=True)

# print(df_final.head())

# concat to the bottom of the csv file
with open('data/final_odds_api_pull.csv', 'a') as f:
    df_final.to_csv(f, header=f.tell()==0, index=False)

# df_final.to_csv('data/final_odds_api_pull.csv', index=False)


Overwriting modular/odds_api_pull.py


In [140]:
%%writefile parlay_app_test.py
import pandas as pd
import streamlit as st
import numpy as np
import os

#------------------------------------------Notes------------------------------------------
# This is a simple parlay betting app that allows users to select player prop bets and calculate potential payouts
# The app uses a CSV file to store parlays, and allows users to save, delete, and download parlays
# The app also uses session state to store selected parlays across Streamlit reruns
#todos:
# 1. Add in bets odds based on n games
#-----------------------------------------------------------------------------------------

# Initialize session state for selected parlays if it doesn't exist
if 'selected_parlays' not in st.session_state:
    st.session_state.selected_parlays = []

# Path to save the parlay bets CSV
parlay_bets_csv = 'data/parlay_bets.csv'

# Initialize or load existing parlays
if os.path.exists(parlay_bets_csv):
    parlays_df = pd.read_csv(parlay_bets_csv)
else:
    parlays_df = pd.DataFrame(columns=['Bet Info', 'Price'])


# Load data from CSV
df = pd.read_csv('data/final_odds_api_pull.csv') 

# Add alternate markets if they do not exist for every player
alternate_markets = {
    'player_rebounds_alternate': [4.5, 7.5, 10.5],
    'player_assists_alternate': [4.5, 7.5, 10.5],
    'player_points_alternate': [9.5, 19.5, 29.5],
}

for player in df['PLAYER_NAME'].unique():
    for game_date in df['GAME_DATE'].unique():
        for market, points_options in alternate_markets.items():
            for point in points_options:
                if not ((df['PLAYER_NAME'] == player) & (df['GAME_DATE'] == game_date) & (df['MARKET'] == market) & (df['POINT'] == point)).any():
                    # Add row with placeholder prices, adjust as necessary
                    df = pd.concat([df, pd.DataFrame.from_records([{
                        'PLAYER_NAME': player,
                        'GAME_DATE': game_date,
                        'MARKET': market,
                        'POINT': point,
                        'OVER_PRICE': np.nan,  # Placeholder
                        'UNDER_PRICE': np.nan,  # Placeholder
                        # Add other necessary columns with defaults
                    }])], ignore_index=True)

# UI components for date, team, and player selection
date = st.selectbox('Select Date:', df['GAME_DATE'].unique())
team = st.selectbox('Select Team:', ['All'] + list(df['TEAM_NAME'].unique()))
players = df[df['TEAM_NAME'] == team]['PLAYER_NAME'].unique() if team != 'All' else df['PLAYER_NAME'].unique()
player = st.selectbox('Select Player:', players)

# Display odds table for selected player
filtered_df = df[(df['GAME_DATE'] == date) & (df['PLAYER_NAME'] == player)]
st.table(filtered_df[['MARKET', 'POINT', 'OVER_PRICE', 'UNDER_PRICE']])

# Mechanism to ensure no duplicate stat types for a player
already_selected_stats = set()
for parlay in st.session_state.selected_parlays:
    bet_player, bet_market = parlay['Bet Info'].split(' - ')[0], parlay['Bet Info'].split(' - ')[1]
    if bet_player == player:
        stat_type = bet_market.split(' ')[0]  # Extracting base stat type (e.g., "player_points")
        already_selected_stats.add(stat_type)

# Filter out indices for stats that have already been selected
def is_stat_available(market):
    base_stat = market.split('_')[1]  # 'player_points' -> 'points'
    alternate_forms = [base_stat, f"{base_stat}_alternate"]
    return not any(alternate in already_selected_stats for alternate in alternate_forms)

available_indices = [i for i in filtered_df.index if is_stat_available(filtered_df.loc[i, 'MARKET'])]

# Select statistics for betting
selected_indices = st.multiselect("Select stats for betting:", available_indices, format_func=lambda x: f"{filtered_df.loc[x, 'MARKET']} at {filtered_df.loc[x, 'POINT']} points")

# Process selected bets
for i in selected_indices:
    row = df.loc[i]
    over_under = st.radio(f"{row['PLAYER_NAME']} - {row['MARKET']} at {row['POINT']} points: Choose Over or Under:", ('Over', 'Under'), key=f"over_under_{i}")
    
    default_price = row['OVER_PRICE'] if over_under == 'Over' else row['UNDER_PRICE']
    price = st.number_input(f"Enter price for {row['MARKET']} ({'Over' if over_under == 'Over' else 'Under'}):", value=float(default_price) if pd.notnull(default_price) else 0.01, key=f"price_input_{i}")
    
    bet_info = f"{row['PLAYER_NAME']} - {row['MARKET']} at {row['POINT']} points: {over_under} at price {price}"
    
    if st.button(f"Add '{bet_info}' to parlay", key=f"add_to_parlay_{i}"):
        st.session_state.selected_parlays.append({'Bet Info': bet_info, 'Price': price})
        st.success(f"Added to parlay: {bet_info}")
        already_selected_stats.add(row['MARKET'].split('_')[1])  # Update already selected stats

# Display current parlays from session state
if st.session_state.selected_parlays:
    st.write("Current Parlays:")
    current_parlays = pd.DataFrame(st.session_state.selected_parlays)
    st.table(current_parlays)

# Input for how much to bet, after parlays selection
bet_amount = st.number_input("Enter your total bet amount:", min_value=0.01, value=1.00, step=0.01, key='bet_amount')

# Calculate and display potential payout based on bet amount
if st.session_state.selected_parlays:
    total_odds = np.prod([float(parlay['Price']) for parlay in st.session_state.selected_parlays])
    potential_payout = total_odds * bet_amount
    st.markdown(f"**Potential payout from a ${bet_amount:,.2f} bet on current parlays: ${potential_payout:,.2f}**")  # Enhanced formatting


# Buttons for saving, deleting, and downloading parlays
if st.button('Save Current Parlays'):
    current_parlays = pd.DataFrame(st.session_state.selected_parlays)
    current_parlays.to_csv(parlay_bets_csv, index=False)
    st.success("Current parlays saved.")

if st.button('Delete All Parlays'):
    st.session_state.selected_parlays = []
    if os.path.exists(parlay_bets_csv):
        os.remove(parlay_bets_csv)
    st.success("All parlays deleted.")

# Calculate and display potential payout based on bet amount
if st.session_state.selected_parlays:
    total_odds = np.prod(current_parlays['Price'].astype(float))
    potential_payout = total_odds * bet_amount
    payout_text = f"Potential payout from a ${bet_amount:,.2f} bet on current parlays: ${potential_payout:,.2f}"
    st.markdown(f"**{payout_text}**")  # Use markdown for consistent font

if os.path.exists(parlay_bets_csv):
    with open(parlay_bets_csv, "rb") as file:
        st.download_button(label="Download Current Parlays as CSV", data=file, file_name="current_parlays.csv", mime="text/csv")

Overwriting parlay_app_test.py


In [23]:
%%writefile modular/player_game_logs.py
import pandas as pd
from datetime import datetime, timedelta
from nba_api.stats.endpoints import commonallplayers, playergamelog, leaguedashplayerstats, leaguegamefinder
from nba_api.stats.static import teams
import time
import numpy as np


def get_current_nba_season_year():
    current_date = datetime.now()
    if current_date.month > 9:  # NBA season starts in October
        return str(current_date.year) + "-" + str(current_date.year + 1)[2:]
    else:
        return str(current_date.year - 1) + "-" + str(current_date.year)[2:]

def calculate_cumulative_win_rates(season):
    try:
        # Adjust the season start date based on the typical NBA season start dates
        season_start_date = season.split('-')[0] + "-10-01"  # Assuming October 1st as a generic start date
        all_games = leaguegamefinder.LeagueGameFinder(season_nullable=season).get_data_frames()[0]
        all_games['GAME_DATE'] = pd.to_datetime(all_games['GAME_DATE'])
        all_games = all_games[all_games['GAME_DATE'] > pd.to_datetime(season_start_date)]
        all_games = all_games.sort_values('GAME_DATE')
        all_games['WIN'] = all_games['WL'].apply(lambda x: 1 if x == 'W' else 0)
        all_games['CUMULATIVE_WINS'] = all_games.groupby('TEAM_NAME')['WIN'].cumsum()
        all_games['CUMULATIVE_GAMES'] = all_games.groupby('TEAM_NAME').cumcount() + 1
        all_games['CUMULATIVE_WIN_RATE'] = all_games['CUMULATIVE_WINS'] / all_games['CUMULATIVE_GAMES']
        return all_games
    except Exception as e:
        print(f"Error calculating cumulative win rates: {e}")
        return pd.DataFrame()
    

def get_win_rate(row, team_type, all_games):
    game_date = row['GAME_DATE']
    team_name = row[team_type]
    team_games = all_games[(all_games['TEAM_NAME'] == team_name) & (all_games['GAME_DATE'] < game_date)]
    if not team_games.empty:
        return team_games.iloc[-1]['CUMULATIVE_WIN_RATE']
    else:
        return 0.0


def load_nba_player_game_logs(seasons, min_avg_minutes=30.0, save_path='data/player_game_logs.csv'):
    if not isinstance(seasons, list):
        seasons = [seasons]

    new_players_data = pd.DataFrame()

    for season in seasons:
        print(f"Processing season {season}...")
        try:
            all_players = commonallplayers.CommonAllPlayers(is_only_current_season=0).get_data_frames()[0]
            player_stats = leaguedashplayerstats.LeagueDashPlayerStats(season=season).get_data_frames()[0]
        except Exception as e:
            print(f"Error fetching player stats for season {season}: {e}")
            continue

        # Filter for players who meet the minimum average minutes threshold
        player_stats['AVG_MIN'] = player_stats['MIN'] / player_stats['GP']
        eligible_players = player_stats[player_stats['AVG_MIN'] >= min_avg_minutes]
        teams_list = teams.get_teams()
        team_abbrev_to_full_name = {team['abbreviation']: team['full_name'] for team in teams_list}

        all_games = calculate_cumulative_win_rates(season)
        if all_games.empty:
            print("Skipping win rate calculation due to an error.")
            continue

        for index, player in eligible_players.iterrows():
            try:
                player_id = player['PLAYER_ID']
                player_name = player['PLAYER_NAME']
                player_log = playergamelog.PlayerGameLog(player_id=player_id, season=season)
                player_data = player_log.get_data_frames()[0]
                if player_data.empty:
                    continue
                player_data['PLAYER_NAME'] = player_name
                player_data['TEAM_ABBREVIATION'] = player_data['MATCHUP'].str[:3]
                player_data['OPPONENT_ABBREVIATION'] = player_data['MATCHUP'].apply(lambda x: x.split(' ')[2] if 'vs.' in x else x.split(' ')[-1])
                player_data['TEAM_NAME'] = player_data['TEAM_ABBREVIATION'].map(team_abbrev_to_full_name)
                player_data['OPPONENT_NAME'] = player_data['OPPONENT_ABBREVIATION'].map(team_abbrev_to_full_name)
                player_data = player_data[player_data['TEAM_ABBREVIATION'].isin(team_abbrev_to_full_name.keys())]
                new_players_data = pd.concat([new_players_data, player_data], ignore_index=True)
            except Exception as e:
                print(f"Error processing player {player_name}: {e}")
                continue
            time.sleep(0.6)  # To respect rate limits

        # Calculate team and opponent win rates
        new_players_data['GAME_DATE'] = pd.to_datetime(new_players_data['GAME_DATE'])
        new_players_data['TEAM_WIN_RATE'] = new_players_data.apply(lambda row: get_win_rate(row, 'TEAM_NAME', all_games), axis=1)
        new_players_data['OPPONENT_WIN_RATE'] = new_players_data.apply(lambda row: get_win_rate(row, 'OPPONENT_NAME', all_games), axis=1)

        new_players_data['HOME_AWAY'] = new_players_data['MATCHUP'].str.split(' ').str[1]
        new_players_data['HOME_AWAY'] = new_players_data['HOME_AWAY'].apply(lambda x: 'Away' if '@' in x else 'Home')
        new_players_data.reset_index(drop=True)
    if not new_players_data.empty:
        new_players_data.to_csv(save_path, index=False)
        print(f"Player game logs saved to {save_path}")
    else:
        print("No player game logs to save.")



# Example usage
#seasons = ['2022-23']  # You can adjust seasons as needed
#load_nba_player_game_logs(seasons, min_avg_minutes=30.0, save_path='data/player_game_logs_winr.csv')
        
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from nba_api.stats.static import teams

def prepare_upcoming_games_data(season_games_csv, player_game_logs_csv, expand_with_players=False):
    # Load season games data
    data = pd.read_csv(season_games_csv)
    
    # Process home and away data
    home_data = data[['DATE', 'Start (ET)', 'Home/Neutral']].copy()
    home_data['HOME_AWAY'] = 'Home'
    home_data['MATCHUP'] = home_data['Home/Neutral'] + ' vs. ' + data['Visitor/Neutral']
    home_data.rename(columns={'Home/Neutral': 'Team'}, inplace=True)
    home_data['WL_encoded'] = np.nan
    
    away_data = data[['DATE', 'Start (ET)', 'Visitor/Neutral']].copy()
    away_data['HOME_AWAY'] = 'Away'
    away_data['MATCHUP'] = away_data['Visitor/Neutral'] + ' @ ' + home_data['Team']  # Adjusted to use '@' for away games
    away_data.rename(columns={'Visitor/Neutral': 'Team'}, inplace=True)
    away_data['WL_encoded'] = np.nan
    
    final_data = pd.concat([home_data, away_data], ignore_index=True)
    final_data.sort_values(by=['DATE', 'Start (ET)', 'HOME_AWAY'], inplace=True)
    final_data.reset_index(drop=True, inplace=True)
    
    # Convert 'DATE' column to datetime format
    final_data['DATE'] = pd.to_datetime(final_data['DATE'], format='%a, %b %d, %Y')
    
    # Get unique team information from the NBA API
    teams_info = teams.get_teams()
    teams_df = pd.DataFrame(teams_info)
    teams_df.rename(columns={'id': 'TEAM_ID', 'full_name': 'TEAM_NAME', 'abbreviation': 'TEAM_ABBREVIATION'}, inplace=True)
    
    # Merge final_data with teams_df to include TEAM_ID and abbreviations
    final_data = pd.merge(final_data, teams_df[['TEAM_NAME', 'TEAM_ID', 'TEAM_ABBREVIATION']], left_on='Team', right_on='TEAM_NAME', how='left')
    
    # Ensure all team names in MATCHUP are abbreviations
    for index, row in final_data.iterrows():
        for _, team_row in teams_df.iterrows():
            final_data.at[index, 'MATCHUP'] = final_data.at[index, 'MATCHUP'].replace(team_row['TEAM_NAME'], team_row['TEAM_ABBREVIATION'])
    
    # Extract and filter for upcoming games
    today = pd.Timestamp.now().floor('D')  # Normalize to avoid time part
    week_out = today + timedelta(days=7)
    upcoming_games = final_data[(final_data['DATE'] >= today) & (final_data['DATE'] <= week_out)]
    upcoming_games.sort_values(by='DATE', inplace=True)
    upcoming_games.reset_index(drop=True, inplace=True)
    
    # Format the 'DATE' column to match the example output's 'GAME_DATE' format
    upcoming_games['GAME_DATE'] = upcoming_games['DATE'].dt.strftime('%Y-%m-%d')
    
    # Correct the column name for consistency
    upcoming_games.rename(columns={'HOME_AWAY': 'HOME_AWAY'}, inplace=True)
    
    # Drop unnecessary columns and adjust to match the target dataset structure
    upcoming_games = upcoming_games[['GAME_DATE', 'MATCHUP', 'HOME_AWAY', 'TEAM_ID', 'TEAM_NAME']]
    
    # Create OPPOSING_TEAM column
    upcoming_games['OPPONENT_NAME'] = np.nan  # Placeholder for opposing team names
    
    # Populate TEAM_NAME and OPPOSING_TEAM with correct names
    for index, row in upcoming_games.iterrows():
        if row['HOME_AWAY'] == 'Home':
            # If it's a home game, the home team is TEAM_NAME and the visitor team is OPPOSING_TEAM
            home_team_abbr = row['MATCHUP'].split(' vs. ')[0]
            away_team_abbr = row['MATCHUP'].split(' vs. ')[1]
        else:
            # If it's an away game, the visitor team is TEAM_NAME and the home team is OPPOSING_TEAM
            away_team_abbr = row['MATCHUP'].split(' @ ')[0]
            home_team_abbr = row['MATCHUP'].split(' @ ')[1]

        home_team_full_name = teams_df[teams_df['TEAM_ABBREVIATION'] == home_team_abbr]['TEAM_NAME'].values[0]
        away_team_full_name = teams_df[teams_df['TEAM_ABBREVIATION'] == away_team_abbr]['TEAM_NAME'].values[0]

        upcoming_games.at[index, 'TEAM_NAME'] = home_team_full_name if row['HOME_AWAY'] == 'Home' else away_team_full_name
        upcoming_games.at[index, 'OPPONENT_NAME'] = away_team_full_name if row['HOME_AWAY'] == 'Home' else home_team_full_name


    # Load player game logs to use for fetching rosters
    player_game_logs = pd.read_csv(player_game_logs_csv)
    print(player_game_logs.columns)
    print(upcoming_games.columns)
    
    if expand_with_players:
        expanded_games_with_players = pd.DataFrame()

        # Assuming player_game_logs_csv is correctly loaded into player_game_logs DataFrame
        player_game_logs = pd.read_csv('data\player_game_logs_winr.csv')

        expanded_rows = []

        for _, game in upcoming_games.iterrows():
            team_name = game['TEAM_NAME']
            team_players = player_game_logs[player_game_logs['TEAM_NAME'] == team_name]

            for _, player in team_players.iterrows():
                expanded_row = game.copy().to_dict()
                expanded_row['Player_ID'] = player['Player_ID']
                expanded_row['PLAYER_NAME'] = player['PLAYER_NAME']
                expanded_rows.append(expanded_row)

        expanded_games_with_players = pd.DataFrame(expanded_rows)

        #drop duplicate players and game_dates
        expanded_games_with_players = expanded_games_with_players.drop_duplicates(subset=['GAME_DATE', 'PLAYER_NAME'], keep='first')

        # only include these columns: ['GAME_DATE', 'MATCHUP', 'HOME_AWAY', 'TEAM_NAME','OPPOSING_TEAM', 'Player_ID', 'PLAYER_NAME']
        expanded_games_with_players = expanded_games_with_players[['GAME_DATE', 'MATCHUP', 'HOME_AWAY', 'TEAM_NAME', 'OPPONENT_NAME', 'Player_ID', 'PLAYER_NAME']]
        
        # Return the expanded DataFrame
        return expanded_games_with_players


    return upcoming_games

# Example usage with file paths
#season_games_csv = 'data/23_24_season_games.csv'
#player_game_logs_csv = 'data/player_game_logs_winr.csv'
#upcoming_games_df = prepare_upcoming_games_data(season_games_csv, player_game_logs_csv, expand_with_players=True)
#print(upcoming_games_df.head())
#print(len(upcoming_games_df))


#Final data prepare
import pandas as pd





Overwriting modular/player_game_logs.py


In [2]:
%%writefile modular/metrics_functions.py
import pandas as pd
import numpy as np

def calculate_running_stats(group, stats):
    """
    Calculate running mean and standard deviation for the specified statistics.
    """
    # Calculate mean and std for the last n games
    running_means = group[stats].mean()
    running_stds = group[stats].std(ddof=0)  # ddof=0 for population standard deviation
    return running_means, running_stds

def prepare_mean_std_data(df, n_games=10, current_date=None, current_season=None, game_location='All'):
    """
    Prepare aggregated data for players over the last n games up to the current date and within the current season,
    considering home/away context.
    """
    stats = ['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB', 'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE']
    
    if current_date:
        df = df[df['GAME_DATE'] <= current_date]
    if current_season:
        df = df[df['SEASON'] == current_season]
    if game_location in ['Home', 'Away']:
        df = df[df['HOME_AWAY'] == game_location]

    grouped = df.groupby(['PLAYER_NAME', 'TEAM_NAME'])
    result_list = []

    for (player_name, team_name), group in grouped:
        group = group.sort_values(by='GAME_DATE', ascending=False).head(n_games)
        mean_values, std_values = calculate_running_stats(group, stats)
        
        mean_values['TYPE'] = 'mean_'+str(n_games)+'_games'
        std_values['TYPE'] = 'std_'+str(n_games)+'_games'
        mean_values['PLAYER_NAME'] = player_name
        mean_values['TEAM_NAME'] = team_name
        mean_values['HOME_AWAY'] = game_location
        
        std_values['PLAYER_NAME'] = player_name
        std_values['TEAM_NAME'] = team_name
        std_values['HOME_AWAY'] = game_location

        result_list.append(mean_values)
        result_list.append(std_values)

    result_df = pd.DataFrame(result_list).reset_index(drop=True)
    return result_df


# Example usage
# Load in data
data = pd.read_csv('data/player_game_logs_winr.csv')
# Filter for a specific player, e.g., Cade Cunningham
data = data[data['PLAYER_NAME'] == 'Cade Cunningham']
# Assuming 'data' is your DataFrame loaded from 'player_game_logs_winr.csv'
aggregated_data = prepare_mean_std_data(data, n_games=10, game_location='Home')
#print(aggregated_data.head())
#print(aggregated_data.columns)

def calculate_league_stats(df, stats, n_games=10, current_date=None, current_season=None, game_location='All'):
    """
    Calculate league-wide standard deviation for the specified statistics over the last n games,
    considering the filters applied for date, season, and location.
    """
    if current_date:
        df = df[df['GAME_DATE'] <= current_date]
    if current_season:
        df = df[df['SEASON'] == current_season]
    if game_location in ['Home', 'Away']:
        df = df[df['HOME_AWAY'] == game_location]

    std_values = df[stats].std(ddof=0)  # Using population standard deviation

    # Correctly creating a DataFrame with the intended structure
    std_values_df = pd.DataFrame([std_values.values], columns=stats)  # Wrap in a list to create a single row DataFrame
    std_values_df['TYPE'] = 'league_std_' + str(n_games) + '_games'
    std_values_df['PLAYER_NAME'] = 'League'
    std_values_df['TEAM_NAME'] = 'All'
    std_values_df['HOME_AWAY'] = game_location

    return std_values_df


def prepare_league_std_data(df, n_games=10, current_date=None, current_season=None, game_location='All'):
    """
    Prepare league-wide aggregated standard deviation data over the last n games up to the current date and within the current season,
    considering home/away context.
    """
    stats = ['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB', 'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE']
    league_stats = calculate_league_stats(df, stats, n_games, current_date, current_season, game_location)
    
    result_df = pd.DataFrame(league_stats).reset_index(drop=True)
    return result_df

# Example usage
# Assuming 'data' is your DataFrame loaded from 'player_game_logs_winr.csv'
#league_data = prepare_league_std_data(data, n_games=10, game_location='Home')
#print(league_data.head())

#example concatenated data
# Concatenate league_data and aggregated_data
#combined_data = pd.concat([league_data, aggregated_data], ignore_index=True)

# Check the first few rows of the combined dataframe to ensure it looks correct
#print(combined_data.head())

# Optionally, check the structure and summary of the combined dataframe
#print(combined_data.info())


def prepare_performance_against_all_teams(df):
    """
    Prepare aggregated data for each player against each team they've played against in the dataset.
    
    Parameters:
    - df (DataFrame): The dataset containing player game logs.
    
    Returns:
    - DataFrame: The aggregated data with running averages for each player against each team.
    """
    stats = ['PTS', 'FGM', 'FGA', 'FG3M', 'FG3A', 'FTM', 'FTA', 'AST', 'OREB', 'DREB', 'REB', 'TOV', 'STL', 'BLK', 'MIN', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE']
    unique_players = df['PLAYER_NAME'].unique()
    unique_teams = df['OPPONENT_NAME'].unique()
    
    result_list = []

    for player in unique_players:
        for team in unique_teams:
            player_games = df[(df['PLAYER_NAME'] == player) & (df['OPPONENT_NAME'] == team)]
            
            if not player_games.empty:
                mean_values, _ = calculate_running_stats(player_games, stats)
                mean_values['PLAYER_NAME'] = player
                mean_values['OPPONENT_NAME'] = team
                mean_values['TYPE'] = f'mean_vs_{team}'
                result_list.append(mean_values)
    
    result_df = pd.DataFrame(result_list).reset_index(drop=True)
    return result_df

#Example usage
#performance_against_all_teams = prepare_performance_against_all_teams(data)
#print(performance_against_all_teams)




Overwriting modular/metrics_functions.py


In [156]:
%%writefile app.py
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from modular.player_game_logs import load_nba_player_game_logs, prepare_upcoming_games_data
from modular.metrics_functions import prepare_mean_std_data, prepare_league_std_data, prepare_performance_against_all_teams
from modular.betting_functions import calculate_probability, calculate_bet_outcome, generate_betting_options, evaluate_bets, evaluate_bets_n_games_debug
import os

#file paths
prev_data_file_path = os.path.join('data', 'player_game_logs_winr.csv')
upcoming_games_file_path = os.path.join('data', '23_24_season_games.csv')



# Add a new section in your sidebar for navigation
st.sidebar.header("Navigation")
page = st.sidebar.radio("Select a page:", ["Player Analysis", "Forecasting Player Statistics"])


#------------Loading data with caching---------------
st.sidebar.header("Refresh Data for Select Season Year with Players at the Minimum Average Minutes Played")
# Season Year Selection slider
season_years = ['2022-23', '2023-24', '2024-25']  # Update this list with available seasons
selected_season = st.sidebar.selectbox('Select Season Year', season_years)

# min avg selection
min_avg_minutes =st.sidebar.slider('Minimum Average Minutes Played', min_value=1, max_value=60, value=20, step=1)

# Option to reload data
if st.sidebar.button('Load/Refresh Data'):
    load_nba_player_game_logs([selected_season], min_avg_minutes=min_avg_minutes, save_path=prev_data_file_path)
    st.sidebar.success(f"Data for the {selected_season} season loaded successfully.")

# Loading data with caching
@st.cache(ttl=3600, max_entries=10, show_spinner=False)
def load_data():
    data = pd.read_csv(prev_data_file_path)
    data['GAME_DATE'] = pd.to_datetime(data['GAME_DATE'])
    data.sort_values(by='GAME_DATE', inplace=True)
    return data

#pull in upcoming games to concatenate to data and input averages onto it
upcoming_games = prepare_upcoming_games_data(upcoming_games_file_path, prev_data_file_path, expand_with_players=True)

# Load the existing games data
previous_games = load_data()

# Ensure GAME_DATE is in datetime format for comparison
upcoming_games['GAME_DATE'] = pd.to_datetime(upcoming_games['GAME_DATE'])

# Filter out upcoming games that have dates already in previous games
unique_upcoming_games = upcoming_games[~upcoming_games['GAME_DATE'].isin(previous_games['GAME_DATE'])]

# Concatenate the unique upcoming games to the previous games dataset
data = pd.concat([previous_games, unique_upcoming_games], ignore_index=True)

# Sort the concatenated data by GAME_DATE to maintain chronological order
data.sort_values(by='GAME_DATE', inplace=True)

# Reset the index of the concatenated DataFrame
data.reset_index(drop=True, inplace=True)
#------------Loading data with caching---------------

# Use if-else to control the page display based on the sidebar selection
if page == "Player Analysis":
    # Title of the app
    st.title("NBA Player Game Logs and Statistical Insights")

    # Sidebar for user inputs
    st.sidebar.header("User Input Features")

    #------------Date/Player data---------------


    # Unique Data
    unique_dates = data['GAME_DATE'].dt.strftime('%Y-%m-%d').unique()
    #players = data['PLAYER_NAME'].unique()
    games = data['MATCHUP'].unique()

    # App functions
    # --------Date Selection----------
    selected_date = st.sidebar.selectbox('Select a Date', unique_dates)
    # to provide data for the selected date
    current_data = data[data['GAME_DATE'] == pd.to_datetime(selected_date)]
    players = current_data['PLAYER_NAME'].unique()
    # --------Player Search Selection----------
    # Use a text input for search instead of a dropdown
    search_query = st.sidebar.text_input("Search Player Name")
    # Filter the list of players based on the search query
    filtered_players = [player for player in players if search_query.lower() in player.lower()]
    # If there are too many matches, you might want to limit the number displayed or adjust UI accordingly
    if len(filtered_players) > 1:
        st.sidebar.write("Please refine your search to see the results")
    # --------Player Dropdown Selection----------
    # Allow the user to select a player from the filtered list
    selected_player = st.sidebar.selectbox("Select a Player", filtered_players)

    # Datasets to use
    current_data = data[data['GAME_DATE'] == pd.to_datetime(selected_date)]
    player_data = data[data['PLAYER_NAME'] == selected_player]
    player_date_data = player_data[player_data['GAME_DATE'] == pd.to_datetime(selected_date)]
    current_stats_data = data[data['GAME_DATE'] <= selected_date]
    total_games_played = current_stats_data[current_stats_data['PLAYER_NAME'] == selected_player].shape[0]


    if not player_data.empty:
        game_location = 'Home' if player_data['HOME_AWAY'].iloc[0] == 'Home' else 'Away'
        game_opposing_team = player_data['OPPONENT_NAME'].iloc[0]
        st.write(f"Data for {selected_player} ({game_location} game) against {game_opposing_team} on {selected_date}:")
        #st.dataframe(player_data[['GAME_DATE', 'TEAM_NAME', 'HOME_AWAY', 'PLAYER_NAME', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE', 'PTS', 'FG3M', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'MIN']])
    else:
        st.write(f"No data available for {selected_player} on {selected_date}.")


    # Filter data for the selected date
    current_stats_data = data[data['GAME_DATE'] <= selected_date]
    # Total games played by the player in the dataset
    total_games_played = current_stats_data[current_stats_data['PLAYER_NAME'] == selected_player].shape[0]
    # Display total games played
    print(f"Total games played by {selected_player} in the dataset: {total_games_played}")

    # Computing averages and league standard deviation
    # (Ensure functions like prepare_mean_std_data and prepare_league_std_data are correctly implemented)
    total_averages_data = prepare_mean_std_data(current_stats_data, n_games=total_games_played, game_location='All') #, current_date=current_date
    n_game_aggregated_data_all = prepare_mean_std_data(current_stats_data, n_games=10, game_location='All') #, current_date=current_date
    n_game_aggregated_data_home_or_away = prepare_mean_std_data(current_stats_data, n_games=10, game_location=game_location) #, current_date=current_date
    league_std_data = prepare_league_std_data(current_stats_data, n_games=10, game_location=game_location)

    # Performance against all teams
    performance_against_all_teams = prepare_performance_against_all_teams(current_stats_data)
    #filter against opposing_team
    performance_against_all_teams = performance_against_all_teams[performance_against_all_teams['OPPONENT_NAME'] == game_opposing_team]

    performance_against_all_teams = performance_against_all_teams.drop(columns=['OPPONENT_NAME'])
    #print(performance_against_all_teams.head())

    # Concatenate data for analysis
    combined_data = pd.concat([total_averages_data, n_game_aggregated_data_all, n_game_aggregated_data_home_or_away, league_std_data, performance_against_all_teams], ignore_index=True)

    # Filter for selected player and league standard
    combined_data_filtered = combined_data[combined_data['PLAYER_NAME'].isin([selected_player, 'League'])]
    #print(combined_data_filtered[['PLAYER_NAME', 'PTS', 'TYPE', 'HOME_AWAY', 'TEAM_NAME']])

    st.write(f"Total games played by {selected_player} in the dataset: {total_games_played}")
    st.dataframe(combined_data_filtered[['TEAM_NAME', 'HOME_AWAY', 'PLAYER_NAME', 'TYPE', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE', 'PTS', 'FG3M', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'MIN']])

    # Move Statistic Selection to Main Body
    stats_options = ['PTS', 'REB', 'AST', 'STL', 'BLK']  # Extend with more stats as needed
    selected_stat = st.selectbox('Select a Statistic for Graph', stats_options)

    # Graph Visualization
    fig, ax = plt.subplots()
    player_season_data = data[data['PLAYER_NAME'] == selected_player]
    ax.plot(player_season_data['GAME_DATE'], player_season_data[selected_stat], marker='o', linestyle='-', label=selected_stat)
    ax.set_title(f"{selected_stat} Trend for {selected_player} During the Season")
    ax.set_xlabel('Game Date')
    ax.set_ylabel(selected_stat)
    plt.xticks(rotation=45)
    plt.legend()
    st.pyplot(fig)

    # Betting Analysis Section
    st.header("Betting Analysis")

    # Ensure that player_season_data is filtered for the selected player
    player_season_data = combined_data_filtered[combined_data_filtered['PLAYER_NAME'] == selected_player]


    #Testing the generate_betting_options filter-------------------------------------------------------------------------------------

    # Ensure GAME_DATE is in datetime format
    player_data['GAME_DATE'] = pd.to_datetime(player_data['GAME_DATE'])

    # Filter for selected date directly
    selected_date_dt = pd.to_datetime(selected_date)

    # Get today's date as a datetime object at midnight
    today = datetime.now()
    today = pd.to_datetime(today)
    yesterday = today - timedelta(days=1)
    yesterday = pd.to_datetime(yesterday)

    # Filter for selected date
    # If the selected date is today or in the future, filter data up to yesterday
    if selected_date_dt >= today:
        print("Selected date is today or in the future.")
        player_data_filt = player_data[player_data['GAME_DATE'] <= yesterday]
    # If the selected date is before today, filter for the selected day
    else:
        print("Selected date is before today.")
        player_data_filt = player_data[player_data['GAME_DATE'] <= selected_date_dt]

    # print("Filtered player data for selected date:", player_data_filt)

    #Testing the generate_betting_options filter-------------------------------------------------------------------------------------



    # Sidebar for interactive parameters
    n_games = st.sidebar.slider('Number of Games', min_value=1, max_value=60, value=10)
    league_std_rate = st.sidebar.slider('League Standard Deviation Above Rate', min_value=0.0, max_value=1.0, value=0.9, step=0.01)
    probability_high = st.sidebar.slider('High Probability Threshold', min_value=0.0, max_value=1.0, value=0.9, step=0.01)
    probability_low = st.sidebar.slider('Low Probability Threshold', min_value=0.0, max_value=1.0, value=0.1, step=0.01)

    with st.form("betting_form"):
        selected_stat_for_bet = st.selectbox('Select Statistic for Betting', stats_options)
        bet_stat_projection = st.number_input('Enter Bet Stat Projection', value=0.0, format="%.2f")
        bet_amount = st.number_input('Enter Bet Amount ($)', value=0.0, format="%.2f")
        odds = st.number_input('Enter Odds (American format, e.g., +150 or -150)', value=100)
        submit_bet = st.form_submit_button("Calculate")

    # When calling calculate_probability
    if submit_bet and not player_data.empty:
        probability, against_team_probability, number_of_games_against_team, player_std, std_dev_comparison, league_std, number_of_games_above_projection, number_of_games = calculate_probability(
            player_data, selected_stat_for_bet, bet_stat_projection, league_std_data, n_games, league_std_rate, game_opposing_team
        )

        
        # Call calculate_bet_outcome to get expected_profit and expected_loss
        expected_profit, expected_loss, probability_weighted_to_profit = calculate_bet_outcome(bet_amount, odds, probability)

        # Now you can correctly display the expected profit and loss
        st.write(f"Probability of achieving projection: {probability*100:.2f}% out of {number_of_games} games")
        if against_team_probability is not None:
            st.write(f"Probability against {game_opposing_team}: {against_team_probability*100:.2f}% with {number_of_games_against_team} games above projection")
        st.write(f"Player's Std Dev: {player_std:.2f}, Better than league's by 10%: {'Yes' if std_dev_comparison else 'No'}")

        # Your recommendation logic
        if probability >= probability_high and std_dev_comparison:
            recommendation = "Betting above the projection might be more favorable due to high probability and player's consistency."
        elif probability <= probability_low and std_dev_comparison:
            recommendation = "Betting below the projection might be more favorable due to low probability and player's consistency."
        else:
            recommendation = "Consider other options or exercise caution due to lower probability or player's inconsistency."
        st.write(recommendation)

        # Correctly output the expected profit and loss
        st.write(f"Expected Profit if Win: ${expected_profit:.2f}, Expected Loss if Lose: -${expected_loss:.2f}")
        st.write(f"Weighted Probability to Profit: {probability_weighted_to_profit:.2f}")

        st.write(f"Total games played by {selected_player} in the dataset: {total_games_played}")
        st.dataframe(combined_data_filtered[['TEAM_NAME', 'HOME_AWAY', 'PLAYER_NAME', 'TYPE', 'TEAM_WIN_RATE', 'OPPONENT_WIN_RATE', 'MIN', selected_stat_for_bet]])

        #print("Preview of player_data:")
        #print(player_data.head())
        #print("Preview of league_std_data:")
        #print(league_std_data.head())

        betting_options_df = generate_betting_options(
            player_data_filt, league_std_data, selected_player, game_opposing_team, 
            all_players=False, n_games=n_games, league_std_rate=league_std_rate, 
            probability_high=probability_high, probability_low=probability_low
        )

        #print("betting_options_df.head()=", betting_options_df.head())
        #filter for Stats
        print("selected stat=", selected_stat_for_bet)
        print("betting option columns =", betting_options_df.columns)
        #print("betting option head =", betting_options_df.head(1))
        #betting_options_df = betting_options_df[betting_options_df['Stat'] == selected_stat_for_bet]
        print("betting option columns =", betting_options_df.columns)
        #print("betting option head =", betting_options_df.head())
        st.dataframe(betting_options_df)

    # Ensure the correct datetime format and sort order
    print("Step 1: Data Preparation Completed")

    # Step 2: Generate Betting Options for all historical data
    betting_options_df = generate_betting_options(
        player_data_filt, league_std_data, selected_player, game_opposing_team, all_players=True, n_games=n_games, league_std_rate=league_std_rate, 
        probability_high=probability_high, probability_low=probability_low)
    print(f"Step 2: Generated {len(betting_options_df)} betting options for all historical data.")
    

    # Step 3: Filter Betting Options for the selected date
    selected_date_dt = pd.to_datetime(selected_date)
    if not betting_options_df.empty:
        betting_options_df_selected_date = betting_options_df[betting_options_df['GAME_DATE'] == selected_date_dt]
        print(f"Step 3: Filtered {len(betting_options_df_selected_date)} betting options for the selected date ({selected_date}):")
        #print(betting_options_df_selected_date[['PLAYER_NAME', 'Stat', 'Threshold', 'GAME_DATE']])
        st.dataframe(betting_options_df_selected_date)

        # Step 4: Evaluate Bets for an overall evaluation based on the last n games
        unique_dates = betting_options_df['GAME_DATE'].unique()
        if len(unique_dates) > n_games:
            unique_dates_n_games = unique_dates[-n_games:]
            betting_options_df_n_games = betting_options_df[betting_options_df['GAME_DATE'].isin(unique_dates_n_games)]
        else:
            betting_options_df_n_games = betting_options_df

        evaluated_bets_df = evaluate_bets(betting_options_df_n_games, player_data_filt)
        print("Step 4: Evaluated bets based on the last {n_games} games.")

        # Debugging prints for evaluated bets
        print(f"Evaluated Bets DataFrame has {len(evaluated_bets_df)} rows after evaluation.")

        #filter out NaN 
        evaluated_bets_df = evaluated_bets_df.dropna(subset=['Bet Outcome'])

        #get min and max
        min_date = evaluated_bets_df['GAME_DATE'].min()
        max_date = evaluated_bets_df['GAME_DATE'].max()

        # Display the last values of Running Correct and Running Incorrect
        if not evaluated_bets_df.empty:
            final_corrects = evaluated_bets_df.iloc[-1]['Running Correct']
            final_incorrects = evaluated_bets_df.iloc[-1]['Running Incorrect']
            overall_bets = final_corrects + final_incorrects
            print(f"Final Corrects: {final_corrects}, Final Incorrects: {final_incorrects}")
        else:
            print("No bets evaluated.")
        
        #Display Percentage of correct bets through Running Correct and Running Incorrect
        if final_corrects + final_incorrects > 0:
            correct_percentage = final_corrects / overall_bets
            st.write(f"Percentage of correct bets: {correct_percentage*100:.2f}% out of {overall_bets} chances over the last {n_games} games ({min_date} to {max_date})")
        else:
            st.write("No bets evaluated yet.")
    else:
        print("No betting options generated yet.")
        st.write("No betting options generated yet.")




#-------------------------------New Tab for All Around Dashboard for Betting--------------------------------
#elif page == "Overall Betting Details":
    #changes:
    #1. add in a team drop down
    #2. ensure these dropdowns go back to normal when nothings selected
    #3. add in parlay betting, so it should save each bet chosen unless user deletes the bets and will add the parlay automatically
    # New section for Betting Details
# ...

elif page == "Forecasting Player Statistics":
    st.header("Statistic Details")
    st.title("NBA Players Forecasted Statistics")

    # Sidebar selections
    unique_dates = data['GAME_DATE'].dt.strftime('%Y-%m-%d').unique()
    selected_date = st.sidebar.selectbox('Select a Date', unique_dates)
    #select date and filter for the players and teams
    data = data[data['GAME_DATE'] == pd.to_datetime(selected_date)]
    selected_players = st.sidebar.multiselect("Select Players", options=data['PLAYER_NAME'].unique())
    selected_teams = st.sidebar.multiselect("Select Teams", options=data['TEAM_NAME'].unique())  # Assuming this is used somewhere in your app
    game_location = st.sidebar.selectbox('Select Game Location', ['All', 'Home', 'Away'])

    # Parameters for betting options
    n_games = st.sidebar.slider('Number of Games', min_value=1, max_value=60, value=10)
    league_std_rate = st.sidebar.slider('League Standard Deviation Above Rate', min_value=0.0, max_value=1.0, value=0.9, step=0.01)
    probability_high = st.sidebar.slider('High Probability Threshold', min_value=0.0, max_value=1.0, value=0.9, step=0.01)
    probability_low = st.sidebar.slider('Low Probability Threshold', min_value=0.0, max_value=1.0, value=0.1, step=0.01)

    # Filter data for the selected date
    betting_today_data = data[data['GAME_DATE'] == pd.to_datetime(selected_date)]
    league_std_data = prepare_league_std_data(betting_today_data, n_games=n_games, game_location=game_location)

    if selected_players:
        evaluated_bets_list = []
        for player in selected_players:
            player_data = betting_today_data[betting_today_data['PLAYER_NAME'] == player]
            evaluated_bets_df = evaluate_bets_n_games_debug(player_data, player_data, n_games)  # Adjust this call as needed
            evaluated_bets_list.append(evaluated_bets_df)

        combined_evaluated_bets_df = pd.concat(evaluated_bets_list, ignore_index=True) if evaluated_bets_list else pd.DataFrame()
        
        if not combined_evaluated_bets_df.empty:
            st.dataframe(combined_evaluated_bets_df[['PLAYER_NAME', 'GAME_DATE', 'Stat', 'Threshold', 'Actual Value', 'Bet Correct']])
            total_bets = len(combined_evaluated_bets_df)
            correct_bets = combined_evaluated_bets_df['Bet Correct'].sum()
            correct_percentage = correct_bets / total_bets * 100 if total_bets > 0 else 0
            st.write(f"Total Bets: {total_bets}, Correct Bets: {correct_bets}, Correct Percentage: {correct_percentage:.2f}%")
        else:
            st.write("No betting options generated for the selected criteria.")



Overwriting app.py


In [82]:
%%writefile modular/betting_functions.py 
import pandas as pd
import numpy as np

#Things to consider:
#1. Calculate the probability of a player achieving a certain statistic in a game
#2. Calculate the probability of a player achieving a certain statistic against a specific team
#3. Calculate the probability of a player achieving a certain statistic in a game given the player's recent performance

#proposed changes:
#add in the opposing team as a parameter
#add in the number of games against the opposing team as a parameter
#add league std as a parameter
#add in level of competition as a warning if the opponent win rate is higher than the average win rate of this teams previous competition
#add in the number of games above projection and number of games as a parameter as a confidence rate

#***Take out the best options according to these parameters into a daily dashboard***
#record the results from this^ and see if the model is accurate


def calculate_probability(player_data, stat, projection, league_std_data, n_games=10, league_std_rate=0.9, opposing_team=None):
    # Filter out games without statistics (e.g., future games without stats yet)
    games_with_stats = player_data.dropna(subset=[stat])
    
    # Now, select the last 10 games from this filtered dataset
    last_10_games = games_with_stats.tail(n_games)
    #print(f"Last 10 games for {stat}:")
    #print(last_10_games[[stat]])
    
    # Ensure stat is used directly without alteration
    if stat not in games_with_stats.columns:
        raise KeyError(f"Statistic '{stat}' not found in player data columns.")

    # Diagnostic prints
    #print(f"Last 10 games for {stat}:")
    #print(last_10_games[[stat]])
    #print(f"Number of NaN values for {stat}: {last_10_games[stat].isna().sum()}")

    player_std = last_10_games[stat].fillna(0).std()
    #print(f"Player Std Dev for {stat}: {player_std}")

    # Initialize metrics here
    number_of_games_against_team = 0
    against_team_probability = 0
    number_of_games_above_projection_against_team = 0  # Initialize here


    if opposing_team:
        player_data.loc[:, 'OPPONENT_NAME'] = player_data['OPPONENT_NAME'].str.strip()
        
        #print(f"Opposing Team: {opposing_team}")
        #print("player_data['OPPONENT_NAME'] before=", player_data['OPPONENT_NAME'].unique())
        games_against_team = player_data[player_data['OPPONENT_NAME'] == opposing_team]
        #print("games_against_team after=", games_against_team['OPPONENT_NAME'].unique())
        #print(f"Games against {opposing_team}:")
        #print(games_against_team[[stat]])
        if not games_against_team.empty:
            games_above_projection_against_team = games_against_team[games_against_team[stat] >= projection]
            number_of_games_above_projection_against_team = len(games_above_projection_against_team)
            number_of_games_against_team = len(games_against_team)
            against_team_probability = len(games_above_projection_against_team) / number_of_games_against_team if number_of_games_against_team > 0 else 0
        #print(f"Games above projection against {opposing_team}: {number_of_games_above_projection_against_team} out of {number_of_games_against_team}")
    else:
        against_team_probability = None

    games_above_projection = last_10_games[last_10_games[stat] >= projection]
    number_of_games_above_projection = len(games_above_projection)
    number_of_games = len(last_10_games)
    probability = number_of_games_above_projection / number_of_games if number_of_games > 0 else 0

    #print(f"Games above projection: {number_of_games_above_projection} out of {number_of_games}")
    
    league_std = league_std_data[stat].iloc[0] if stat in league_std_data.columns else 0
    std_dev_comparison = player_std < league_std * league_std_rate

    #print(f"League Std Dev for {stat}: {league_std}")
    #print(f"Std Dev Comparison: {'Yes' if std_dev_comparison else 'No'}")

    return probability, against_team_probability, number_of_games_against_team, player_std, std_dev_comparison, league_std, number_of_games_above_projection, number_of_games



def calculate_bet_outcome(bet_amount, odds, probability):
    """
    Calculate expected profit or loss from a bet based on American odds.
    - For positive odds: the profit is bet_amount * (odds / 100) if win.
    - For negative odds: the profit is bet_amount / (abs(odds) / 100) if win.
    The loss is always the bet amount as you lose the stake if the bet does not win.
    """
    if odds > 0:
        # For positive odds, potential profit includes the bet amount
        potential_profit = bet_amount * (odds / 100)
    else:
        # For negative odds, potential profit is the bet amount since you need to bet more to win 100 units
        potential_profit = bet_amount / (abs(odds) / 100)

    # Expected profit considering the probability of winning
    expected_profit = potential_profit
    # No need to adjust for bet_amount for positive odds as it's considered in potential profit
    probability_weighted_to_profit = probability * potential_profit

    # Expected loss is straightforward; it's the bet amount since you lose the stake if the bet doesn't win
    expected_loss = bet_amount

    return expected_profit, expected_loss, probability_weighted_to_profit


def generate_betting_options(player_data, league_std_data, player_names, opposing_teams, all_players=True, n_games=10, league_std_rate=0.9, probability_high=0.9, probability_low=0.1):
    if not isinstance(player_names, list):
        player_names = [player_names]
    if opposing_teams is not None and not isinstance(opposing_teams, list):
        opposing_team = [opposing_teams]
    """
    Generate filtered betting options based on given criteria, now including game dates.
    """
    betting_categories = {
        'PTS': np.arange(9.5, 30.5, 1),
        'AST': np.arange(2.5, 12.5, 1),
        'REB': np.arange(2.5, 12.5, 1),
        'STL': np.arange(0.5, 5.5, 1),
        'BLK': np.arange(0.5, 5.5, 1),
        'FG3M': np.arange(0.5, 5.5, 1),
    }

    results = []
    
    if all_players:
        players = player_data['PLAYER_NAME'].unique()
    else:
        players = [player_name]

    for player in players:
        player_season_data = player_data[player_data['PLAYER_NAME'] == player].copy()
        game_dates = player_season_data['GAME_DATE'].unique()

        # Corrected handling of multiple opposing teams
        for game_date in game_dates:
            game_data = player_season_data[player_season_data['GAME_DATE'] < game_date]
            if not isinstance(opposing_team, list):
                opposing_teams_temp = [opposing_teams]  # Ensure opposing_teams is treated as a list
            else:
                opposing_teams_temp = opposing_teams

            for opposing_team in opposing_teams_temp:
                for stat, thresholds in betting_categories.items():
                    for threshold in thresholds:
                        # Calculate probability and other metrics for the specific game
                        probability, against_team_probability, number_of_games_against_team, player_std, std_dev_comparison, league_std, number_of_games_above_projection, number_of_games = calculate_probability(
                            game_data, stat, threshold, league_std_data, n_games, league_std_rate, opposing_teams)

                        if (probability > probability_high or probability < probability_low) and (player_std <= league_std * league_std_rate):
                            prob_comparison = 'Higher' if probability > probability_high else 'Lower' if probability < probability_low else 'Uncertain'
                            std_dev_comparison = 'Better than league std by at least 10%' if player_std <= league_std * league_std_rate else 'Not better than league std by at least 10%'
                            recommendation = 'Bet' if (prob_comparison == 'Higher' or prob_comparison == 'Lower') and player_std <= league_std * league_std_rate else 'Avoid'

                            result = {
                                'PLAYER_NAME': player,
                                'Stat': stat,
                                'Threshold': threshold,
                                'Probability': probability,
                                'Std Dev Comparison': std_dev_comparison,
                                'Probability comparison': prob_comparison,
                                'Recommendation based on Prob and std_dev': recommendation,
                                'Against Team Probability': against_team_probability if against_team_probability is not None else 'N/A',
                                'Games Against Team': number_of_games_against_team if number_of_games_against_team > 0 else 'N/A',
                                'GAME_DATE': game_date  # Include the game date in the results
                            }
                            results.append(result)

    results = [result for result in results if result['Recommendation based on Prob and std_dev'] == 'Bet']
                
    return pd.DataFrame(results)



def evaluate_bets(generated_bets, actual_performance):
    # Reset index to ensure it's sequential starting from 0
    generated_bets.reset_index(drop=True, inplace=True)

    # Iterate through generated bets using .iterrows() for safer access
    for index, row in generated_bets.iterrows():
        stat_column = row['Stat']
        actual_stat_row = actual_performance[
            (actual_performance['PLAYER_NAME'] == row['PLAYER_NAME']) & 
            (actual_performance['GAME_DATE'] == row['GAME_DATE'])
        ]
        
        if not actual_stat_row.empty:
            actual_value = actual_stat_row.iloc[0][stat_column]
            generated_bets.at[index, 'Actual Value'] = actual_value  # Assign actual value to the DataFrame
            
            if pd.notnull(actual_value):
                bet_correct = ((row['Probability comparison'] == 'Higher' and actual_value > row['Threshold']) or
                               (row['Probability comparison'] == 'Lower' and actual_value < row['Threshold']))
                generated_bets.at[index, 'Bet Outcome'] = bet_correct
            else:
                generated_bets.at[index, 'Bet Outcome'] = np.nan  # Mark as NaN if actual value is NaN
        else:
            # Mark as NaN if there's no matching performance data
            generated_bets.at[index, 'Actual Value'] = np.nan
            generated_bets.at[index, 'Bet Outcome'] = np.nan

    # Initialize running totals outside the loop to avoid resetting them on each iteration
    running_correct = 0
    running_incorrect = 0

    # After determining bet outcomes, calculate running totals
    for index, row in generated_bets.iterrows():
        if pd.notnull(row['Bet Outcome']):
            running_correct += int(row['Bet Outcome'] == True)
            running_incorrect += int(row['Bet Outcome'] == False)

            generated_bets.at[index, 'Running Correct'] = running_correct
            generated_bets.at[index, 'Running Incorrect'] = running_incorrect
        else:
            generated_bets.at[index, 'Running Correct'] = running_correct
            generated_bets.at[index, 'Running Incorrect'] = running_incorrect

    return generated_bets

    
def evaluate_bets_n_games_debug(generated_bets, actual_performance, n_games=10):
    # Ensure the datetime format is correct
    actual_performance['GAME_DATE'] = pd.to_datetime(actual_performance['GAME_DATE'])
    actual_performance.sort_values(by='GAME_DATE', inplace=True)
    
    evaluated_bets_list = []  # To store evaluated bets for debugging

    for player in generated_bets['PLAYER_NAME'].unique():
        print(f"\nEvaluating bets for: {player}")  # Debug: Confirm the player
        
        player_bets = generated_bets[generated_bets['PLAYER_NAME'] == player]
        player_performance = actual_performance[actual_performance['PLAYER_NAME'] == player]
        
        player_performance.sort_values(by='GAME_DATE', inplace=True)
        if len(player_performance) < n_games:
            print(f"Warning: {player} has only {len(player_performance)} games available, less than {n_games} games specified.")
        
        min_date_for_n_games = player_performance['GAME_DATE'].unique()[-n_games]
        print(f"Minimum date for the last {n_games} games: {min_date_for_n_games}")  # Debug: Check min date

        # Filter for bets and performances within the last n games
        player_bets_filtered = player_bets[player_bets['GAME_DATE'] >= min_date_for_n_games]
        player_performance_filtered = player_performance[player_performance['GAME_DATE'] >= min_date_for_n_games]

        correct_bets = 0
        incorrect_bets = 0

        for index, bet in player_bets_filtered.iterrows():
            game_date = bet['GAME_DATE']
            actual = player_performance_filtered[player_performance_filtered['GAME_DATE'] == game_date]

            if not actual.empty:
                actual_value = actual.iloc[0][bet['Stat']]
                bet_correct = ((bet['Probability comparison'] == 'Higher' and actual_value > bet['Threshold']) or
                               (bet['Probability comparison'] == 'Lower' and actual_value < bet['Threshold']))
                
                if bet_correct:
                    correct_bets += 1
                else:
                    incorrect_bets += 1
                
                evaluated_bets_list.append({**bet, 'Actual Value': actual_value, 'Bet Correct': bet_correct})
            else:
                print(f"No actual performance data for {player} on {game_date}")

        print(f"{player} - Correct Bets: {correct_bets}, Incorrect Bets: {incorrect_bets}")  # Debug: Check bet outcomes

    # Compile the debug results into a DataFrame
    evaluated_bets_df_debug = pd.DataFrame(evaluated_bets_list)
    
    if not evaluated_bets_df_debug.empty:
        final_corrects = sum(evaluated_bets_df_debug['Bet Correct'])
        overall_bets = len(evaluated_bets_df_debug)
        correct_percentage = final_corrects / overall_bets * 100
        print(f"\nFinal Correct Percentage: {correct_percentage}% ({final_corrects}/{overall_bets})")
    else:
        print("No bets evaluated. Check if the date range or player selection might be too restrictive.")

    return evaluated_bets_df_debug




Overwriting modular/betting_functions.py


In [154]:
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from modular.player_game_logs import load_nba_player_game_logs, prepare_upcoming_games_data#, process_final_data
from modular.metrics_functions import prepare_mean_std_data, prepare_league_std_data, prepare_performance_against_all_teams
from modular.betting_functions import calculate_probability, calculate_bet_outcome, generate_betting_options


def process_final_data(selected_season=['2023-24'], min_avg_minutes=35, 
                       prev_games_save_path='data/player_game_logs_winr.csv', upcoming_games_save_path='data/23_24_season_games.csv',
                       output_path='data/combined_data.csv', load_new_data=True):
    
    if load_new_data:
        # Attempt to load previous games data
        try:
            previous_games = load_nba_player_game_logs(selected_season, min_avg_minutes, prev_games_save_path)
            print(f"Loaded previous games data: {previous_games.shape}")
            if previous_games is None: #or previous_games.empty
                raise ValueError("Failed to load data from player game logs or no data available.")
        except Exception as e:
            print(f"Error loading previous games data: {e}")
            return pd.DataFrame()

        # Attempt to load upcoming games data
        try:
            upcoming_games = prepare_upcoming_games_data(upcoming_games_save_path, prev_games_save_path, expand_with_players=True)
            print(f"Loaded upcoming games data: {upcoming_games.shape}")
            if upcoming_games is None: # or upcoming_games.empty
                raise ValueError("Failed to load upcoming games data or no data available.")
        except Exception as e:
            print(f"Error preparing upcoming games data: {e}")
            return pd.DataFrame()
    else:
        def load_data():
            data = pd.read_csv('data/player_game_logs_winr.csv')
            data['GAME_DATE'] = pd.to_datetime(data['GAME_DATE'])
            data.sort_values(by='GAME_DATE', inplace=True)
            return data

        previous_games = load_data()

        #pull in upcoming games to concatenate to data and input averages onto it
        upcoming_games = prepare_upcoming_games_data('data/23_24_season_games.csv', 'data/player_game_logs_winr.csv', expand_with_players=True)

        # Load the existing games data
        previous_games = load_data()
        print(previous_games.head())

    previous_games['GAME_DATE'] = pd.to_datetime(previous_games['GAME_DATE'])
    previous_games.sort_values(by='GAME_DATE', inplace=True)

    # Ensure GAME_DATE is in datetime format for comparison
    upcoming_games['GAME_DATE'] = pd.to_datetime(upcoming_games['GAME_DATE'])

    # Filter out upcoming games that have dates already in previous games
    unique_upcoming_games = upcoming_games[~upcoming_games['GAME_DATE'].isin(previous_games['GAME_DATE'])]

    # Concatenate the unique upcoming games to the previous games dataset
    data = pd.concat([previous_games, unique_upcoming_games], ignore_index=True)

    # Sort the concatenated data by GAME_DATE to maintain chronological order
    data.sort_values(by='GAME_DATE', inplace=True)

    # Reset the index of the concatenated DataFrame
    data.reset_index(drop=True, inplace=True)

    # Save the processed data to CSV files
    data.to_csv(output_path, index=False)

    return data

# try the other functions
test_nba_player_log_pull = load_nba_player_game_logs('2023-24', 35,'data/player_game_logs_winr.csv')
print("test_nba_player_log_pull=", test_nba_player_log_pull.head())

#process data has these parameters: selected_season='2023-24', min_avg_minutes=15.0, 
#prev_games_save_path='data/player_game_logs_winr.csv', upcoming_games_save_path='data/23_24_season_games.csv', output_path='data/combined_data.csv'
processed_data = process_final_data(selected_season='2023-24', min_avg_minutes=35, prev_games_save_path='data/player_game_logs_winr.csv', 
                                    upcoming_games_save_path='data/23_24_season_games.csv', output_path='data/combined_data.csv', load_new_data=False)

# print(processed_data.head())
print("new process details: min max dates, length, columns", processed_data['GAME_DATE'].min(), processed_data['GAME_DATE'].max(), len(processed_data), processed_data.columns)




Processing season 2023-24...


  new_players_data['GAME_DATE'] = pd.to_datetime(new_players_data['GAME_DATE'])


Player game logs saved to data/player_game_logs_winr.csv


AttributeError: 'NoneType' object has no attribute 'head'

In [144]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from datetime import datetime, timedelta

def calculate_probability(player_data, stat, projection, league_std_data, n_games=10, league_std_rate=0.9, opposing_teams=None):
    results = []
    # Ensure opposing_teams is a list for consistent processing
    opposing_teams_list = [opposing_teams] if isinstance(opposing_teams, str) else opposing_teams or ['All']
    
    for player_name in player_data['PLAYER_NAME'].unique():
        player_specific_data = player_data[player_data['PLAYER_NAME'] == player_name]

        # Iterate through the list of opposing teams or default to 'All'
        for opposing_team in opposing_teams_list:
            if opposing_team != 'All':
                # Filter games against the specific opposing team
                team_specific_data = player_specific_data[player_specific_data['OPPONENT_NAME'] == opposing_team]
            else:
                # Use all data if 'All' teams are considered
                team_specific_data = player_specific_data
            
            # Ensure there's enough data to calculate the probability
            if len(team_specific_data) < n_games:
                print(f"Not enough games for player: {player_name} against team: {opposing_team}")
                continue

            # Calculate statistics for the last n games
            recent_games = team_specific_data.sort_values('GAME_DATE', ascending=False).head(n_games)
            games_with_stat = recent_games.dropna(subset=[stat])
            num_games_with_stat_above_projection = games_with_stat[games_with_stat[stat] >= projection].shape[0]
            
            # Handle cases with no stats or all NaN values
            if games_with_stat.empty or games_with_stat[stat].std() is np.nan:
                player_std = 0
            else:
                player_std = games_with_stat[stat].std()
            
            league_std = league_std_data.get(stat, np.nan)
            std_dev_comparison = player_std < (league_std * league_std_rate)
            
            probability = num_games_with_stat_above_projection / n_games if n_games > 0 else 0
            
            # Append results for each player against each team (or 'All')
            results.append({
                'player_name': player_name,
                'opposing_team': opposing_team,
                'stat': stat,
                'projection': projection,
                'probability': probability,
                'std_dev_comparison': std_dev_comparison,
                'player_std': player_std,
                'league_std': league_std,
                'number_of_games': n_games,
                'number_of_games_above_projection': num_games_with_stat_above_projection
            })

    return pd.DataFrame(results)





def generate_betting_options(player_data, league_std_data, player_name, opposing_team, all_players=False, n_games=10, league_std_rate=0.9, probability_high=0.9, probability_low=0.1):
    """
    Generate filtered betting options based on given criteria, now including game dates.
    """
    betting_categories = {
        'PTS': np.arange(9.5, 30.5, 1),
        'AST': np.arange(2.5, 12.5, 1),
        'REB': np.arange(2.5, 12.5, 1),
        'STL': np.arange(0.5, 5.5, 1),
        'BLK': np.arange(0.5, 5.5, 1),
        'FG3M': np.arange(0.5, 5.5, 1),
    }

    results = []
    
    if all_players:
        players = player_data['PLAYER_NAME'].unique()
    else:
        players = [player_name]

    for player in players:
        # Filter player data for the specific player
        player_season_data = player_data[player_data['PLAYER_NAME'] == player].copy()

        # Get unique game dates for the player
        game_dates = player_season_data['GAME_DATE'].unique()
        #print(f"Unique game dates for player: {game_dates}")

        for game_date in game_dates:
            # Filter data for the specific game date
            #print(f"Processing game date: {game_date}")
            game_data = player_season_data[player_season_data['GAME_DATE'] < game_date]
            #print(f"Filtered game data for calculations: {game_data.shape[0]} rows")

            for stat, thresholds in betting_categories.items():
                for threshold in thresholds:
                    # Calculate probability and other metrics for the specific game
                    probability, against_team_probability, number_of_games_against_team, player_std, std_dev_comparison, league_std, number_of_games_above_projection, number_of_games = calculate_probability(
                        game_data, stat, threshold, league_std_data, n_games, league_std_rate, opposing_team)

                    if (probability > probability_high or probability < probability_low) and (player_std <= league_std * league_std_rate):
                        prob_comparison = 'Higher' if probability > probability_high else 'Lower' if probability < probability_low else 'Uncertain'
                        std_dev_comparison = 'Better than league std by at least 10%' if player_std <= league_std * league_std_rate else 'Not better than league std by at least 10%'
                        recommendation = 'Bet' if (prob_comparison == 'Higher' or prob_comparison == 'Lower') and player_std <= league_std * league_std_rate else 'Avoid'

                        result = {
                            'PLAYER_NAME': player,
                            'Stat': stat,
                            'Threshold': threshold,
                            'Probability': probability,
                            'Std Dev Comparison': std_dev_comparison,
                            'Probability comparison': prob_comparison,
                            'Recommendation based on Prob and std_dev': recommendation,
                            'Against Team Probability': against_team_probability if against_team_probability is not None else 'N/A',
                            'Games Against Team': number_of_games_against_team if number_of_games_against_team > 0 else 'N/A',
                            'GAME_DATE': game_date  # Include the game date in the results
                        }
                        results.append(result)

    results = [result for result in results if result['Recommendation based on Prob and std_dev'] == 'Bet']
                
    return pd.DataFrame(results)


import pandas as pd
import numpy as np

#old
def evaluate_bets_n_games_debug(generated_bets, actual_performance, n_games=10):
    # Ensure the datetime format is correct
    actual_performance['GAME_DATE'] = pd.to_datetime(actual_performance['GAME_DATE'])
    actual_performance.sort_values(by='GAME_DATE', inplace=True)
    
    evaluated_bets_list = []  # To store evaluated bets for debugging

    for player in generated_bets['PLAYER_NAME'].unique():
        print(f"\nEvaluating bets for: {player}")  # Debug: Confirm the player
        
        player_bets = generated_bets[generated_bets['PLAYER_NAME'] == player]
        player_performance = actual_performance[actual_performance['PLAYER_NAME'] == player]
        
        player_performance.sort_values(by='GAME_DATE', inplace=True)
        if len(player_performance) < n_games:
            print(f"Warning: {player} has only {len(player_performance)} games available, less than {n_games} games specified.")
        
        min_date_for_n_games = player_performance['GAME_DATE'].unique()[-n_games]
        print(f"Minimum date for the last {n_games} games: {min_date_for_n_games}")  # Debug: Check min date

        # Filter for bets and performances within the last n games
        player_bets_filtered = player_bets[player_bets['GAME_DATE'] >= min_date_for_n_games]
        player_performance_filtered = player_performance[player_performance['GAME_DATE'] >= min_date_for_n_games]

        correct_bets = 0
        incorrect_bets = 0

        for index, bet in player_bets_filtered.iterrows():
            game_date = bet['GAME_DATE']
            actual = player_performance_filtered[player_performance_filtered['GAME_DATE'] == game_date]

            if not actual.empty:
                actual_value = actual.iloc[0][bet['Stat']]
                bet_correct = ((bet['Probability comparison'] == 'Higher' and actual_value > bet['Threshold']) or
                               (bet['Probability comparison'] == 'Lower' and actual_value < bet['Threshold']))
                
                if bet_correct:
                    correct_bets += 1
                else:
                    incorrect_bets += 1
                
                evaluated_bets_list.append({**bet, 'Actual Value': actual_value, 'Bet Correct': bet_correct})
            else:
                print(f"No actual performance data for {player} on {game_date}")

        print(f"{player} - Correct Bets: {correct_bets}, Incorrect Bets: {incorrect_bets}")  # Debug: Check bet outcomes

    # Compile the debug results into a DataFrame
    evaluated_bets_df_debug = pd.DataFrame(evaluated_bets_list)
    
    if not evaluated_bets_df_debug.empty:
        final_corrects = sum(evaluated_bets_df_debug['Bet Correct'])
        overall_bets = len(evaluated_bets_df_debug)
        correct_percentage = final_corrects / overall_bets * 100
        print(f"\nFinal Correct Percentage: {correct_percentage}% ({final_corrects}/{overall_bets})")
    else:
        print("No bets evaluated. Check if the date range or player selection might be too restrictive.")

    return evaluated_bets_df_debug

#new
def evaluate_bets_n_games_debug_new(generated_bets, actual_performance, n_games=10):
    actual_performance['GAME_DATE'] = pd.to_datetime(actual_performance['GAME_DATE'])
    actual_performance.sort_values(by='GAME_DATE', inplace=True)

    # Initialize additional columns in generated_bets for evaluation metrics
    generated_bets['Correct Bets'] = 0
    generated_bets['Incorrect Bets'] = 0
    generated_bets['Total Bets Evaluated'] = 0
    generated_bets['Correct Percentage'] = 0.0

    for player in generated_bets['PLAYER_NAME'].unique():
        player_generated_bets = generated_bets[generated_bets['PLAYER_NAME'] == player].copy()
        player_actual_performance = actual_performance[actual_performance['PLAYER_NAME'] == player]

        correct_bets = 0
        incorrect_bets = 0

        for index, bet in player_generated_bets.iterrows():
            # Filtering actual performances based on the date of the bet
            relevant_actual_performances = player_actual_performance[player_actual_performance['GAME_DATE'] <= bet['GAME_DATE']].sort_values(by='GAME_DATE', ascending=False).head(n_games)
            if relevant_actual_performances.empty:
                continue  # Skip if no relevant performances found

            # Check if actual value on the game date meets the bet condition
            actual_value = relevant_actual_performances[relevant_actual_performances['GAME_DATE'] == bet['GAME_DATE']][bet['Stat']].values
            if actual_value.size > 0:
                actual_value = actual_value[0]
                bet_correct = ((bet['Probability comparison'] == 'Higher' and actual_value > bet['Threshold']) or
                               (bet['Probability comparison'] == 'Lower' and actual_value < bet['Threshold']))
                
                if bet_correct:
                    correct_bets += 1
                else:
                    incorrect_bets += 1
            else:
                print(f"No actual performance data for {player} on {bet['GAME_DATE']} for stat {bet['Stat']}")

        total_bets_evaluated = correct_bets + incorrect_bets
        correct_percentage = (correct_bets / total_bets_evaluated * 100) if total_bets_evaluated > 0 else 0

        # Update the generated_bets DataFrame with evaluation results
        generated_bets.loc[generated_bets['PLAYER_NAME'] == player, 'Correct Bets'] = correct_bets
        generated_bets.loc[generated_bets['PLAYER_NAME'] == player, 'Incorrect Bets'] = incorrect_bets
        generated_bets.loc[generated_bets['PLAYER_NAME'] == player, 'Total Bets Evaluated'] = total_bets_evaluated
        generated_bets.loc[generated_bets['PLAYER_NAME'] == player, 'Correct Percentage'] = correct_percentage

        print(f"{player} - Correct Bets: {correct_bets}, Incorrect Bets: {incorrect_bets}, Total Bets Evaluated: {total_bets_evaluated}, Correct Percentage: {correct_percentage:.2f}%")

    return generated_bets




In [145]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from modular.player_game_logs import load_nba_player_game_logs, prepare_upcoming_games_data
from modular.metrics_functions import prepare_mean_std_data, prepare_league_std_data, prepare_performance_against_all_teams
from modular.betting_functions import calculate_bet_outcome, generate_betting_options, evaluate_bets, calculate_probability

# Fixed values for player and date for debugging
selected_player = ["Myles Turner", "Tyrese Haliburton"]
selected_date = "2024-03-12"
end_date = "2024-03-09"
opposing_team = "Oklahoma City Thunder"  # Example opposing team for testing
stat = "PTS"
projection = 20.5  # Example projection for testing

#sort data by game date ascending
data = processed_data.sort_values(by='GAME_DATE', ascending=True)

# Simulate selecting data for a specific player and date
# Instead of using == for comparison, use isin() for list of names
player_data_original = data[data['PLAYER_NAME'].isin(selected_player)]
player_data = player_data_original.copy()


#Testing the generate_betting_options filter-------------------------------------------------------------------------------------

# Ensure GAME_DATE is in datetime format
player_data['GAME_DATE'] = pd.to_datetime(player_data['GAME_DATE'])
player_data.sort_values(by='GAME_DATE', inplace=True)

# Filter data to include only games up to 'selected_date' for each player
player_data = player_data[player_data['GAME_DATE'] <= pd.to_datetime(selected_date)]


# Filter for selected date directly
selected_date_dt = pd.to_datetime(selected_date)

# Get today's date as a datetime object at midnight
today = datetime.now()
today = pd.to_datetime(today)
yesterday = today - timedelta(days=1)
yesterday = pd.to_datetime(yesterday)

# Filter for selected date
# If the selected date is today or in the future, filter data up to yesterday
if selected_date_dt >= today:
    print("Selected date is today or in the future.")
    player_data_filt = player_data[player_data['GAME_DATE'] <= yesterday]
# If the selected date is before today, filter for the selected day
else:
    print("Selected date is before today.")
    player_data_filt = player_data[player_data['GAME_DATE'] <= selected_date_dt]

# print("Filtered player data for selected date:", player_data_filt)

#Testing the generate_betting_options filter-------------------------------------------------------------------------------------

# Generate betting options for the selected player (assuming league_std_data is predefined or mocked similarly)
league_std_data = pd.DataFrame({
    'PTS': [10.5],
    'AST': [2.5],
    'REB': [3.5],
    'STL': [1.5],
    'BLK': [0.8],
    'FG3M': [2.0]
})

n_games = 11
league_std_rate = 0.9
probability_high = 0.9
probability_low = 0.1


# Assuming all necessary imports and functions are defined as per your script.

# Ensure the correct datetime format and sort order
print("Step 1: Data Preparation Completed")

number_of_players = len(player_data_filt['PLAYER_NAME'].unique())
# Step 2: Generate Betting Options for all historical data
betting_options_df = generate_betting_options(player_data_filt, league_std_data, selected_player, opposing_team, all_players=True, n_games=n_games, league_std_rate=league_std_rate, probability_high=probability_high, probability_low=probability_low)
print(f"Step 2: Generated {len(betting_options_df)} betting options for all historical data for {number_of_players} players:")
#print("betting options df before evaluated =", betting_options_df[['PLAYER_NAME', 'Stat', 'Threshold', 'Probability comparison', 'GAME_DATE']])

# Step 3: Filter Betting Options for the selected date
selected_date_dt = pd.to_datetime(selected_date)
print("Selected date (datetime):", selected_date_dt)
# print("betting options df game date=", betting_options_df.head())
if not betting_options_df.empty:
    betting_options_df_selected_date = betting_options_df[betting_options_df['GAME_DATE'] == selected_date_dt]
    print(betting_options_df_selected_date['GAME_DATE'].min())
    print(betting_options_df_selected_date['GAME_DATE'].max())
    number_of_players = len(betting_options_df_selected_date['PLAYER_NAME'].unique())
    print(f"Step 3: Filtered {len(betting_options_df_selected_date)} betting options for the selected date ({selected_date}) for {number_of_players} players:")
    #print(betting_options_df_selected_date[['PLAYER_NAME', 'Stat', 'Threshold', 'GAME_DATE']])

    #evaluated_bets_df = evaluate_bets_n_games_debug(betting_options_df, player_data_filt, n_games)
    evaluated_bets_df = evaluate_bets_n_games_debug(betting_options_df, player_data_filt, n_games=10)
    #print(evaluated_bets_df.columns)
    print(evaluated_bets_df[['PLAYER_NAME', 'Stat', 'Threshold', 'Probability comparison', 'GAME_DATE', 'Correct Bets',
                              'Incorrect Bets', 'Total Bets Evaluated','Correct Percentage' ]])
    print("Step 4: Evaluated bets based on the last n games.")

    #print if incorrect bets are > 0
    print('unique incorrect bets', evaluated_bets_df['Incorrect Bets'].unique())




KeyError: 'GAME_DATE'