Inputs: Available players from a yahoo Context Export

Outputs: Processed Data for evaluation


In [1]:
import pandas as pd
import json

from datetime import datetime
import pytz

import numpy as np

from basketball_reference_scraper.teams import get_roster, get_team_stats, get_opp_stats, get_roster_stats, get_team_misc, get_team_ratings
from basketball_reference_scraper.players import get_stats, get_game_logs, get_player_headshot

# https://github.com/swar/nba_api
from nba_api.stats.endpoints import playergamelog
from nba_api.stats.static import players

import requests

from dotenv import load_dotenv
import os
from pathlib import Path

from yahoo_oauth import OAuth2
import yahoo_fantasy_api as yfa  # yahoo-fantasy-api.readthedocs.io/en/latest/yahoo_fantasy_api.html

# Load the .env file from the current directory
load_dotenv(dotenv_path = Path("../src/.env"))

True

In [2]:
today = datetime.utcnow().date()

  today = datetime.utcnow().date()


In [5]:
# Get the absolute path to the 'src' directory
src_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'src'))
oauth = OAuth2(None, None, from_file='../src/oauth2.json')

gm = yfa.Game(oauth, 'nba')
lg = yfa.League(oauth, '454.l.222542')


[2024-10-30 15:16:21,181 DEBUG] [yahoo_oauth.oauth.__init__] Checking 
[2024-10-30 15:16:21,183 DEBUG] [yahoo_oauth.oauth.token_is_valid] ELAPSED TIME : 4972.878912687302
[2024-10-30 15:16:21,185 DEBUG] [yahoo_oauth.oauth.token_is_valid] TOKEN HAS EXPIRED
[2024-10-30 15:16:21,186 DEBUG] [yahoo_oauth.oauth.refresh_access_token] REFRESHING TOKEN


## Preprocessing

In [None]:
# Open the JSON file and load its content
with open("../data/team_mapper.json", 'r') as file:
    team_mapper  = json.load(file)

yahoo_to_bball = {v['Yahoo']: v['BBall Reference'] for k, v in team_mapper.items()}
bball_to_yahoo_teams = {v['BBall Reference'] : v['Yahoo'] for k, v in team_mapper.items()}

#team names to Yahoo codes
team_to_yahoo_mapper = {team: details["Yahoo"] for team, details in team_mapper.items()}


In [None]:
# Import Data
available_players = pd.read_csv('../data/Yahoo_DF_player_export.csv') # Update according to contest

available_players['date'] = datetime.now().date()
available_players['parsed_id'] = available_players['ID'].str.extract(r'nba\.p\.(\d+)').astype(int)

# Add Home_Away column using a lambda function
available_players['Home_Game'] = available_players.apply(lambda x: '0' if f"{x['Team']}@{x['Opponent']}" == x['Game'] else '1', axis=1)

available_players.head()

In [None]:
# Yahoo
yahoo_teams_playing_today = available_players['Team'].unique()

bball_reference_teams = np.array([yahoo_to_bball.get(team, team) for team in yahoo_teams_playing_today])


In [None]:
season_stats = pd.DataFrame(lg.player_stats(list(available_players['parsed_id']), 'average_season',  season=2024))

season_stats.replace('-', np.nan, inplace=True)
season_stats.set_index('name', inplace = True)

## Team Stats

In [None]:
team_ratings = get_team_ratings(2025, team=bball_reference_teams )
for i, row in team_ratings.iterrows():
    team = bball_to_yahoo_teams[row.TEAM]
    
    available_players.loc[available_players['Team'] == team, 'ORTG'] = row.ORTG
    available_players.loc[available_players['Team'] == team, 'DRTG'] = row.DRTG

    available_players.loc[available_players['Opponent'] == team, "Opponent_ORTG"] = row.ORTG  
    available_players.loc[available_players['Opponent'] == team, "Opponent_DRTG"] = row.DRTG 

# Team Ranking for pace
for team in bball_reference_teams:
    try:
        team_rank_sr = get_team_misc(team, 2025, 'RANK')

        # Add the Series attributes to each row where 'Team'
        for key, value in team_rank_sr.items():
            available_players.loc[available_players['Team'] == team, key] = value
            available_players.loc[available_players['Opponent'] == bball_to_yahoo_teams[team], "Opponent_"+key] = value            

    except:
        print(f"Error with {team}")  

## Get Game Logs and actual fan points

In [None]:
all_players = players.get_players()

# Create a dictionary mapping player_id to player_name
player_dict = {player['full_name'] : player['id']for player in all_players}

In [None]:
import logging 

error_players = []
scoring_criteria = { 'PTS':1, 'REB': 1.2, 'AST':1.5, 'STL':3, 'BLK':3, 'TOV':-1}
logging.basicConfig(level=logging.ERROR, format='%(asctime)s - %(levelname)s - %(message)s')

for player_name in season_stats.index:

    try: 
        player_id = player_dict[player_name]

        game_log = playergamelog.PlayerGameLog(player_id, season='2024-25')

        temp_df =  game_log.get_data_frames()[0]

        temp_df['actual_FP'] = [sum(row[col] * scoring_criteria[col] for col in scoring_criteria ) for _, row in temp_df.iterrows() ]

        #print(len(temp_df['actual_FP']))
        
        # last 3 games
        FPS1 = temp_df['actual_FP'][0]
        FPS2 = temp_df['actual_FP'][1]
        FPS3 = temp_df['actual_FP'][2]


        season_stats.loc[player_name, 'FPS1'] = FPS1
        season_stats.loc[player_name, 'FPS2'] = FPS2
        season_stats.loc[player_name, 'FPS3'] = FPS3

    
    except Exception as e:
        # take a whil to run
        #logging.error(f"Error processing player {player_name}: {e}")
        error_players.append(player_name)




## Vegas Odds

In [None]:
# Parameters:

API_KEY = os.getenv("odds_API_KEY")

SPORT = "upcoming" # use the sport_key from the /sports endpoint below, or use 'upcoming' to see the next 8 games across all sports

REGIONS = "us" # uk | us | eu | au. Multiple can be specified if comma delimited

MARKETS = "h2h,spreads" # h2h | spreads | totals. Multiple can be specified if comma delimited

ODDS_FORMAT = "decimal" # decimal | american

DATE_FORMAT = "iso" # iso | unix

COMMENCE_TIME = f"{str(today)}T00:00:00Z"


In [None]:
# Get today's odds
# https://the-odds-api.com/liveapi/guides/v4/
odds_response = requests.get(
    f'https://api.the-odds-api.com/v4/sports/basketball_nba/odds',
    params={
        'api_key': API_KEY,
        'regions': REGIONS,
        'markets': MARKETS,
        'oddsFormat': ODDS_FORMAT,
        'dateFormat': DATE_FORMAT,
        'commenceTimeFrom': COMMENCE_TIME
    }
)

json_odds = odds_response.json()

In [None]:
filtered_data = [
    game for game in json_odds
    if datetime.strptime(game['commence_time'], '%Y-%m-%dT%H:%M:%SZ')
    .replace(tzinfo=pytz.utc)
    .astimezone(pytz.timezone('US/Central')).date() == today
]

In [None]:
# Export for historical
with open(f"../data/odds_history/nba_vegas_odds_{today}.json", 'w') as output_file:
    json.dump(filtered_data, output_file, indent=4)

In [None]:
# Parse odds into a DataFrame
odds_list = []

for game in filtered_data:
    home_team = game['home_team']
    away_team = game['away_team']
    for bookmaker in game['bookmakers']:
        for market in bookmaker['markets']:
            if market['key'] == 'h2h':
                for outcome in market['outcomes']:
                    odds_list.append({
                        'Team': outcome['name'],
                        'Odds': outcome['price']
                    })
odds_df = pd.DataFrame(odds_list)

# Group by 'Team' and calculate the average odds for each team
grouped_odds = odds_df.groupby('Team').agg({'Odds': 'mean'}).reset_index()

grouped_odds['Team_Abbreviation'] = grouped_odds['Team'].replace(team_to_yahoo_mapper)

In [None]:
# for today's date
print(f"Odds for {today}")
grouped_odds

## Combine Dfs

In [None]:
merged_df = pd.merge(available_players, season_stats, left_on='parsed_id', right_on='player_id', how='inner')

Additional Metrics: Weighted FPS, Home/Away Modifier, Opponent Modifier, Team Modifier, Game Pace

In [None]:
# Calculate weighted average of recent FPS (Fantasy Points Scored)
merged_df['weighted_FPS'] = ( 0.4 * merged_df['FPPG'] + 0.3 * merged_df['FPS1'] + 0.2 * merged_df['FPS2'] + 0.1 * merged_df['FPS3'])

# Calculate Home/Away Modifier
# If home, increase value slightly (e.g., by 5%), otherwise decrease
merged_df['Home_Modifier'] = merged_df['Home_Game'].apply(lambda x: 1.05 if x == 1 else 0.95)

# Calculate Opponent Difficulty Modifier
# Use opponent defensive rating (DRtg): Higher DRtg means worse defense, good for our player
merged_df['Opponent_Modifier'] = merged_df['Opponent_DRTG'].apply(lambda x: 1.0 + (100 - x) / 100 if x < 100 else 1.0)

# Calculate Team Offensive Rating Modifier
# Higher offensive rating is better
merged_df['Team_Modifier'] = merged_df['ORTG'].apply(lambda x: 1.0 + (x - 100) / 100 if x > 100 else 1.0)

# Calculate Pace Modifier
# Use both team pace and opponent pace: More possessions mean more opportunities for stats
merged_df['Pace_Modifier'] = (merged_df['Pace'] + merged_df['Opponent_Pace']) / 200


## Merge against odds

In [None]:
merged_df = merged_df.merge(grouped_odds[['Team_Abbreviation', 'Odds']], left_on='Team', right_on='Team_Abbreviation', how='left')
merged_df.rename(columns={'Odds': 'Team_Odds'}, inplace=True)
merged_df.drop(columns='Team_Abbreviation', inplace=True)

merged_df = merged_df.merge(grouped_odds[['Team_Abbreviation', 'Odds']], left_on='Opponent', right_on='Team_Abbreviation', how='left')
merged_df.rename(columns={'Odds': 'Opponent_Odds'}, inplace=True)
merged_df.drop(columns='Team_Abbreviation', inplace=True)

In [None]:
# Incorporate team and opponent odds to calculate the Value_Score
merged_df['Adjusted_Team_Odds'] = 1 / merged_df['Team_Odds']
merged_df['Adjusted_Opponent_Odds'] = 1 / merged_df['Opponent_Odds']

# Normalize Adjusted Odds to avoid large disparities
merged_df['Normalized_Team_Odds'] = merged_df['Adjusted_Team_Odds'] / merged_df['Adjusted_Team_Odds'].max()
merged_df['Normalized_Opponent_Odds'] = merged_df['Adjusted_Opponent_Odds'] / merged_df['Adjusted_Opponent_Odds'].max()


In [None]:
# Calculate the new Value_Score including the odds
merged_df['Value_Score'] = (
    merged_df['weighted_FPS'] *
    merged_df['Home_Modifier'] *
    merged_df['Opponent_Modifier'] *
    merged_df['Team_Modifier'] *
    #merged_df['Pace_Modifier'] *
    merged_df['Normalized_Team_Odds'] *
    merged_df['Normalized_Opponent_Odds']
) / merged_df['Salary']

# Drop any where value score is NaN.
#merged_df.dropna(subset=['Value_Score'], inplace = True)

In [None]:
merged_df

## Final Output

In [None]:
merged_df.to_csv(f"../outputs/lineup_{datetime.now().date()}.csv")

In [None]:
merged_df.head()