In [129]:
import pandas as pd
import numpy as np
import sys
from nba_api.stats.endpoints import AllTimeLeadersGrids, commonplayerinfo, CommonTeamRoster
from datetime import datetime, timedelta
import time

In [187]:
from nba_api.stats.endpoints import teamdetails, teamplayerdashboard, commonallplayers

def get_team_roster(team_id):
    # Get all players who have played for the team
    players_info = commonallplayers.CommonAllPlayers(is_only_current_season=1)
    players_info_data = players_info.get_data_frames()[0]
    
    # Filter players by team ID
    roster = players_info_data[players_info_data["TEAM_ID"] == team_id]
    
    return roster

def get_team_stats(team_id, season):
    # Get team stats for a specific season
    team_stats = teamplayerdashboard.TeamPlayerDashboard(team_id=team_id, season=season)
    team_stats_data = team_stats.get_data_frames()[0]
    
    # Extract team stats
    team_stats = team_stats_data
    
    return team_stats

In [188]:
from nba_api.stats.static import teams

def get_team_ids():
    # Retrieve all NBA teams
    nba_teams = teams.get_teams()
    
    team_ids = {}
    # Extract team IDs
    for team in nba_teams:
        team_ids[team["full_name"]] = team['id']
    
    return team_ids

In [307]:
def get_all_games(start_year = "2017-9-1", end_year = None, teams = "all"):
    start_year = datetime.strptime(start_year, "%Y-%m-%d")
    team_ids = get_team_ids()
    dfs = []
    if teams == "all":
        for team, team_id in team_ids.items():
            print(f"Attempting to collect {team}")
            gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id, timeout=60)
            games = gamefinder.get_data_frames()[0]
            games["GAME_DATE"] = games["GAME_DATE"].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
            games = games[games["GAME_DATE"] >= start_year]
            dfs.append(games)
            print(f"Successfully collected {team}")
            time.sleep(2.5)
        return dfs
    else:
        team_id = team_ids[teams]
        gamefinder = leaguegamefinder.LeagueGameFinder(team_id_nullable=team_id)
        games = gamefinder.get_data_frames()[0]
        games["GAME_DATE"] = games["GAME_DATE"].apply(lambda x: datetime.strptime(x, "%Y-%m-%d"))
        games = games[games["GAME_DATE"] >= start_year]
        return games

In [1142]:
def mark_birthday_games(games_df, players_df, day_range):
   
    birthday_indexes = []
    games_df["birthday_game"] = 0

    for index, player_row in players_df.iterrows():
        player_team = player_row["Team"]
        player_birthday = datetime.strptime(player_row["Birthday"], "%Y-%m-%d")
        
        birthday_range_start = player_birthday - timedelta(days=day_range)
        birthday_range_end = player_birthday + timedelta(days=day_range)
        
        games_to_check = games_df[(games_df["home_team_abbrev"] == player_team) | (games_df["visit_team_abbrev"] == player_team)]
        birthday_games = games_to_check[(games_to_check["game_date"].dt.month == birthday_range_start.month) & 
                                        (games_to_check['game_date'].dt.day >= birthday_range_start.day) &
                                        (games_to_check['game_date'].dt.day <= birthday_range_end.day)]
        if len(birthday_games.index) >= 1:
            birthday_indexes.append([*birthday_games.index])
    
    for i in birthday_indexes:
        # games_df["birthday_game"].iloc[i] = 1
        games_df.loc[i, "birthday_game"] = 1
        
    return games_df

In [308]:
all_games = get_all_games(start_year = "2017-9-1")
all_games = pd.concat(all_games)
all_games

Attempting to collect Atlanta Hawks
Successfully collected Atlanta Hawks
Attempting to collect Boston Celtics
Successfully collected Boston Celtics
Attempting to collect Cleveland Cavaliers
Successfully collected Cleveland Cavaliers
Attempting to collect New Orleans Pelicans
Successfully collected New Orleans Pelicans
Attempting to collect Chicago Bulls
Successfully collected Chicago Bulls
Attempting to collect Dallas Mavericks
Successfully collected Dallas Mavericks
Attempting to collect Denver Nuggets
Successfully collected Denver Nuggets
Attempting to collect Golden State Warriors
Successfully collected Golden State Warriors
Attempting to collect Houston Rockets
Successfully collected Houston Rockets
Attempting to collect Los Angeles Clippers
Successfully collected Los Angeles Clippers
Attempting to collect Los Angeles Lakers
Successfully collected Los Angeles Lakers
Attempting to collect Miami Heat
Successfully collected Miami Heat
Attempting to collect Milwaukee Bucks
Successfully

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612737,ATL,Atlanta Hawks,0022300835,2024-02-27,ATL vs. UTA,W,239,124,46,93,0.495,12,32.0,0.375,20,23,0.870,15.0,40.0,55.0,27,8.0,8,17,15,27.8
1,22023,1610612737,ATL,Atlanta Hawks,0022300821,2024-02-25,ATL vs. ORL,W,241,109,41,90,0.456,12,29.0,0.414,15,17,0.882,9.0,35.0,44.0,31,10.0,3,9,17,17.0
2,22023,1610612737,ATL,Atlanta Hawks,0022300804,2024-02-23,ATL vs. TOR,L,239,121,46,106,0.434,12,39.0,0.308,17,24,0.708,20.0,34.0,54.0,27,5.0,4,9,11,-2.0
3,22023,1610612737,ATL,Atlanta Hawks,0022300777,2024-02-14,ATL @ CHA,L,239,99,36,89,0.404,9,38.0,0.237,18,20,0.900,15.0,30.0,45.0,23,5.0,6,16,20,-23.0
4,22023,1610612737,ATL,Atlanta Hawks,0022300763,2024-02-12,ATL vs. CHI,L,241,126,44,94,0.468,14,43.0,0.326,24,29,0.828,15.0,34.0,49.0,33,4.0,3,11,20,-10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
580,12017,1610612766,CHA,Charlotte Hornets,0011700067,2017-10-13,CHA vs. DAL,W,239,111,42,85,0.494,17,30.0,0.567,10,14,0.714,9.0,41.0,50.0,24,6.0,4,21,21,15.0
581,12017,1610612766,CHA,Charlotte Hornets,0011700060,2017-10-11,CHA vs. BOS,L,239,100,37,88,0.420,13,37.0,0.351,13,19,0.684,12.0,35.0,47.0,18,3.0,2,14,14,-8.0
582,12017,1610612766,CHA,Charlotte Hornets,0011700051,2017-10-09,CHA @ MIA,L,239,106,41,91,0.451,5,26.0,0.192,19,28,0.679,13.0,30.0,43.0,17,8.0,3,14,22,-3.0
583,12017,1610612766,CHA,Charlotte Hornets,0011700019,2017-10-04,CHA @ DET,W,240,109,39,87,0.448,7,32.0,0.219,24,33,0.727,8.0,33.0,41.0,22,14.0,7,14,19,5.0


In [376]:
team_names = {
    'Atlanta Hawks': 'ATL',
    'Boston Celtics': 'BOS',
    'Cleveland Cavaliers': 'CLE',
    'New Orleans Pelicans': 'NOP',
    'Chicago Bulls': 'CHI',
    'Dallas Mavericks': 'DAL',
    'Denver Nuggets': 'DEN',
    'Golden State Warriors': 'GSW',
    'Houston Rockets': 'HOU',
    'LA Clippers': 'LAC',
    'Los Angeles Lakers': 'LAL',
    'Miami Heat': 'MIA',
    'Milwaukee Bucks': 'MIL',
    'Minnesota Timberwolves': 'MIN',
    'Brooklyn Nets': 'BKN',
    'New York Knicks': 'NYK',
    'Orlando Magic': 'ORL',
    'Indiana Pacers': 'IND',
    'Philadelphia 76ers': 'PHI',
    'Phoenix Suns': 'PHX',
    'Portland Trail Blazers': 'POR',
    'Sacramento Kings': 'SAC',
    'San Antonio Spurs': 'SAS',
    'Oklahoma City Thunder': 'OKC',
    'Toronto Raptors': 'TOR',
    'Utah Jazz': 'UTA',
    'Memphis Grizzlies': 'MEM',
    'Washington Wizards': 'WAS',
    'Detroit Pistons': 'DET',
    'Charlotte Hornets': 'CHA'
}

In [1319]:
all_games.head()

Unnamed: 0,SEASON_ID,TEAM_ID,TEAM_ABBREVIATION,TEAM_NAME,GAME_ID,GAME_DATE,MATCHUP,WL,MIN,PTS,FGM,FGA,FG_PCT,FG3M,FG3A,FG3_PCT,FTM,FTA,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PLUS_MINUS
0,22023,1610612737,ATL,Atlanta Hawks,22300835,2024-02-27,ATL vs. UTA,W,239,124,46,93,0.495,12,32.0,0.375,20,23,0.87,15.0,40.0,55.0,27,8.0,8,17,15,27.8
1,22023,1610612737,ATL,Atlanta Hawks,22300821,2024-02-25,ATL vs. ORL,W,241,109,41,90,0.456,12,29.0,0.414,15,17,0.882,9.0,35.0,44.0,31,10.0,3,9,17,17.0
2,22023,1610612737,ATL,Atlanta Hawks,22300804,2024-02-23,ATL vs. TOR,L,239,121,46,106,0.434,12,39.0,0.308,17,24,0.708,20.0,34.0,54.0,27,5.0,4,9,11,-2.0
3,22023,1610612737,ATL,Atlanta Hawks,22300777,2024-02-14,ATL @ CHA,L,239,99,36,89,0.404,9,38.0,0.237,18,20,0.9,15.0,30.0,45.0,23,5.0,6,16,20,-23.0
4,22023,1610612737,ATL,Atlanta Hawks,22300763,2024-02-12,ATL vs. CHI,L,241,126,44,94,0.468,14,43.0,0.326,24,29,0.828,15.0,34.0,49.0,33,4.0,3,11,20,-10.0


In [1119]:
drop_games = pd.read_csv("~/Documents/GitHub/CloudComputing/DroppedGames.csv").drop("Unnamed: 0", axis = 1)
all_games_filtered_columns = all_games[["TEAM_NAME", "GAME_DATE", "MATCHUP", "WL"]]
all_games_filtered_columns = all_games_filtered_columns.reset_index()
all_games_filtered_columns = all_games_filtered_columns.drop("index", axis = 1)
all_games_filtered_columns["TEAM_NAME"] = all_games_filtered_columns["TEAM_NAME"].apply(lambda x: team_names[x])
all_games_filtered_columns = all_games_filtered_columns[~all_games_filtered_columns["MATCHUP"].isin(drop_games["0"])]
all_games_filtered_columns

Unnamed: 0,TEAM_NAME,GAME_DATE,MATCHUP,WL
0,ATL,2024-02-27,ATL vs. UTA,W
1,ATL,2024-02-25,ATL vs. ORL,W
2,ATL,2024-02-23,ATL vs. TOR,L
3,ATL,2024-02-14,ATL @ CHA,L
4,ATL,2024-02-12,ATL vs. CHI,L
...,...,...,...,...
18798,CHA,2017-10-13,CHA vs. DAL,W
18799,CHA,2017-10-11,CHA vs. BOS,L
18800,CHA,2017-10-09,CHA @ MIA,L
18801,CHA,2017-10-04,CHA @ DET,W


In [1120]:
betting_lines = pd.read_csv("BettingData.csv")

betting_lines = betting_lines.drop("Unnamed: 0", axis = 1)

betting_lines["game_date"] = pd.to_datetime(betting_lines["game_date"], format='%Y-%m-%d', errors="coerce")

betting_lines.head()

Unnamed: 0,game_date,home_team_abbrev,visit_team_abbrev,home_team_score,visit_team_score,game_over_under,line,favorite,score,total,over_hit,under_hit,favorite_covered,underdog_covered
0,2017-10-17,CLE,BOS,102,99,216.0,-4.5,CLE,99-102,201,0,1,0,1
1,2017-10-17,GSW,HOU,121,122,231.0,-9.5,GSW,122-121,243,1,0,0,1
2,2017-10-18,DET,CHA,102,90,202.0,-2.5,DET,90-102,192,0,1,1,0
3,2017-10-18,IND,BKN,140,131,216.5,-3.0,IND,131-140,271,1,0,1,0
4,2017-10-18,ORL,MIA,116,109,205.5,3.5,MIA,109-116,225,1,0,0,1


In [1344]:
player_birthday_df.to_csv("PlayerBirthdayDF.csv", index = False)

In [1346]:
player_birthday_df.columns

Index(['Name', 'Team', 'Birthday', 'From_Year', 'To_Year', 'id'], dtype='object')

In [1345]:
merged_data["game_date"] = pd.to_datetime(merged_data["game_date"])
player_birthday_df["Birthday"] = pd.to_datetime(player_birthday_df["Birthday"])

Unnamed: 0,game_date,home_team_abbrev,visit_team_abbrev,home_team_score,visit_team_score,game_over_under,line,favorite,score,total,over_hit,under_hit,favorite_covered,underdog_covered,TEAM_NAME,MATCHUP,WL,birthday_game,team_covered
0,2017-10-17,CLE,BOS,102,99,216.0,-4.5,CLE,99-102,201,0,1,0,1,CLE,CLE vs. BOS,W,0,0
1,2017-10-17,GSW,HOU,121,122,231.0,-9.5,GSW,122-121,243,1,0,0,1,GSW,GSW vs. HOU,L,0,0
2,2017-10-18,DET,CHA,102,90,202.0,-2.5,DET,90-102,192,0,1,1,0,DET,DET vs. CHA,W,0,1
3,2017-10-18,IND,BKN,140,131,216.5,-3.0,IND,131-140,271,1,0,1,0,IND,IND vs. BKN,W,0,1
4,2017-10-18,ORL,MIA,116,109,205.5,3.5,MIA,109-116,225,1,0,0,1,ORL,ORL vs. MIA,W,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16590,2024-02-27,ATL,UTA,124,97,238.0,-2.0,ATL,97-124,221,0,1,1,0,UTA,UTA @ ATL,L,0,0
16591,2024-02-27,BOS,PHI,117,99,231.5,-11.5,BOS,99-117,216,0,1,1,0,PHI,PHI @ BOS,L,0,0
16592,2024-02-27,NYK,NOP,92,115,215.0,6.5,NOP,115-92,207,0,1,1,0,NOP,NOP @ NYK,W,0,1
16593,2024-02-27,MIL,CHA,123,85,220.0,-13.5,MIL,85-123,208,0,1,1,0,CHA,CHA @ MIL,L,1,0


In [978]:
active_player_birthdays = []
count = 0
for player_id in filtered_player_birthdays["id"]:
    count += 1
    if count % 100 == 0:
        print("Starting Sleep")
        time.sleep(300)
    player_info = commonplayerinfo.CommonPlayerInfo(player_id = player_id, timeout=60).get_data_frames()[0]
    time.sleep(.5)
    try:
        if (2017 <= player_info["FROM_YEAR"][0] <= 2024) or (2017 <= player_info["TO_YEAR"][0] <= 2024):
            print(f"Collected {player_info['DISPLAY_FIRST_LAST'][0]}")
            active_player_birthdays.append((player_info["DISPLAY_FIRST_LAST"][0], player_info["TEAM_ABBREVIATION"][0], player_info["BIRTHDATE"][0].split("T")[0], player_info["FROM_YEAR"][0], player_info["TO_YEAR"][0]))
    except TypeError:
        continue
        
print("Completed Collection")

Collected Deng Adel
Collected Santi Aldama
Collected Cole Aldrich
Collected Rawle Alkins
Collected Kadeem Allen
Collected Jose Alvarado
Collected Justin Anderson
Collected Giannis Antetokounmpo
Collected Kostas Antetokounmpo
Collected Ryan Arcidiacono
Collected Darrell Arthur
Collected Jamel Artis
Collected D.J. Augustin
Collected Deni Avdija
Collected Joel Ayayi
Collected Marvin Bagley III
Collected Amari Bailey
Collected Ron Baker
Collected Wade Baldwin IV
Collected Patrick Baldwin Jr.
Collected Lonzo Ball
Collected Paolo Banchero
Collected Dalano Banton
Collected Will Barton
Collected Charles Bassey
Collected Emoni Bates
Collected Keita Bates-Diop
Collected Nicolas Batum
Collected Aron Baynes
Collected Malik Beasley
Collected Michael Beasley
Collected Marco Belinelli
Collected Jordan Bell
Collected Dragan Bender
Collected Davis Bertans
Collected Saddiq Bey
Collected Tyler Bey
Collected Onuralp Bitim
Collected Anthony Black
Collected Tarik Black
Collected Eric Bledsoe
Collected Kelji

In [1273]:
player_birthday_df = pd.DataFrame(active_player_birthdays, columns = ["Name", "Team", "Birthday", "From_Year", "To_Year"])
player_birthday_df = player_birthday_df.merge(filtered_player_birthdays[["Name", "id"]], on = ["Name"])
player_birthday_df = player_birthday_df[player_birthday_df["Team"] != ""]
player_birthday_df = player_birthday_df.reset_index().drop("index", axis = 1)

player_birthday_df

Unnamed: 0,Name,Team,Birthday,From_Year,To_Year,id
0,Deng Adel,CLE,1997-02-01,2018,2018,1629061
1,Santi Aldama,MEM,2001-01-10,2021,2023,1630583
2,Cole Aldrich,NYK,1988-10-31,2010,2017,202332
3,Rawle Alkins,CHI,1997-10-29,2018,2018,1628959
4,Jose Alvarado,NOP,1998-04-12,2021,2023,1630631
...,...,...,...,...,...,...
388,Guerschon Yabusele,BOS,1995-12-17,2016,2018,1627824
389,Tyler Zeller,BOS,1990-01-17,2012,2019,203092
390,Paul Zipser,CHI,1994-02-18,2016,2017,1627835
391,Ante Zizic,CLE,1997-01-04,2017,2019,1627790


In [1274]:
away_games = pd.merge(betting_lines, all_games_filtered_columns, how = "left", left_on = ["game_date", "visit_team_abbrev"], right_on = ["GAME_DATE", "TEAM_NAME"]).dropna()

home_games = pd.merge(betting_lines, all_games_filtered_columns, how = "left", left_on = ["game_date", "home_team_abbrev"], right_on = ["GAME_DATE", "TEAM_NAME"]).dropna()

merged_data = pd.concat([home_games, away_games]).reset_index().drop("index", axis = 1)
merged_data = merged_data.drop(["GAME_DATE"], axis = 1)

In [1343]:
merged_data.to_csv("MergedData.csv", index=False)

In [1275]:
birthday_games_df = mark_birthday_games(merged_data, player_birthday_df, 1)

In [1272]:
def team_covered(games_df, teams):
    
    games_df["team_covered"] = 0
    covered = {}
    
    for team in teams:
        for index, game_row in games_df[games_df["TEAM_NAME"] == team].iterrows():
            if ((game_row["favorite"] == team) and (game_row["favorite_covered"] == 1)) | ((game_row["favorite"] != team) and (game_row["underdog_covered"] == 1)):
                covered[index] = 1
            else:
                covered[index] = 0
    
    games_df["team_covered"] = games_df.index.map(covered)
    
    return games_df

In [None]:
birthday_games_df

In [1276]:
birthday_games_df = team_covered(birthday_games_df, pd.unique(birthday_games_df["home_team_abbrev"]))

In [1277]:
birthday_games_df

Unnamed: 0,game_date,home_team_abbrev,visit_team_abbrev,home_team_score,visit_team_score,game_over_under,line,favorite,score,total,over_hit,under_hit,favorite_covered,underdog_covered,TEAM_NAME,MATCHUP,WL,birthday_game,team_covered
0,2017-10-17,CLE,BOS,102,99,216.0,-4.5,CLE,99-102,201,0,1,0,1,CLE,CLE vs. BOS,W,0,0
1,2017-10-17,GSW,HOU,121,122,231.0,-9.5,GSW,122-121,243,1,0,0,1,GSW,GSW vs. HOU,L,0,0
2,2017-10-18,DET,CHA,102,90,202.0,-2.5,DET,90-102,192,0,1,1,0,DET,DET vs. CHA,W,0,1
3,2017-10-18,IND,BKN,140,131,216.5,-3.0,IND,131-140,271,1,0,1,0,IND,IND vs. BKN,W,0,1
4,2017-10-18,ORL,MIA,116,109,205.5,3.5,MIA,109-116,225,1,0,0,1,ORL,ORL vs. MIA,W,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16590,2024-02-27,ATL,UTA,124,97,238.0,-2.0,ATL,97-124,221,0,1,1,0,UTA,UTA @ ATL,L,0,0
16591,2024-02-27,BOS,PHI,117,99,231.5,-11.5,BOS,99-117,216,0,1,1,0,PHI,PHI @ BOS,L,0,0
16592,2024-02-27,NYK,NOP,92,115,215.0,6.5,NOP,115-92,207,0,1,1,0,NOP,NOP @ NYK,W,0,1
16593,2024-02-27,MIL,CHA,123,85,220.0,-13.5,MIL,85-123,208,0,1,1,0,CHA,CHA @ MIL,L,1,0


In [1342]:
filtered_player_birthdays

Unnamed: 0,Name,Birthdate,id
10,Deng Adel,1997-02-01,1629061
11,Jeff Adrien,1986-02-10,202399
15,Josh Akognon,1986-02-10,203006
16,Solomon Alabi,1988-03-21,202374
18,Santi Aldama,2001-01-10,1630583
...,...,...,...
2303,Tyler Zeller,1990-01-17,203092
2305,Zhou Qi,1996-01-16,1627753
2307,Paul Zipser,1994-02-18,1627835
2308,Ante Zizic,1997-01-04,1627790


In [1283]:
birthday_games_df.columns

Index(['game_date', 'home_team_abbrev', 'visit_team_abbrev', 'home_team_score',
       'visit_team_score', 'game_over_under', 'line', 'favorite', 'score',
       'total', 'over_hit', 'under_hit', 'favorite_covered',
       'underdog_covered', 'TEAM_NAME', 'MATCHUP', 'WL', 'birthday_game',
       'team_covered'],
      dtype='object')

In [1281]:
birthday_games_df[["TEAM_NAME", "team_covered", "birthday_game"]].groupby(["TEAM_NAME", "birthday_game"]).value_counts()["CLE"]

birthday_game  team_covered
0              0               223
               1               170
1              0                77
               1                74
dtype: int64

In [1227]:
birthday_games_df[["favorite", "favorite_covered", "underdog_covered", "TEAM_NAME", "team_covered"]][birthday_games_df["TEAM_NAME"] == "CLE"]

Unnamed: 0,favorite,favorite_covered,underdog_covered,TEAM_NAME,team_covered
0,CLE,0,1,CLE,0
28,CLE,0,1,CLE,0
48,CLE,0,1,CLE,1
89,CLE,0,1,CLE,1
105,CLE,0,1,CLE,1
...,...,...,...,...,...
8247,CLE,0,1,CLE,0
8255,CLE,1,0,CLE,1
8276,CLE,1,0,CLE,0
8326,CLE,0,1,CLE,0


In [1011]:
birthday_games_df[["birthday_game"]].value_counts()

birthday_game
0                5849
1                2551
dtype: int64

In [1176]:
birthday_games_df[["TEAM_NAME", "home_team_abbrev", "favorite", "favorite_covered", "birthday_game"]][(birthday_games_df["home_team_abbrev"] == "SAC") | (birthday_games_df["visit_team_abbrev"] == "SAC")].groupby(["TEAM_NAME", "favorite_covered", "birthday_game"]).value_counts()["SAC"]

favorite  favorite_covered  birthday_game  home_team_abbrev
ATL       0                 0              SAC                 1
          1                 0              ATL                 4
                            1              SAC                 1
BKN       0                 0              SAC                 2
                                           BKN                 1
                                                              ..
WAS       0                 1              WAS                 2
                                           SAC                 2
          1                 0              WAS                 1
                                           SAC                 1
                            1              SAC                 1
Length: 223, dtype: int64

In [1159]:
birthday_games_df[["birthday_game"]][(birthday_games_df["home_team_abbrev"] == "SAC") | (birthday_games_df["visit_team_abbrev"] == "SAC")].value_counts()

birthday_game
0                690
1                356
dtype: int64

In [1284]:
from scipy.stats import chi2_contingency

In [1285]:
contingency_table = pd.crosstab(birthday_games_df['team_covered'], birthday_games_df['birthday_game'])

# Print contingency table (optional)
print("Contingency Table:")
print(contingency_table)

# Chi-square test of independence
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Print test statistics
print("\nChi-square Statistic:", chi2)
print("P-value:", p)
print("Degrees of Freedom:", dof)
print("Expected Frequencies Table:")
print(expected)

Contingency Table:
birthday_game     0     1
team_covered             
0              5880  2574
1              5663  2478

Chi-square Statistic: 0.0
P-value: 1.0
Degrees of Freedom: 1
Expected Frequencies Table:
[[5880.35685447 2573.64314553]
 [5662.64314553 2478.35685447]]


In [1318]:
def chi_square_test_for_team(group):
    contingency_table = pd.crosstab(group['team_covered'], group['birthday_game'])
    chi2, p, dof, expected = chi2_contingency(contingency_table)
    return chi2, p, dof, expected

# Group by 'TEAM_NAME' and apply the function
results = birthday_games_df.groupby('TEAM_NAME').apply(chi_square_test_for_team)

# Print results for each team
for team, result in results.items():
    chi2, p, dof, expected = result
    if p <= .1:
        print(f"\nTeam: {team}")
        print("Chi-square Statistic:", chi2)
        print("P-value:", p)
        print("Degrees of Freedom:", dof)
        print("Expected Frequencies Table:")
        print(expected)


Team: CHA
Chi-square Statistic: 3.8590361487173155
P-value: 0.04947874481412501
Degrees of Freedom: 1
Expected Frequencies Table:
[[163.41085271 108.58914729]
 [146.58914729  97.41085271]]

Team: DEN
Chi-square Statistic: 2.7523429401223805
P-value: 0.09711203067019762
Degrees of Freedom: 1
Expected Frequencies Table:
[[217.65313029  89.34686971]
 [201.34686971  82.65313029]]

Team: IND
Chi-square Statistic: 1.9682304534560189
P-value: 0.1606358146747501
Degrees of Freedom: 1
Expected Frequencies Table:
[[188.03696858  83.96303142]
 [185.96303142  83.03696858]]

Team: MEM
Chi-square Statistic: 2.1102979268663384
P-value: 0.14631083036545778
Degrees of Freedom: 1
Expected Frequencies Table:
[[186.71323529  81.28676471]
 [192.28676471  83.71323529]]

Team: OKC
Chi-square Statistic: 9.056910300612039
P-value: 0.00261703834009946
Degrees of Freedom: 1
Expected Frequencies Table:
[[189.2393321  65.7606679]
 [210.7606679  73.2393321]]

Team: ORL
Chi-square Statistic: 2.1928607081327387
P-va

In [1304]:
from scipy.stats import ttest_ind

# Assuming birthday_games_df is your pandas DataFrame
# Replace 'WL' with the actual column name representing win-loss outcomes

# Separate data for birthday games and non-birthday games
birthday_games = birthday_games_df[birthday_games_df['birthday_game'] == 1]['WL']
birthday_games = birthday_games.apply(lambda x: wl_dict[x])
non_birthday_games = birthday_games_df[birthday_games_df['birthday_game'] == 0]['WL']
non_birthday_games = non_birthday_games.apply(lambda x: wl_dict[x])

# Perform t-test
t_statistic, p_value = ttest_ind(birthday_games, non_birthday_games)

# Print results
print("T-Statistic:", t_statistic)
print("P-value:", p_value)

T-Statistic: 0.005135201908661543
P-value: 0.9959027814209129


In [1348]:
wl_dict = {'W': 1, 'L': 0}

In [1316]:
from scipy.stats import ttest_ind

# Assuming birthday_games_df is your pandas DataFrame
# Replace 'WL' with the actual column name representing win-loss outcomes
# Replace 'TEAM_NAME' with the actual column name representing team names

# Define a function to perform t-test for each team
def t_test_for_team(group):
    birthday_games = group[group['birthday_game'] == 1]['WL']
    birthday_games = birthday_games.apply(lambda x: wl_dict[x])
    non_birthday_games = group[group['birthday_game'] == 0]['WL']
    non_birthday_games = non_birthday_games.apply(lambda x: wl_dict[x])
    
    t_statistic, p_value = ttest_ind(birthday_games, non_birthday_games)
    return t_statistic, p_value

# Group by 'TEAM_NAME' and apply the function
results = birthday_games_df.groupby('TEAM_NAME').apply(t_test_for_team)

# Print results for each team
for team, result in results.items():
    t_statistic, p_value = result
    if p_value <= .10:
        print(f"\nTeam: {team}")
        print("T-Statistic:", t_statistic)
        print("P-value:", p_value)



Team: CHA
T-Statistic: -1.7950315269254722
P-value: 0.07323604895632055

Team: DET
T-Statistic: -1.6790742363110507
P-value: 0.09374413530721767

Team: IND
T-Statistic: -1.6834434337141349
P-value: 0.09286831379429852

Team: MEM
T-Statistic: 2.1420801392698556
P-value: 0.032631995097883976

Team: OKC
T-Statistic: 2.599955483951792
P-value: 0.009580219905293284

Team: PHI
T-Statistic: 1.8553696425941235
P-value: 0.06405147396409794


In [1323]:
birthday_games_df[birthday_games_df["TEAM_NAME"] != birthday_games_df["home_team_abbrev"]].groupby("home_team_abbrev").apply(t_test_for_team)

home_team_abbrev
ATL      (1.4395705551839468, 0.15113458868955143)
BKN       (0.2541123710176615, 0.7995965953203449)
BOS      (0.39079601553087234, 0.6962208487101673)
CHA      (1.2490908371998273, 0.21277725119000712)
CHI       (-0.45678099606280065, 0.64821280101093)
CLE      (0.30938506368328594, 0.7572629174288827)
DAL      (-0.1883648300676678, 0.8507280945341115)
DEN      (-0.3120124716429323, 0.7552537215044841)
DET        (0.7710671582728142, 0.441378205044722)
GSW      (0.49180125923325035, 0.6232171156608044)
HOU      (-0.2919025534538945, 0.7705765157399009)
IND       (0.8660209389507252, 0.3872591153456292)
LAC      (0.23509230030357836, 0.8143101683674061)
LAL     (-0.30176071073886024, 0.7630561118220924)
MEM    (-2.2958657051113547, 0.022447182305407046)
MIA     (-0.07593572588711356, 0.9395217048691604)
MIL       (0.2878371956579998, 0.7736727902691102)
MIN         (0.554435648709452, 0.579755496820518)
NOP     (-0.14885116535714016, 0.8817860200554694)
NYK     (-0.17

In [1324]:
birthday_games_df[birthday_games_df["TEAM_NAME"] == birthday_games_df["home_team_abbrev"]].groupby("home_team_abbrev").apply(t_test_for_team)

home_team_abbrev
ATL    (-1.4395705551839486, 0.15113458868955104)
BKN    (-0.25411237101766065, 0.7995965953203455)
BOS    (-0.39079601553087234, 0.6962208487101673)
CHA    (-1.2490908371998282, 0.21277725119000676)
CHI      (0.4960501932148794, 0.6202796814093444)
CLE    (-0.30938506368328683, 0.7572629174288821)
DAL     (0.18836483006766602, 0.8507280945341127)
DEN      (0.3120124716429323, 0.7552537215044841)
DET    (-0.7710671582728135, 0.44137820504472247)
GSW     (-0.4918012592332494, 0.6232171156608052)
HOU     (0.29190255345389443, 0.7705765157399009)
IND     (-0.8660209389507252, 0.3872591153456292)
LAC    (-0.23509230030357925, 0.8143101683674054)
LAL     (0.30176071073885935, 0.7630561118220933)
MEM     (2.295865705111355, 0.022447182305407008)
MIA     (0.07593572588711521, 0.9395217048691591)
MIL    (-0.28783719565800076, 0.7736727902691094)
MIN     (-0.5544356487094536, 0.5797554968205167)
NOP     (0.14885116535713847, 0.8817860200554707)
NYK     (0.17191291867771508, 0.8

In [1314]:
# Group by 'TEAM_NAME' and 'home_team_abbrev' (city) and apply the function
results = birthday_games_df.groupby(['TEAM_NAME', 'home_team_abbrev']).apply(t_test_for_team)

# Print results for each team and city
for (team, city), result in results.items():
    t_statistic, p_value = result
    if (p_value < .05) and (abs(t_statistic) != np.inf):
        print(f"\nTeam: {team}, City: {city}")
        print("T-Statistic:", t_statistic)
        print("P-value:", p_value)

  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  t_statistic, p_value = ttest_ind(birthday_games, non_birthday_games)



Team: HOU, City: DEN
T-Statistic: 2.4337372337779035
P-value: 0.033189140421799455

Team: LAC, City: PHX
T-Statistic: 2.442316990216822
P-value: 0.02581342101313369

Team: MEM, City: HOU
T-Statistic: -3.562626515972126
P-value: 0.004452286189469762

Team: MEM, City: MEM
T-Statistic: 2.295865705111355
P-value: 0.022447182305407008

Team: MIL, City: BKN
T-Statistic: 2.5495097567963922
P-value: 0.024215051722005176

Team: MIN, City: GSW
T-Statistic: 2.2809967098135044
P-value: 0.04345998196469156

Team: NOP, City: OKC
T-Statistic: -2.9580398915498085
P-value: 0.014336107062361972

Team: NYK, City: BOS
T-Statistic: 2.5318484177091665
P-value: 0.02977639721939338

Team: OKC, City: OKC
T-Statistic: 2.0566228161040776
P-value: 0.040694206630617714

Team: ORL, City: CHA
T-Statistic: 2.455163805847503
P-value: 0.03644666583038085

Team: ORL, City: WAS
T-Statistic: -2.756913983540959
P-value: 0.020236688803349834

Team: PHI, City: BOS
T-Statistic: 2.696799449852968
P-value: 0.013874806517073488

In [1340]:
# Create full results list
full_results = []
# Group by 'TEAM_NAME' and apply chi-square test to check significance
results = birthday_games_df.groupby('TEAM_NAME').apply(chi_square_test_for_team)

# Print results for each team
full_results.append(("Chi-Square"))
for team, result in results.items():
    chi2, p, dof, expected = result
    if p <= .1:
        full_results.append((team, "Chi-square Statistic:", chi2, "P-value:", p))
        
# Group by "Team Name" and apply t-test for each team
results = birthday_games_df.groupby('TEAM_NAME').apply(t_test_for_team)

# Print results for each team
full_results.append(("T-Test Team"))
for team, result in results.items():
    t_statistic, p_value = result
    if p_value <= .10:
        full_results.append(("Team:", team, "T-Statistic:", t_statistic, "P-value:", p_value))

# Group by "Team Name" and "Home Team abbrev (city)" and apply t-test for each team
results = birthday_games_df.groupby(['TEAM_NAME', 'home_team_abbrev']).apply(t_test_for_team)

# Print results for each team and city
full_results.append(("T-Test Team and City"))
for (team, city), result in results.items():
    t_statistic, p_value = result
    if (p_value < .05) and (abs(t_statistic) != np.inf):
        full_results.append(("Team:", team, "City:", city, "T-Statistic:", t_statistic, "P-value:", p_value))

  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  t_statistic, p_value = ttest_ind(birthday_games, non_birthday_games)


In [1341]:
def write_tuples_to_txt(tuples_list, file_path):
    try:
        with open(file_path, 'w') as file:
            for tup in tuples_list:
                line = ' '.join(map(str, tup)) + '\n'
                file.write(line)
        print(f"Data written to {file_path} successfully.")
    except Exception as e:
        print(f"Error writing data to {file_path}: {e}")

# Example usage:
tuples_list = [(1, 'apple'), (2, 'banana'), (3, 'cherry')]
file_path = 'data.txt'

write_tuples_to_txt(full_results, "Results.txt")


Data written to Results.txt successfully.


In [1356]:
merged_data[merged_data["home_team_abbrev"] != merged_data["TEAM_NAME"]]["birthday_game"].value_counts()

0    5771
1    2526
Name: birthday_game, dtype: int64

In [1357]:
5771+2526

8297