# Datos de partido equipos

In [1]:
pip install nba_api

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from nba_api.stats.endpoints import teamgamelog
from nba_api.stats.static import teams
import warnings


In [13]:
def get_logs(teamid, season):
    gamelog_home = teamgamelog.TeamGameLog(team_id=teamid, season=season)
    log_home = gamelog_home.get_data_frames()[0]
    warnings.filterwarnings("ignore", message="Could not infer format, so each element will be parsed individually")
    log_home['GAME_DATE'] = pd.to_datetime(log_home['GAME_DATE'])
    return log_home

def get_averages(df, teamid, end_date):
    
    filtered_games = df[df["Team_ID_1"] == teamid]
    gameday = filtered_games[filtered_games["GAME_DATE_1"] == end_date]

    filtered_games = filtered_games[filtered_games['GAME_DATE_1'] < end_date]
    
    if filtered_games.shape[0] > 10:
        filtered_games = filtered_games.tail(10)
        
    # If no games are found, return None
    if filtered_games.empty:
        print("empty")
        return None
    
    
    columns = ["GAME_ID", "Team_ID_1", "Team_ID_2", "GAME_DATE_1", "MATCHUP_1", "PTS_1", "PTS_2", "win%_1","efg%_1", "fta_rate_1","tov%_1","oreb%_1", "offrtg_1", "defrtg_1", "pace_1", "ppg_1", "opp_ppg_1", "dreb%_1", "win%_2","efg%_2", "fta_rate_2","tov%_2","oreb%_2", "offrtg_2", "defrtg_2", "pace_2", "ppg_2", "opp_ppg_2", "dreb%_2"]
    stats = pd.DataFrame(columns=columns)

    stats["GAME_ID"] = gameday["Game_ID"]
    stats["Team_ID_1"] = gameday["Team_ID_1"]
    stats["Team_ID_2"] = gameday["Team_ID_2"]
    stats["GAME_DATE_1"]= gameday["GAME_DATE_1"]
    stats["MATCHUP_1"]= gameday["MATCHUP_1"]
    stats["PTS_1"] = gameday["PTS_1"]
    stats["PTS_2"] = gameday["PTS_2"]
    
    stats['win%_1'] = filtered_games['WL_1'].value_counts(normalize=True).get('W', 0)
    stats['efg%_1'] = (filtered_games['FGM_1'].sum() + 0.5 * filtered_games['FG3M_1'].sum()) / filtered_games['FGA_1'].sum()
    stats['fta_rate_1'] = filtered_games['FTA_1'].sum() / filtered_games['FGA_1'].sum()
    stats['tov%_1'] = filtered_games['TOV_1'].sum() / (filtered_games['FGA_1'].sum() + 0.44 * filtered_games['FTA_1'].sum() + filtered_games['TOV_1'].sum())
    stats['oreb%_1'] = filtered_games['OREB_1'].sum() / (filtered_games['OREB_1'].sum() + filtered_games['DREB_2'].sum())
    stats['offrtg_1'] = filtered_games['PTS_1'].sum()  / 0.96*((filtered_games['FGA_1'].sum() )+(filtered_games['TOV_1'].sum()  )+0.44*(filtered_games['FTA_1'].sum() )-(filtered_games['OREB_1'].sum() ))
    stats["defrtg_1"] = 100 * (filtered_games['PTS_2'].sum() / 0.96*((filtered_games['FGA_1'].sum() )+(filtered_games['TOV_1'].sum()  )+0.44*(filtered_games['FTA_1'].sum() )-(filtered_games['OREB_1'].sum() )))
    stats['pace_1'] = 0.96*((filtered_games['FGA_1'].sum() )+(filtered_games['TOV_1'].sum()  )+0.44*(filtered_games['FTA_1'].sum() )-(filtered_games['OREB_1'].sum() )) / (48 * (filtered_games['MIN_1'].sum()  / 240))
    stats["ppg_1"] = filtered_games["PTS_1"].mean()
    stats['opp_ppg_1'] = filtered_games["PTS_2"].mean()
    stats["dreb%_1"] = filtered_games["DREB_1"].sum() / (filtered_games["DREB_1"].sum() + filtered_games['OREB_2'].sum())
    
    stats['win%_2'] = filtered_games['WL_2'].value_counts(normalize=True).get('W', 0)
    stats['efg%_2'] = (filtered_games['FGM_2'].sum() + 0.5 * filtered_games['FG3M_2'].sum()) / filtered_games['FGA_2'].sum()
    stats['fta_rate_2'] = filtered_games['FTA_2'].sum() / filtered_games['FGA_2'].sum()
    stats['tov%_2'] = filtered_games['TOV_2'].sum() / (filtered_games['FGA_2'].sum() + 0.44 * filtered_games['FTA_2'].sum() + filtered_games['TOV_2'].sum())
    stats['oreb%_2'] = filtered_games['OREB_2'].sum() / (filtered_games['OREB_2'].sum() + filtered_games['DREB_1'].sum())
    stats['offrtg_2'] = filtered_games['PTS_2'].sum()  / 0.96*((filtered_games['FGA_2'].sum() )+(filtered_games['TOV_2'].sum()  )+0.44*(filtered_games['FTA_2'].sum() )-(filtered_games['OREB_2'].sum() ))
    stats["defrtg_2"] = 100 * (filtered_games['PTS_1'].sum() / 0.96*((filtered_games['FGA_2'].sum() )+(filtered_games['TOV_2'].sum()  )+0.44*(filtered_games['FTA_2'].sum() )-(filtered_games['OREB_2'].sum() )))
    stats['pace_2'] = 0.96*((filtered_games['FGA_2'].sum() )+(filtered_games['TOV_2'].sum()  )+0.44*(filtered_games['FTA_2'].sum() )-(filtered_games['OREB_2'].sum() )) / (48 * (filtered_games['MIN_2'].sum()  / 240))
    stats["ppg_2"] = filtered_games["PTS_2"].mean()
    stats['opp_ppg_2'] = filtered_games["PTS_1"].mean()
    stats["dreb%_2"] = filtered_games["DREB_2"].sum() / (filtered_games["DREB_2"].sum() + filtered_games['OREB_1'].sum())
    
    return stats
    

In [14]:
teams_list = teams.get_teams()
id_list = [team['id'] for team in teams_list]

In [15]:
columns = ['Team_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL', 'W', 'L', 'W_PCT', 'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA', 'FT_PCT', 'OREB', 'DREB', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']
LogsSeason = pd.DataFrame(columns=columns)
for id in id_list:
    df = get_logs(id, '2023-24')
    LogsSeason = pd.concat([LogsSeason, df], ignore_index=True)

  LogsSeason = pd.concat([LogsSeason, df], ignore_index=True)


In [16]:
# Split the dataframe based on Game_ID and Team_ID
df_team1 = LogsSeason[LogsSeason['MATCHUP'].str.contains('@')].copy()
df_team2 = LogsSeason[LogsSeason['MATCHUP'].str.contains('vs.')].copy()

# Add suffixes to columns to distinguish between team1 and team2
df_team1 = df_team1.add_suffix('_1')
df_team2 = df_team2.add_suffix('_2')

# Rename the Game_ID columns back to the original
df_team1.rename(columns={'Game_ID_1': 'Game_ID'}, inplace=True)
df_team2.rename(columns={'Game_ID_2': 'Game_ID'}, inplace=True)

# Merge the two dataframes on Game_ID
merged_df = pd.merge(df_team1, df_team2, on='Game_ID')

merged_df = merged_df.sort_values(by=['Team_ID_1', 'GAME_DATE_1'])

In [17]:
columns = ["GAME_ID", "Team_ID_1", "Team_ID_2", "GAME_DATE_1", "MATCHUP_1", "PTS_1", "PTS_2", "win%_1","efg%_1", "fta_rate_1","tov%_1","oreb%_1", "offrtg_1", "defrtg_1", "pace_1", "ppg_1", "opp_ppg_1", "dreb%_1", "win%_2","efg%_2", "fta_rate_2","tov%_2","oreb%_2", "offrtg_2", "defrtg_2", "pace_2", "ppg_2", "opp_ppg_2", "dreb%_2"]
final = pd.DataFrame(columns=columns)
listgames = []

for id in id_list:
    listgames = merged_df[merged_df["Team_ID_1"] == id]["GAME_DATE_1"].to_list()
    for day in listgames:
        final = pd.concat([final, get_averages(merged_df, id, day)], ignore_index=True)

empty
empty


  final = pd.concat([final, get_averages(merged_df, id, day)], ignore_index=True)


empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty
empty


In [19]:
final.to_csv("GamesStatsTeams23-24.csv", index = False)