In [50]:
import pandas as pd
import os

In [51]:
team_code_map = {
    'ATL': 'ATL',
    'BKN': 'BRK',
    'BOS': 'BOS',
    'CHA': 'CHO',
    'CHI': 'CHI',
    'DAL': 'DAL',
    'DEN': 'DEN',
    'DET': 'DET',
    'GSW': 'GSW',
    'HOU': 'HOU',
    'IND': 'IND',
    'LAC': 'LAC',
    'LAL': 'LAL',
    'MEM': 'MEM',
    'MIA': 'MIA',
    'MIL': 'MIL',
    'MIN': 'MIN',
    'NOP': 'NOP',
    'NYK': 'NYK',
    'OKC': 'OKC',
    'ORL': 'ORL',
    'PHI': 'PHI',
    'PHX': 'PHO',
    'POR': 'POR',
    'SAC': 'SAC',
    'SAS': 'SAS',
    'TOR': 'TOR',
    'UTA': 'UTA',
    'WAS': 'WAS'
}

In [52]:
def transform_nba_game_log(path_to_csv):
    df = pd.read_csv(path_to_csv)

    # Parse date
    df['Date'] = pd.to_datetime(df['GAME_DATE']).dt.strftime('%Y-%m-%d')

    # Extract Team and Opponent
    def parse_matchup(matchup):
        team, at_vs, opp = matchup.split()
        return (
            team_code_map.get(team, team),
            '@' if at_vs == '@' else '',
            team_code_map.get(opp, opp)
        )

    parsed = df['MATCHUP'].apply(parse_matchup)
    df['Team'] = parsed.apply(lambda x: x[0])
    df[''] = parsed.apply(lambda x: x[1])
    df['Opp'] = parsed.apply(lambda x: x[2])

    # Result column
    df['Result'] = df['WL'] + ' ' + df['PTS'].astype(str) + '-' + (df.groupby('Player_Name')['PTS'].shift(-1).fillna(df['PTS'])).astype(str)

    # Games Started: unknown, use '*'
    df['GS'] = '*'

    # Minutes
    df['MP'] = df['MIN']

    # Shooting stats
    df['FG'] = df['FGM']
    df['FG%'] = df['FG_PCT']
    df['3P'] = df['FG3M']
    df['3PA'] = df['FG3A']
    df['3P%'] = df['FG3_PCT']

    df['2P'] = df['FGM'] - df['FG3M']
    df['2PA'] = df['FGA'] - df['FG3A']
    df['2P%'] = df['2P'] / df['2PA']
    df['eFG%'] = (df['FGM'] + 0.5 * df['FG3M']) / df['FGA']

    df['FT'] = df['FTM']
    df['FTA'] = df['FTA']
    df['FT%'] = df['FT_PCT']

    df['ORB'] = df['OREB']
    df['DRB'] = df['DREB']
    df['TRB'] = df['REB']

    df['AST'] = df['AST']
    df['STL'] = df['STL']
    df['BLK'] = df['BLK']
    df['TOV'] = df['TOV']
    df['PF'] = df['PF']
    df['PTS'] = df['PTS']

    df['+/-'] = df['PLUS_MINUS']
    
    
    num_cols = ['FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%',
        'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB',
        'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', '+/-'
    ]
    for col in num_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # Select final column order
    output_cols = [
        'Date', 'Team', '', 'Opp', 'Result', 'GS', 'MP',
        'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%',
        'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB',
        'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', '+/-'
    ]
    df.sort_values(by='Date', inplace=True)
    df_out = df[output_cols]

    return df_out


In [53]:
filepaths_and_players = [(os.path.join('./data', f), f.replace('.csv', '')) 
                         for f in os.listdir('./data') if f.endswith('.csv')]

In [54]:
print(filepaths_and_players)

[('./data/Jalen_Suggs.csv', 'Jalen_Suggs'), ('./data/Sam_Merrill.csv', 'Sam_Merrill'), ('./data/Tobias_Harris.csv', 'Tobias_Harris'), ('./data/Markieff_Morris.csv', 'Markieff_Morris'), ('./data/Pete_Nance.csv', 'Pete_Nance'), ('./data/Jaylen_Clark.csv', 'Jaylen_Clark'), ('./data/Collin_Gillespie.csv', 'Collin_Gillespie'), ('./data/De_Aaron_Fox.csv', 'De_Aaron_Fox'), ('./data/PJ_Hall.csv', 'PJ_Hall'), ('./data/Kyle_Filipowski.csv', 'Kyle_Filipowski'), ('./data/Malevy_Leons.csv', 'Malevy_Leons'), ('./data/Ayo_Dosunmu.csv', 'Ayo_Dosunmu'), ('./data/Kobe_Brown.csv', 'Kobe_Brown'), ('./data/James_Johnson.csv', 'James_Johnson'), ('./data/Jeff_Dowtin_Jr_.csv', 'Jeff_Dowtin_Jr_'), ('./data/Oso_Ighodaro.csv', 'Oso_Ighodaro'), ('./data/Dominick_Barlow.csv', 'Dominick_Barlow'), ('./data/Julian_Champagnie.csv', 'Julian_Champagnie'), ('./data/Karlo_Matković.csv', 'Karlo_Matković'), ('./data/Yuki_Kawamura.csv', 'Yuki_Kawamura'), ('./data/Devin_Carter.csv', 'Devin_Carter'), ('./data/Shaedon_Sharpe.cs

In [55]:
for filepath, player in filepaths_and_players:
    
    df = transform_nba_game_log(filepath)
    team = df["Team"].unique()
    team_code = team[0]
    output_dir = f"./all_player_game_data/{team_code}"
    os.makedirs(output_dir, exist_ok=True)
    df.to_csv(f"{output_dir}/{player}.csv", index=False)

In [42]:
# df = transform_nba_game_log("./Jarrett_Allen.csv")
df = pd.read_csv("./all_player_game_data/CLE/Jarrett_Allen.csv")


In [43]:
print(df.columns)

Index(['Date', 'Team', 'Unnamed: 2', 'Opp', 'Result', 'GS', 'MP', 'FG', 'FGA',
       'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT', 'FTA',
       'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS',
       '+/-'],
      dtype='object')


In [None]:
df

In [17]:
opps = []
opps = df['Opp'].unique()

In [18]:
print(opps)

['IND' 'NYK' 'CHI' 'SAC' 'SAS' 'LAC' 'DET' 'POR' 'UTA' 'PHX' 'ORL' 'MEM'
 'BKN' 'MIL' 'CHA' 'MIA' 'BOS' 'TOR' 'MIN' 'WAS' 'DAL' 'ATL' 'HOU' 'PHI'
 'OKC' 'LAL' 'GSW' 'DEN' 'NOP']


In [11]:
df2 = pd.read_csv("../backend/data/player_game_data/LAL/lebron_james.csv")

In [12]:
opps2 = []
opps2 = df2["Opp"].unique()

In [28]:
print(opps2)
team = df2["Team"].unique()
player = df2["Player_Name"]

['MIN' 'PHO' 'SAC' 'CLE' 'TOR' 'DET' 'MEM' 'PHI' 'SAS' 'NOP' 'UTA' 'ORL'
 'DEN' 'OKC' 'MIA' 'ATL' 'POR' 'GSW' 'HOU' 'DAL' 'BRK' 'LAC' 'WAS' 'BOS'
 'CHO' 'NYK' 'IND' 'MIL' 'CHI']


KeyError: 'Player_Name'

In [27]:
print(team[0])

LAL


In [None]:
df.to_csv("./all_player_game_data/{team[0]}/{player}.csv")