In [2]:
import pandas as pd
import numpy as np
import glob

In [3]:
# Load and combine all matchup CSV files.
# glob returns paths in arbitrary, filesystem-dependent order; sort them so the
# row order of games_df (and anything derived from first-seen rows) is
# reproducible across machines and runs.
file_paths = sorted(glob.glob("matchups-*.csv"))

# Fail with an actionable message instead of pandas' generic
# "No objects to concatenate" when the notebook runs from the wrong directory.
if not file_paths:
    raise FileNotFoundError(
        "No files matching 'matchups-*.csv' found in the current working directory"
    )

# Combine all CSV files into a single DataFrame with a fresh 0..n-1 index.
games_df = pd.concat([pd.read_csv(file) for file in file_paths], ignore_index=True)

print(f"Total records combine: {len(games_df)}")

Total records combine: 236912


In [4]:
# Accumulator of per-player totals, keyed by the player value passed to
# update_player_dict; starts empty each time this cell runs.
players_dict = dict()

def get_player_score(row, home):
    """Compute the weighted stat total of one row for one side.

    Args:
        row: Mapping-like record indexable by the ``*_home`` / ``*_visitor``
            stat column names (for example a row of ``games_df``).
        home: Truthy to read the ``*_home`` columns, falsy to read the
            ``*_visitor`` columns.

    Returns:
        ``3*fgm_3 + 2*fgm_2 + fta + 1.2*reb + 1.5*ast + 2*blk - to`` evaluated
        on the selected side's columns.
    """
    side = "home" if home else "visitor"

    def stat(name):
        # Read one stat column for the selected side.
        return row[f"{name}_{side}"]

    # NOTE(review): `fta` looks like free-throw attempts rather than makes --
    # confirm that this is the intended term in the formula.
    return (
        3 * stat("fgm_3")
        + 2 * stat("fgm_2")
        + stat("fta")
        + 1.2 * stat("reb")
        + 1.5 * stat("ast")
        + 2 * stat("blk")
        - stat("to")
    )

def update_player_dict(player, index, home=True, row=None):
    """Add one row's score to a player's running totals in ``players_dict``.

    On the first call for a player an entry is created and the team is taken
    from that row (``home_team`` or ``away_team`` depending on ``home``); the
    team is never updated afterwards. Every call then adds the row's score for
    the chosen side and increments ``total_games`` by one. Despite its name,
    ``total_games`` counts credited rows; the ``game`` column is not consulted.

    Args:
        player: Key identifying the player (also stored under ``"name"``).
        index: Index label of the row in ``games_df``.
        home: True to credit the home-side stats, False for the away side.
        row: Optional row record for ``index``. When omitted it is looked up
            with ``games_df.loc[index]``; callers that already hold the row
            (e.g. from ``iterrows``) can pass it to skip that lookup.

    Returns:
        None. ``players_dict`` is mutated in place.
    """
    if row is None:
        # Resolve the row from the index explicitly. The previous body read a
        # module-level `row` left behind by whichever loop ran last, which
        # silently breaks once another cell rebinds that name.
        row = games_df.loc[index]

    if player not in players_dict:
        team_column = 'home_team' if home else 'away_team'
        players_dict[player] = {
            "name": player,
            "team": row[team_column],
            "score": 0,
            "total_games": 0,
        }

    players_dict[player]["score"] += get_player_score(row, home)
    players_dict[player]["total_games"] += 1
    

In [5]:
# List the columns of the combined frame to confirm the lineup and stat column
# names that the scoring code indexes by.
print(games_df.columns)

Index(['game', 'season', 'home_team', 'away_team', 'starting_min', 'end_min',
       'home_0', 'home_1', 'home_2', 'home_3', 'home_4', 'away_0', 'away_1',
       'away_2', 'away_3', 'away_4', 'fga_home', 'fta_home', 'fgm_home',
       'fga_2_home', 'fgm_2_home', 'fga_3_home', 'fgm_3_home', 'ast_home',
       'blk_home', 'pf_home', 'reb_home', 'dreb_home', 'oreb_home', 'to_home',
       'pts_home', 'pct_home', 'pct_2_home', 'pct_3_home', 'fga_visitor',
       'fta_visitor', 'fgm_visitor', 'fga_2_visitor', 'fgm_2_visitor',
       'fga_3_visitor', 'fgm_3_visitor', 'ast_visitor', 'blk_visitor',
       'pf_visitor', 'reb_visitor', 'dreb_visitor', 'oreb_visitor',
       'to_visitor', 'pts_visitor', 'pct_visitor', 'pct_2_visitor',
       'pct_3_visitor', 'outcome'],
      dtype='object')


In [6]:
# Build player dictionary
# Start from an empty accumulator so re-running this cell does not add the same
# rows on top of totals left over from a previous execution.
players_dict.clear()

# `index` and `row` keep their original names because they are module-level
# names that other code in the notebook may read.
for index, row in games_df.iterrows():
    # Credit the five home lineup slots, then the five away lineup slots.
    for slot in range(5):
        update_player_dict(row[f"home_{slot}"], index, home=True)
    for slot in range(5):
        update_player_dict(row[f"away_{slot}"], index, home=False)

In [7]:
# Pivot the per-player records into one list per field, following the
# insertion order of players_dict.
player_df_dict = {
    field: [record[field] for record in players_dict.values()]
    for field in ("name", "team", "score", "total_games")
}

In [8]:
# Assemble the player table, fixing the column order explicitly.
player_df = pd.DataFrame(
    player_df_dict,
    columns=["name", "team", "score", "total_games"],
)

In [9]:
# Preview a handful of records instead of printing every player row by row.
# Throwaway loop names are used so this cell does not rebind the module-level
# `index` / `row` names.
for _, player_record in player_df.head(10).iterrows():
    print(player_record)

name           Andrew Bynum
team                    LAL
score               39720.0
total_games            4094
Name: 0, dtype: object
name           Lamar Odom
team                  LAL
score             58549.1
total_games          6795
Name: 1, dtype: object
name           Luke Walton
team                   LAL
score              26387.3
total_games           2962
Name: 2, dtype: object
name           Sasha Vujacic
team                     LAL
score                18456.4
total_games             2887
Name: 3, dtype: object
name           Smush Parker
team                    LAL
score               11326.2
total_games            1229
Name: 4, dtype: object
name           Boris Diaw
team                  PHO
score             59691.6
total_games          9096
Name: 5, dtype: object
name           Kurt Thomas
team                   PHO
score              25348.1
total_games           3638
Name: 6, dtype: object
name           Raja Bell
team                 PHO
score            39129.8


In [10]:
# Show the (pandas-truncated) text summary of the full player table.
print(player_df)

                  name team    score  total_games
0         Andrew Bynum  LAL  39720.0         4094
1           Lamar Odom  LAL  58549.1         6795
2          Luke Walton  LAL  26387.3         2962
3        Sasha Vujacic  LAL  18456.4         2887
4         Smush Parker  LAL  11326.2         1229
...                ...  ...      ...          ...
1039     Andre Dawkins  MIA     66.6           11
1040   Sean Kilpatrick  MIN    237.4           39
1041        David Wear  SAC     29.1            4
1042  Cameron Bairstow  CHI    208.4           25
1043  Jerrelle Benimon  UTA     14.6            1

[1044 rows x 4 columns]


In [11]:
# Keep scores to two decimal places in the exported table.
player_df = player_df.assign(score=player_df['score'].round(2))

# Write the player table to disk without the index column.
player_df.to_csv('combined_player_data.csv', index=False)

print("Player data successfully saved to 'combined_player_data.csv'")

Player data successfully saved to 'combined_player_data.csv'
