In [1]:
import pickle
import pandas as pd

In [2]:
with open('./data/transformed_data/teams_stats.pickle', 'rb') as team_stats:
    teams_stats = pickle.load(team_stats)

In [3]:
with open('./data/transformed_data/players_stats.pickle', 'rb') as play_stats:
    players_stats = pickle.load(play_stats)

In [4]:
with open('./data/transformed_data/players_advanced_stats.pickle', 'rb') as play_advanced_stats:
    players_advanced_stats = pickle.load(play_advanced_stats)

In [5]:
game_types = ["playoffs", "leagues"]
years = list(range(2001, 2023))
stats = {}
df_columns = ["ID", "age", "games_played_perc", "games_started_perc", "avg_minutes_played", "WS48", "team_successes", 
              "defensive", "most_improved", "most_valuable", "most_valuable_finals", "sixth_man", "all_league_1", "all_league_2", 
              "all_league_3", "all_league_4", "all_league_5", "all_league_6", "all_league_7", "all_league_8", "all_league_9", "all_league_10", 
              "all_league_11", "all_league_12", "all_league_13", "all_league_14", "all_league_15", "all_def_1", "all_def_2", "all_def_3", 
              "all_def_4", "all_def_5", "all_def_6", "all_def_7", "all_def_8", "all_def_9", "all_def_10", "all_def_11"]

In [6]:
# % GAMES PLAYED FOR EACH PLAYER
for year in years:  
    player_list = set(players_stats[year]["playoffs"]["ID"].values.tolist() + players_stats[year]["leagues"]["ID"].values.tolist())
    stats[year] = pd.DataFrame(columns=df_columns)
    stats[year]['ID'] = [player for player in player_list]
    stats[year] = stats[year].set_index("ID")
    for player_id in player_list:
        games_played = 0
        team_games_played = 0
        for game_type in game_types:
            if player_id in player_list:
                try:
                    player_team = players_stats[year][game_type].query(f"ID == '{player_id}'")["Tm"].iloc[0]
                except:
                    pass
                try:
                    games_played += players_stats[year][game_type].query(f"ID == '{player_id}'")["G"].iloc[0]   
                except:
                    pass
                try:
                    team_games_played += teams_stats[year][game_type].query(f"Team == '{player_team}'")["G"].iloc[0]
                except:    
                    pass

        perc_games_played = games_played / team_games_played
            
        stats[year].loc[[f'{player_id}'], ['games_played_perc']] = perc_games_played

In [8]:
# % GAMES STARTED FOR EACH PLAYER
for year in years:
    player_list = set(players_stats[year]["playoffs"]["ID"].values.tolist() + players_stats[year]["leagues"]["ID"].values.tolist())
    for player_id in player_list:
        games_started = 0
        team_games_played = 0
        for game_type in game_types:
            if player_id in player_list:
                try:
                    player_team = players_stats[year][game_type].query(f"ID == '{player_id}'")["Tm"].iloc[0]
                except:
                    pass
                try:
                    games_started += players_stats[year][game_type].query(f"ID == '{player_id}'")["GS"].iloc[0]
                except:
                    pass
                try:    
                    team_games_played += teams_stats[year][game_type].query(f"Team == '{player_team}'")["G"].iloc[0]
                except:
                    pass

        perc_games_started = games_started / team_games_played
            
        stats[year].loc[[f'{player_id}'], ['games_started_perc']] = perc_games_started

In [9]:
# AVERAGE MINUTES PLAYED FOR EACH PLAYER
for year in years:
    player_list = set(players_stats[year]["playoffs"]["ID"].values.tolist() + players_stats[year]["leagues"]["ID"].values.tolist())
    for player_id in player_list:
        avg_minutes_played = 0
        avg_minutes_played_x_games = 0
        total_games = 0
        minutes_played_per_game = 0
        games_played = 0
        for game_type in game_types:
            if player_id in player_list:
                try:
                    minutes_played_per_game = players_stats[year][game_type].query(f"ID == '{player_id}'")["MP"].iloc[0]
                    games_played = players_stats[year][game_type].query(f"ID == '{player_id}'")["G"].iloc[0]
                except:
                    minutes_played_per_game += 0
                    games_played += 0
                
            avg_minutes_played_x_games += minutes_played_per_game * games_played
            total_games += games_played
            
        if total_games != 0:
            avg_minutes_played = round(avg_minutes_played_x_games/total_games, 2)
        else:
            avg_minutes_played = 0
        
        stats[year].loc[[f'{player_id}'], ['avg_minutes_played']] = avg_minutes_played

In [10]:
# WIN SHARES PER 48 MINUTES FOR EACH PLAYER
for year in years:
    player_list = set(players_stats[year]["playoffs"]["ID"].values.tolist() + players_stats[year]["leagues"]["ID"].values.tolist())
    for player_id in player_list:
        minutes_played = 0
        win_shares = 0
        win_share_48 = 0
        for game_type in game_types:
            if player_id in player_list:
                try:
                    minutes_played = players_stats[year][game_type].query(f"ID == '{player_id}'")["MP"].iloc[0]
                    win_shares = players_advanced_stats[year][game_type].query(f"ID == '{player_id}'")["WS"].iloc[0]
                except:
                    minutes_played += 0
                    win_shares += 0
            
        if minutes_played != 0:
            win_share_48 = round((win_shares/minutes_played) * 48, 2)
        else:
            win_share_48 = 0
        
        stats[year].loc[[f'{player_id}'], ['WS48']] = win_share_48

In [11]:
for year in years:
    player_list = set(players_stats[year]["playoffs"]["ID"].values.tolist() + players_stats[year]["leagues"]["ID"].values.tolist())
    for player_id in player_list:
        try:
            stats[year].loc[[f'{player_id}'], ['age']] = players_stats[year]["leagues"].query(f"ID == '{player_id}'")["Age"].iloc[0]
        except:
            stats[year].loc[[f'{player_id}'], ['age']] = players_stats[year]["playoffs"].query(f"ID == '{player_id}'")["Age"].iloc[0]

{2001:           age games_played_perc games_started_perc avg_minutes_played   WS48   
 ID                                                                             
 foxri01    31               1.0            0.94898              29.19   8.77  \
 campbel01  32          0.956522           0.956522              29.85  10.24   
 perduwi01  35          0.188235                0.0               3.98    3.2   
 fosteje01  24          0.872093           0.127907              16.03    8.0   
 tsakaja01  21          0.709302                0.5              16.74   3.76   
 ...        ..               ...                ...                ...    ...   
 willije01  27          0.744681           0.021277               19.2    8.4   
 kukocto01  32          0.691489           0.202128               24.6   9.17   
 blounco01  32          0.829268           0.073171               19.2    4.5   
 davidko01  29          0.329268                0.0                7.7   2.49   
 oyedeol01  19        