In [1]:
import pandas as pd
import os 

# Locate the project root. The notebook is expected to run either from the
# project root itself or from its `notebooks/` sub-directory. Only the final
# path component is inspected, so a parent directory that merely contains the
# text "/notebooks" is left intact (a plain str.replace would strip it).
cwd = os.getcwd()
project_root = os.path.dirname(cwd) if os.path.basename(cwd) == 'notebooks' else cwd

input_data_dir = os.path.join(project_root, 'data', 'external')
output_data_dir = os.path.join(project_root, 'data', 'interim')

team_filename = 'game_summary_by_team.csv'
indiv_player_game_stats_filename = 'game_summary_by_player.csv'
indiv_player_metrics_filename = 'player_metadata.csv'

# Load the three raw inputs from the external-data directory.
team_df = pd.read_csv(os.path.join(input_data_dir, team_filename))
indiv_player_game_df = pd.read_csv(os.path.join(input_data_dir, indiv_player_game_stats_filename))
indiv_player_metrics_df = pd.read_csv(os.path.join(input_data_dir, indiv_player_metrics_filename))

# Move the identifying columns to the front; every other column keeps its
# original relative order behind them.
first_cols = ['player_link', 'player_name', 'season', 'week', 'team', 'position']
other_cols = [col for col in indiv_player_game_df.columns if col not in first_cols]
indiv_player_game_df_cols = first_cols + other_cols
indiv_player_game_df = indiv_player_game_df[indiv_player_game_df_cols]
indiv_player_game_df.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,player_link,player_name,season,week,team,position,def_int,fga,fgm,fumbles,...,rush_long,rush_td,rush_yds,sacks,snaps,tackles_assists,tackles_solo,targets,xpa,xpm
0,/players/F/FreeDo20.htm,Doug Free,2012,1,Dallas Cowboys,OL,,,,,...,,,,,61,,,,,
1,/players/L/LiviNa20.htm,Nate Livings,2012,1,Dallas Cowboys,OL,,,,,...,,,,,61,,,,,
2,/players/B/BernMa20.htm,Mackenzy Bernadeau,2012,1,Dallas Cowboys,OL,,,,,...,,,,,61,,,,,
3,/players/S/SmitTy00.htm,Tyron Smith,2012,1,Dallas Cowboys,OL,,,,,...,,,,,61,,,,,
4,/players/R/RomoTo00.htm,Tony Romo,2012,1,Dallas Cowboys,QB,,,,0.0,...,9.0,0.0,12.0,,61,,,,,


In [2]:
# Keep only the first five distinct values of the `url` column.
unique_games = team_df["url"].unique()[:5]

In [3]:
def get_fave_dog_game_stats(game_df):
    """Collect team-level stats for the favorite and the underdog of one game.

    Parameters
    ----------
    game_df : pandas.DataFrame
        Rows belonging to a single game. Must contain a ``favorite_flg``
        column and a ``team_<stat>`` column for every stat listed in the
        body. The first row with ``favorite_flg == True`` supplies the
        favorite's values and the first row with ``favorite_flg == False``
        supplies the underdog's values.

    Returns
    -------
    dict
        One ``fave_<stat>`` and one ``dog_<stat>`` entry per stat.

    Raises
    ------
    IndexError
        If ``game_df`` has no favorite row or no underdog row.
    KeyError
        If ``favorite_flg`` or an expected ``team_<stat>`` column is missing.
    """
    # Each stat appears exactly once; listing one twice only repeats the lookup.
    team_df_cols = [
        "first_downs", "total_yds", "time_of_poss", "fumbles", "fumbles_lost", "turnovers",
        "drives", "plays", "yds", "tds", "fgs", "punts",
        "rush_att", "rush_yds", "rush_tds",
        "rush_le", "rush_yds_le", "rush_lt", "rush_yds_lt", "rush_lg", "rush_yds_lg", "rush_md", "rush_yds_md",
        "rush_re", "rush_yds_re", "rush_rt", "rush_yds_rt", "rush_rg", "rush_yds_rg",
        "pass_comp", "pass_att", "pass_yds", "pass_tds", "pass_int", "net_pass_yds",
        "sacks", "penalties", "penalty_yds", "third_down_conv", "fourth_down_conv"
    ]

    # The row filters do not depend on the stat, so compute them once instead
    # of re-filtering the frame twice per stat.
    fave_rows = game_df[game_df["favorite_flg"] == True]
    dog_rows = game_df[game_df["favorite_flg"] == False]

    fave_dog_dict = {}

    for col in team_df_cols:
        source_col = "team_{}".format(col)
        fave_dog_dict["fave_{}".format(col)] = fave_rows[source_col].values[0]
        fave_dog_dict["dog_{}".format(col)] = dog_rows[source_col].values[0]

    return fave_dog_dict

In [70]:
def get_player_stats(players_df, team, favorite_flg):
    """Aggregate per-position player stats for one team.

    For every position found among ``team``'s rows in ``players_df`` the
    configured counting stats are summed (missing values are treated as 0)
    and stored under ``<fave|dog>_<position>_<stat>_sum``. The ratio stats
    returned by ``get_pos_player_avg_stats`` for that position are merged
    into the same dictionary.

    Parameters
    ----------
    players_df : pandas.DataFrame
        Player rows with at least ``team`` and ``position`` columns plus the
        stat columns configured for each position present.
    team : str
        Value matched against the ``team`` column.
    favorite_flg : bool
        Selects the key prefix: ``"fave"`` when it equals ``True``,
        ``"dog"`` otherwise.

    Returns
    -------
    dict
        Aggregated stats keyed as described above.
    """
    prefix = "fave" if favorite_flg == True else "dog"

    receiving_stats = ["fumbles", "rec", "rec_long", "rec_td", "rec_yds", "targets", "snaps"]
    front_seven_stats = ["fumbles_forced", "sacks", "tackles_assists", "tackles_solo", "snaps"]
    summed_stats_by_pos = {
        "QB": ["fumbles", "pass_att", "pass_cmp", "pass_int", "pass_sacked", "pass_yds",
               "rush_att", "rush_long", "rush_td", "rush_yds", "snaps"],
        "RB": ["fumbles", "rush_att", "rush_long", "rush_td", "rush_yds",
               "rec", "rec_long", "rec_td", "rec_yds", "targets", "snaps"],
        "WR": list(receiving_stats),
        "TE": list(receiving_stats),
        "OL": [],
        "DL": list(front_seven_stats),
        "LB": list(front_seven_stats),
        "DB": ["def_int"] + front_seven_stats,
        "K": ["fga", "fgm", "xpa", "xpm"],
        "P": ["punt", "punt_yds_per_punt"],
    }

    # Progress output: which team is being processed and which positions it has.
    print(team, favorite_flg)
    roster = players_df[players_df["team"] == team]
    print(roster["position"].unique())

    aggregated = {}
    for pos in roster["position"].unique():
        pos_key = pos.lower()
        pos_rows = roster[roster["position"] == pos].fillna(0)

        # Summed stats first, then the ratio stats for the same position.
        aggregated.update({
            "{}_{}_{}_sum".format(prefix, pos_key, stat): pos_rows[stat].sum()
            for stat in summed_stats_by_pos[pos]
        })
        aggregated.update(get_pos_player_avg_stats(pos_rows, pos, favorite_flg))

    return aggregated

def get_pos_player_avg_stats(pos_players_df, pos, favorite_flg):
    """Compute ratio ("per-X") stats for all players at one position.

    Each ratio is the summed numerator column(s) divided by the summed
    denominator column over every row of ``pos_players_df``.

    Parameters
    ----------
    pos_players_df : pandas.DataFrame
        Rows for a single position; must contain the columns required by
        the ratios configured for ``pos``.
    pos : str
        Position code (``"QB"``, ``"RB"``, ``"WR"``, ``"TE"``, ``"OL"``,
        ``"DL"``, ``"LB"``, ``"DB"``, ``"P"`` or ``"K"``).
    favorite_flg : bool
        Selects the key prefix: ``"fave"`` when it equals ``True``,
        ``"dog"`` otherwise.

    Returns
    -------
    dict
        ``<fave|dog>_<position>_<ratio>`` -> float, or ``None`` when the
        denominator sums to zero.

    Raises
    ------
    KeyError
        If ``pos`` is not a configured position or a required column is
        missing from ``pos_players_df``.
    """
    numerator_key = "numerator"
    denominator_key = "denominator"
    col_prepend = "fave" if favorite_flg == True else "dog"
    pos_colname = pos.lower()

    avg_stats_def = {
        "yds_per_rec" : {numerator_key : "rec_yds", denominator_key : "rec"},
        "yds_per_rush" : {numerator_key : "rush_yds", denominator_key : "rush_att"},
        "yds_per_target" : {numerator_key: "rec_yds", denominator_key : "targets"},
        "yds_per_snap" : {numerator_key : ["rush_yds", "rec_yds"], denominator_key : "snaps"},
        # Yards are the numerator: "yards per attempt/completion" is
        # pass_yds divided by pass_att / pass_cmp, not the other way round.
        "yds_per_pass_att" : {numerator_key : "pass_yds", denominator_key : "pass_att"},
        "yds_per_pass_comp" : {numerator_key : "pass_yds", denominator_key : "pass_cmp"},
        "tackles_per_snap" : {numerator_key : ["tackles_assists", "tackles_solo"], denominator_key : "snaps"},
        "fg_pct" : {numerator_key : "fgm", denominator_key : "fga"}
    }
    pos_stats_dict = {
        "QB" : ["yds_per_pass_att", "yds_per_pass_comp"],
        "RB" : ["yds_per_rush", "yds_per_rec", "yds_per_target", "yds_per_snap"],
        "WR" : ["yds_per_rec", "yds_per_target", "yds_per_snap"],
        "TE" : ["yds_per_rec", "yds_per_target", "yds_per_snap"],
        "OL" : [],
        "DL" : ["tackles_per_snap"],
        "LB" : ["tackles_per_snap"],
        "DB" : ["tackles_per_snap"],
        "P" : [],
        "K" : ["fg_pct"]
    }
    pos_stat_list = pos_stats_dict[pos]

    pos_player_avg_dict = {}
    for stat in pos_stat_list:
        stat_config = avg_stats_def[stat]
        numerator = stat_config[numerator_key]
        denominator = stat_config[denominator_key]

        if isinstance(numerator, list):
            # Several columns: sum each column, then add the column totals.
            numerator_val = pos_players_df[numerator].sum().sum()
        else:
            numerator_val = pos_players_df[numerator].sum()

        denominator_val = pos_players_df[denominator].sum()
        if denominator_val == 0:
            # Ratio is undefined without any attempts/snaps/etc.
            avg_stat_val = None
        else:
            avg_stat_val = float(numerator_val) / float(denominator_val)

        pos_player_avg_dict["{}_{}_{}".format(col_prepend, pos_colname, stat)] = avg_stat_val

    return pos_player_avg_dict


In [71]:
# Build one flat record per game (game info + team stats + per-position
# player stats for both sides) and assemble the frame once at the end.
game_records = []

for game_link in unique_games:

    team_df_subset = team_df[team_df["url"] == game_link]

    # Split the game's rows into favorite / underdog once and reuse them.
    fave_rows = team_df_subset[team_df_subset["favorite_flg"] == True]
    dog_rows = team_df_subset[team_df_subset["favorite_flg"] == False]

    season = fave_rows["season"].values[0]
    week = fave_rows["week"].values[0]

    # Player rows for the same season and week. Both sides of the week
    # comparison are cast to str so the match does not depend on how each
    # CSV column was parsed.
    player_game_df_subset = indiv_player_game_df[indiv_player_game_df["season"] == season]
    player_game_df_subset = player_game_df_subset[player_game_df_subset["week"].astype(str) == str(week)]

    favorite = fave_rows["team"].values[0]
    underdog = dog_rows["team"].values[0]
    # Line and over/under are read from the underdog row and stored as
    # absolute values.
    line = abs(dog_rows["line"].values[0])
    over_under = abs(dog_rows["over_under"].values[0])
    favorite_pts = fave_rows["off_pts"].values[0]
    underdog_pts = dog_rows["off_pts"].values[0]

    home_favorite = fave_rows["home_flg"].values[0]
    game_dict = {
        "game_link" : game_link,
        "season" : season,
        "week" : week,
        "favorite" : favorite,
        "favorite_pts" : favorite_pts,
        "underdog" : underdog,
        "underdog_pts" : underdog_pts,
        "line" : line,
        "over_under" : over_under,
        "home_favorite": home_favorite
    }

    fave_dog_dict = get_fave_dog_game_stats(team_df_subset)
    game_dict.update(fave_dog_dict)

    favorite_dict = get_player_stats(player_game_df_subset, favorite, favorite_flg = True)
    underdog_dict = get_player_stats(player_game_df_subset, underdog, favorite_flg = False)

    # Merge the player-level aggregates into the game record and keep it;
    # without this the per-game results are thrown away on each iteration.
    game_dict.update(favorite_dict)
    game_dict.update(underdog_dict)
    game_records.append(game_dict)

# One row per game; built once rather than grown inside the loop.
output_df = pd.DataFrame(game_records)

# Spot check on the last game processed.
print(favorite, underdog, season, week)
print(favorite_dict)

New York Giants True
['OL' 'QB' 'TE' 'WR' 'RB' 'DB' 'DL' 'LB' 'P' 'K']
Dallas Cowboys False
['OL' 'QB' 'WR' 'TE' 'RB' 'DB' 'LB' 'DL' 'K' 'P']
Chicago Bears True
['OL' 'QB' 'TE' 'WR' 'RB' 'DB' 'LB' 'DL' 'K' 'P']
Indianapolis Colts False
['OL' 'QB' 'WR' 'TE' 'RB' 'DB' 'LB' 'DL' 'P' 'K']
New York Jets True
['OL' 'QB' 'RB' 'WR' 'TE' 'LB' 'DB' 'DL' 'K' 'P']
Buffalo Bills False
['OL' 'QB' 'WR' 'TE' 'RB' 'LB' 'DB' 'DL' 'P' 'K']
New Orleans Saints True
['OL' 'QB' 'TE' 'WR' 'RB' 'DB' 'DL' 'LB' 'P' 'K']
Washington Redskins False
['OL' 'TE' 'QB' 'WR' 'RB' 'LB' 'DB' 'DL' 'K' 'P']
New England Patriots True
['OL' 'QB' 'TE' 'WR' 'RB' 'LB' 'DB' 'DL' 'K' 'P']
Tennessee Titans False
['OL' 'TE' 'WR' 'QB' 'RB' 'DB' 'LB' 'DL' 'K' 'P']
New England Patriots Tennessee Titans 2012 1
{'fave_qb_fumbles_sum': 0.0, 'fave_qb_pass_att_sum': 31.0, 'fave_qb_pass_cmp_sum': 23.0, 'fave_qb_pass_int_sum': 0.0, 'fave_qb_pass_sacked_sum': 1.0, 'fave_qb_pass_yds_sum': 236.0, 'fave_qb_rush_att_sum': 2.0, 'fave_qb_rush_long_su

In [52]:
# Scratch check on kicker rows. `team_players_df` is not created in this cell
# and must already exist in the kernel.
# Columns that are null for every kicker row are dropped before summing.
pos_players_df = (
    team_players_df[team_players_df["position"] == "K"]
    .dropna(axis=1, how="all")
)
pos_players_df["fgm"].sum().sum()

2.0

In [7]:
# Player rows for season 2014, week '5' (week compared as a string), then
# narrowed to a single team; the cell displays that team's distinct positions.
player_game_df_subset = (
    indiv_player_game_df
    .loc[indiv_player_game_df["season"] == 2014]
    .loc[lambda season_rows: season_rows["week"].astype(str) == '5']
)
team_players_df = player_game_df_subset.loc[player_game_df_subset["team"] == 'Philadelphia Eagles']
team_players_df["position"].unique()

array(['OL', 'QB', 'WR', 'TE', 'RB', 'DB', 'LB', 'DL', 'K', 'P'],
      dtype=object)

In [35]:
# Total of the `rec_yds` column for whatever `pos_players_df` currently holds in the kernel.
pos_players_df.loc[:, "rec_yds"].sum()

5.0