In [1]:
import os
from src.config import config
from src.utils.points_calc import calculate_fantasy_points
from src.utils.points_config import PointsConfig
import pandas as pd


def read_pbp_agg(run_id: str) -> pd.DataFrame:
    pbp_agg_path = config['local']['data_paths']['outputs']['play_by_play_agg']
    pbp_filename = f'play_by_play_agg_{run_id}.parquet'
    return pd.read_parquet(os.path.join(pbp_agg_path, pbp_filename))


def read_rosters(run_id: str) -> pd.DataFrame:
    rosters_path = config['local']['data_paths']['outputs']['rosters']
    rosters_filename = f'rosters_{run_id}.parquet'
    return pd.read_parquet(os.path.join(rosters_path, rosters_filename))


def calculate_pbp_fantasy_points(df: pd.DataFrame, pc: PointsConfig) -> pd.DataFrame:
    df['fantasy_points'] = df.apply(lambda x: calculate_fantasy_points(pc, x.passing_yards, x.passing_touchdowns,
                                                                       x.interceptions, x.receptions, x.receiving_yards,
                                                                       x.receiving_touchdowns, x.rushing_yards,
                                                                       x.rushing_touchdowns, x.fumbles), axis=1)
    return df


In [2]:
from src.utils.points_config import STANDARD_HALF_PPR

run_id = '20240809'
pbp_agg = read_pbp_agg(run_id)
rosters = read_rosters(run_id)
fantasy_pts_df = calculate_pbp_fantasy_points(pbp_agg, STANDARD_HALF_PPR)

In [31]:
relevant_positions = ['QB', 'WR', 'RB', 'OL', 'TE']
rosters_filtered = rosters[rosters['position'].isin(relevant_positions)]

In [4]:
pbp_agg[['game_id', 'player_id', 'season', 'week', 'opponent', 'home_away', 'passing_yards', 'rushing_yards', 'receiving_yards', 'passing_touchdowns', 'receiving_touchdowns', 'rushing_touchdowns', 'receptions', 'interceptions', 'fumbles', 'fantasy_points']]

Unnamed: 0,game_id,player_id,season,week,opponent,home_away,passing_yards,rushing_yards,receiving_yards,passing_touchdowns,receiving_touchdowns,rushing_touchdowns,receptions,interceptions,fumbles,fantasy_points
0,2020_01_ARI_SF,00-0034681,2020,1,SF,away,0.0,26.0,19.0,0.0,1.0,0.0,5.0,0.0,1.0,11.00
1,2020_01_ARI_SF,00-0034775,2020,1,SF,away,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,3.00
2,2020_01_ARI_SF,00-0035752,2020,1,SF,away,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.30
3,2020_01_ARI_SF,00-0034011,2020,1,SF,away,0.0,0.0,21.0,0.0,0.0,0.0,2.0,0.0,1.0,1.10
4,2020_01_ARI_SF,00-0030564,2020,1,SF,away,0.0,0.0,151.0,0.0,0.0,0.0,16.0,0.0,0.0,23.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11326,2023_22_SF_KC,00-0036567,2023,22,KC,away,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.80
11327,2023_22_SF_KC,00-0033288,2023,22,KC,away,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,2.40
11328,2023_22_SF_KC,00-0036259,2023,22,KC,away,21.0,0.0,42.0,1.0,1.0,0.0,5.0,0.0,0.0,16.91
11329,2023_22_SF_KC,00-0029892,2023,22,KC,away,0.0,2.0,31.0,0.0,0.0,0.0,2.0,0.0,0.0,4.30


In [39]:
merged_df = pbp_agg.merge(
    rosters_filtered[['team', 'week', 'season', 'position', 'full_name', 'player_id', 'active']],
    left_on=['team', 'week', 'season'],
    right_on=['team', 'week', 'season'],
    suffixes=('', '_teammate')
).rename(columns={'player_id_teammate': 'teammate_id'})

In [40]:
merged_df.groupby(['player_id', 'game_id']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,team,home_away,opponent,player,passing_attempts,passing_yards,passing_touchdowns,rushing_attempts,rushing_yards,rushing_touchdowns,...,interceptions,season,week,position,curr_team,fantasy_points,position_teammate,full_name,teammate_id,active
player_id,game_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
00-0019596,2020_01_TB_NO,45,45,45,45,45,45,45,45,45,45,...,45,45,45,45,45,45,45,45,45,45
00-0019596,2020_03_TB_DEN,40,40,40,40,40,40,40,40,40,40,...,40,40,40,40,40,40,40,40,40,40
00-0019596,2020_05_TB_CHI,40,40,40,40,40,40,40,40,40,40,...,40,40,40,40,40,40,40,40,40,40
00-0019596,2020_07_TB_LV,42,42,42,42,42,42,42,42,42,42,...,42,42,42,42,42,42,42,42,42,42
00-0019596,2020_08_TB_NYG,41,41,41,41,41,41,41,41,41,41,...,41,41,41,41,41,41,41,41,41,41
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
00-0039165,2023_11_SEA_LA,36,36,36,36,36,36,36,36,36,36,...,36,36,36,36,36,36,36,36,36,36
00-0039165,2023_13_SEA_DAL,36,36,36,36,36,36,36,36,36,36,...,36,36,36,36,36,36,36,36,36,36
00-0039165,2023_14_SEA_SF,37,37,37,37,37,37,37,37,37,37,...,37,37,37,37,37,37,37,37,37,37
00-0039165,2023_16_SEA_TEN,36,36,36,36,36,36,36,36,36,36,...,36,36,36,36,36,36,36,36,36,36


In [44]:
import pandas as pd
import numpy as np

# Assuming you have the current_teammate_presence DataFrame with a 'position' column
# If 'position' is not in the original DataFrame, you'll need to join it from another source

# First, let's aggregate the data
aggregated_presence = merged_df.groupby(['game_id', 'player_id', 'teammate_id', 'position'])['active'].max().reset_index()

# Filter for specific positions
relevant_positions = ['QB', 'WR', 'RB', 'OL', 'TE']
position_filtered = aggregated_presence[aggregated_presence['position'].isin(relevant_positions)]

# Identify teammates who were active at least once in the season and in relevant positions
active_teammates = position_filtered[position_filtered['active'] == 1]['teammate_id'].unique()

# Filter the data to include only active teammates in relevant positions
filtered_presence = position_filtered[position_filtered['teammate_id'].isin(active_teammates)]

# Optional: Filter for teammates active in a minimum number of games
games_active = filtered_presence.groupby('teammate_id')['active'].sum()
frequently_active_teammates = games_active[games_active >= 5].index  # Adjust 5 to your preferred threshold
filtered_presence = filtered_presence[filtered_presence['teammate_id'].isin(frequently_active_teammates)]

# Now pivot the filtered data
reshaped_teammate_presence = filtered_presence.pivot(
    index=['game_id', 'player_id'],
    columns='teammate_id',
    values='active'
).reset_index()

# Rename columns to add 'teammate_' prefix
reshaped_teammate_presence.columns = ['game_id', 'player_id'] + [f'teammate_{col}' for col in reshaped_teammate_presence.columns if col not in ['game_id', 'player_id']]

# Fill NaN values with 0 (for teammates who weren't present in a game)
reshaped_teammate_presence = reshaped_teammate_presence.fillna(0)

# Convert float values to int
for col in reshaped_teammate_presence.columns:
    if col.startswith('teammate_'):
        reshaped_teammate_presence[col] = reshaped_teammate_presence[col].astype(int)

print(f"Number of teammate columns: {len(reshaped_teammate_presence.columns) - 2}")  # Subtract 2 for game_id and player_id

Number of teammate columns: 1648


In [50]:
filtered_presence

Unnamed: 0,game_id,player_id,teammate_id,position,active
0,2020_01_ARI_SF,00-0022921,00-0022921,WR,1
3,2020_01_ARI_SF,00-0022921,00-0029146,WR,1
4,2020_01_ARI_SF,00-0022921,00-0029316,WR,1
5,2020_01_ARI_SF,00-0022921,00-0030046,WR,0
6,2020_01_ARI_SF,00-0022921,00-0030564,WR,1
...,...,...,...,...,...
444752,2023_22_SF_KC,00-0037834,00-0037831,QB,0
444753,2023_22_SF_KC,00-0037834,00-0037834,QB,1
444755,2023_22_SF_KC,00-0037834,00-0038643,QB,1
444756,2023_22_SF_KC,00-0037834,00-0038647,QB,0


In [47]:
import pandas as pd
import numpy as np

# Assuming you have the current_teammate_presence DataFrame with columns:
# game_id, player_id, teammate_id, active, position

# Filter for relevant positions
# relevant_positions = ['QB', 'WR', 'RB', 'OL', 'TE']
# filtered_presence = current_teammate_presence[current_teammate_presence['position'].isin(relevant_positions)]

# Group by game and player, and aggregate active teammates
def aggregate_teammates(group):
    active_teammates = group[group['active'] == 1]['teammate_id'].tolist()
    return pd.Series({
        'active_teammates': active_teammates,
        'num_active_teammates': len(active_teammates)
    })

reshaped_teammate_presence = filtered_presence.groupby(['game_id', 'player_id']).apply(aggregate_teammates).reset_index()

print(reshaped_teammate_presence.head())
print(f"Number of columns: {len(reshaped_teammate_presence.columns)}")

          game_id   player_id  \
0  2020_01_ARI_SF  00-0022921   
1  2020_01_ARI_SF  00-0030564   
2  2020_01_ARI_SF  00-0031558   
3  2020_01_ARI_SF  00-0033118   
4  2020_01_ARI_SF  00-0034011   

                                    active_teammates  num_active_teammates  
0  [00-0022921, 00-0029146, 00-0029316, 00-003056...                    21  
1  [00-0022921, 00-0029146, 00-0029316, 00-003056...                    21  
2  [00-0022921, 00-0029146, 00-0029316, 00-003056...                    21  
3  [00-0022921, 00-0029146, 00-0029316, 00-003056...                    21  
4  [00-0022921, 00-0029146, 00-0029316, 00-003056...                    21  
Number of columns: 4


  reshaped_teammate_presence = filtered_presence.groupby(['game_id', 'player_id']).apply(aggregate_teammates).reset_index()


In [51]:
filtered_presence

Unnamed: 0,game_id,player_id,teammate_id,position,active
0,2020_01_ARI_SF,00-0022921,00-0022921,WR,1
3,2020_01_ARI_SF,00-0022921,00-0029146,WR,1
4,2020_01_ARI_SF,00-0022921,00-0029316,WR,1
5,2020_01_ARI_SF,00-0022921,00-0030046,WR,0
6,2020_01_ARI_SF,00-0022921,00-0030564,WR,1
...,...,...,...,...,...
444752,2023_22_SF_KC,00-0037834,00-0037831,QB,0
444753,2023_22_SF_KC,00-0037834,00-0037834,QB,1
444755,2023_22_SF_KC,00-0037834,00-0038643,QB,1
444756,2023_22_SF_KC,00-0037834,00-0038647,QB,0


In [52]:
aggregate_teammates

<function __main__.aggregate_teammates(group)>

In [48]:
reshaped_teammate_presence

Unnamed: 0,game_id,player_id,active_teammates,num_active_teammates
0,2020_01_ARI_SF,00-0022921,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
1,2020_01_ARI_SF,00-0030564,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
2,2020_01_ARI_SF,00-0031558,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
3,2020_01_ARI_SF,00-0033118,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
4,2020_01_ARI_SF,00-0034011,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
...,...,...,...,...
10990,2023_22_SF_KC,00-0035719,"[00-0027857, 00-0029892, 00-0032128, 00-003215...",22
10991,2023_22_SF_KC,00-0036259,"[00-0027857, 00-0029892, 00-0032128, 00-003215...",22
10992,2023_22_SF_KC,00-0036261,"[00-0027857, 00-0029892, 00-0032128, 00-003215...",22
10993,2023_22_SF_KC,00-0036567,"[00-0027857, 00-0029892, 00-0032128, 00-003215...",22


In [49]:
pbp_agg.merge(
    reshaped_teammate_presence,
    how='inner',
    on=['game_id', 'player_id']
)

Unnamed: 0,game_id,team,home_away,opponent,player,player_id,passing_attempts,passing_yards,passing_touchdowns,rushing_attempts,...,receiving_touchdowns,fumbles,interceptions,season,week,position,curr_team,fantasy_points,active_teammates,num_active_teammates
0,2020_01_ARI_SF,ARI,away,SF,C.Edmonds,00-0034681,0.0,0.0,0.0,6.0,...,1.0,1.0,0.0,2020,1,RB,TB,11.00,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
1,2020_01_ARI_SF,ARI,away,SF,C.Kirk,00-0034775,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2020,1,WR,JAX,3.00,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
2,2020_01_ARI_SF,ARI,away,SF,C.Streveler,00-0035752,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,2020,1,QB,NYJ,0.30,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
3,2020_01_ARI_SF,ARI,away,SF,D.Arnold,00-0034011,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,2020,1,TE,PHI,1.10,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
4,2020_01_ARI_SF,ARI,away,SF,D.Hopkins,00-0030564,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2020,1,WR,TEN,23.10,"[00-0022921, 00-0029146, 00-0029316, 00-003056...",21
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11021,2023_22_SF_KC,SF,away,KC,D.Samuel,00-0035719,0.0,0.0,0.0,3.0,...,0.0,0.0,0.0,2023,22,WR,SF,9.60,"[00-0027857, 00-0029892, 00-0032128, 00-003215...",22
11022,2023_22_SF_KC,SF,away,KC,E.Mitchell,00-0036567,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,2023,22,RB,SF,0.80,"[00-0027857, 00-0029892, 00-0032128, 00-003215...",22
11023,2023_22_SF_KC,SF,away,KC,G.Kittle,00-0033288,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2023,22,TE,SF,2.40,"[00-0027857, 00-0029892, 00-0032128, 00-003215...",22
11024,2023_22_SF_KC,SF,away,KC,J.Jennings,00-0036259,1.0,21.0,1.0,0.0,...,1.0,0.0,0.0,2023,22,WR,SF,16.91,"[00-0027857, 00-0029892, 00-0032128, 00-003215...",22
