In [3]:
import pandas as pd
import io

# Assuming the data is in a file named 'player_stats.csv'
df = pd.read_csv('data//player_stats.csv')

# Convert 'season' to numeric, coercing any non-numeric values to NaN
df['season'] = pd.to_numeric(df['season'], errors='coerce')

# Filter for regular season games
df_reg = df[df['season_type'] == 'REG']

# 1. Last season (2023) values
df_2023 = df_reg[df_reg['season'] == 2023].groupby('player_name').agg({
    'week': 'count',
    'completions': 'sum',
    'attempts': 'sum',
    'passing_yards': 'sum',
    'passing_tds': 'sum',
    'interceptions': 'sum',
    'carries': 'sum',
    'rushing_yards': 'sum',
    'rushing_tds': 'sum',
    'receptions': 'sum',
    'targets': 'sum',
    'receiving_yards': 'sum',
    'receiving_tds': 'sum',
    'fantasy_points': 'sum',
    'fantasy_points_ppr': 'sum'
}).reset_index().rename(columns={'week': 'games_played'})

# 2. Average of first 3 career years
df_sorted = df_reg.sort_values(['player_name', 'season'])
df_first_3 = df_sorted.groupby('player_name').head(48)  # Assuming 16 games per season
df_first_3_avg = df_first_3.groupby(['player_name', 'season']).agg({
    'week': 'count',
    'completions': 'sum',
    'attempts': 'sum',
    'passing_yards': 'sum',
    'passing_tds': 'sum',
    'interceptions': 'sum',
    'carries': 'sum',
    'rushing_yards': 'sum',
    'rushing_tds': 'sum',
    'receptions': 'sum',
    'targets': 'sum',
    'receiving_yards': 'sum',
    'receiving_tds': 'sum',
    'fantasy_points': 'sum',
    'fantasy_points_ppr': 'sum'
}).groupby('player_name').mean().reset_index().rename(columns={'week': 'games_played'})

# 3. Average of last 3 available years
df_last_3 = df_sorted.groupby('player_name').tail(48)  # Assuming 16 games per season
df_last_3_avg = df_last_3.groupby(['player_name', 'season']).agg({
    'week': 'count',
    'completions': 'sum',
    'attempts': 'sum',
    'passing_yards': 'sum',
    'passing_tds': 'sum',
    'interceptions': 'sum',
    'carries': 'sum',
    'rushing_yards': 'sum',
    'rushing_tds': 'sum',
    'receptions': 'sum',
    'targets': 'sum',
    'receiving_yards': 'sum',
    'receiving_tds': 'sum',
    'fantasy_points': 'sum',
    'fantasy_points_ppr': 'sum'
}).groupby('player_name').mean().reset_index().rename(columns={'week': 'games_played'})

# Save the results to CSV files
df_2023.to_csv('results//2023_season_stats.csv', index=False)
df_first_3_avg.to_csv('results//first_3_years_avg_stats.csv', index=False)
df_last_3_avg.to_csv('results//last_3_years_avg_stats.csv', index=False)

print("Data processing completed. Results saved to CSV files.")

# Display the first few rows of each DataFrame
print("\n2023 Season Stats:")
print(df_2023.head())

print("\nFirst 3 Years Average Stats:")
print(df_first_3_avg.head())

print("\nLast 3 Years Average Stats:")
print(df_last_3_avg.head())

Data processing completed. Results saved to CSV files.

2023 Season Stats:
  player_name  week  completions  attempts  passing_yards  passing_tds  \
0  A.Abdullah    16            0         0              0            0   
1     A.Armah     3            0         0              0            0   
2      A.Beck    12            0         0              0            0   
3     A.Brown    17            0         0              0            0   
4    A.Cooper    15            0         0              0            0   

   interceptions  carries  rushing_yards  rushing_tds  receptions  targets  \
0              0       15             89            0          19       24   
1              0        0              0            0           3        3   
2              0        5              3            1          11       13   
3              0        0              0            0         106      158   
4              0        0              0            0          72      128   

   receivin

In [6]:
df_last_3_avg.head()

Unnamed: 0,player_name,week,completions,attempts,passing_yards,passing_tds,interceptions,carries,rushing_yards,rushing_tds,receptions,targets,receiving_yards,receiving_tds,fantasy_points,fantasy_points_ppr
0,A.Abdullah,12.0,0.0,0.0,0.0,0.0,0.0,19.0,77.5,0.0,22.0,29.0,166.5,0.75,28.9,50.9
1,A.Armah,6.8,0.0,0.0,0.0,0.0,0.0,5.2,11.2,0.6,2.4,3.2,8.6,0.2,6.78,9.18
2,A.Auclair,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.8,26.2,0.2,3.82,6.82
3,A.Baccellia,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,10.0,45.0,0.0,4.5,11.5
4,A.Bachman,1.0,0.0,0.0,0.0,0.0,0.0,1.0,-3.0,0.0,0.0,0.0,0.0,0.0,-0.3,-0.3


In [32]:
import pandas as pd
import io
import numpy as np

# Assuming the data is in a file named 'player_stats.csv'
df = pd.read_csv('data//player_stats.csv')


# Filter for regular season games
df = df[df['season_type'] == 'REG']
df['games_played'] = 1

# List of columns to sum across seasons for each player and position
columns_to_sum = ['games_played',
    'completions', 'attempts', 'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
    'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards', 'passing_yards_after_catch',
    'passing_first_downs', 'passing_epa', 'passing_2pt_conversions', 'pacr', 'dakota', 'carries',
    'rushing_yards', 'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost', 'rushing_first_downs',
    'rushing_epa', 'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards', 'receiving_tds',
    'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards', 'receiving_yards_after_catch',
    'receiving_first_downs', 'receiving_epa', 'receiving_2pt_conversions', 'racr', 'target_share',
    'air_yards_share', 'wopr', 'special_teams_tds', 'fantasy_points', 'fantasy_points_ppr'
]

# Group by player_display_name and position, then sum the relevant columns
season_avg = df.groupby(['player_display_name', 'position','season'])[columns_to_sum].sum().reset_index()


In [37]:
data=season_avg.copy()

In [40]:
numerical_columns

Index(['season', 'completions', 'attempts', 'passing_yards', 'passing_tds',
       'interceptions', 'sacks', 'sack_yards', 'sack_fumbles',
       'sack_fumbles_lost', 'passing_air_yards', 'passing_yards_after_catch',
       'passing_first_downs', 'passing_epa', 'passing_2pt_conversions', 'pacr',
       'dakota', 'carries', 'rushing_yards', 'rushing_tds', 'rushing_fumbles',
       'rushing_fumbles_lost', 'rushing_first_downs', 'rushing_epa',
       'rushing_2pt_conversions', 'receptions', 'targets', 'receiving_yards',
       'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost',
       'receiving_air_yards', 'receiving_yards_after_catch',
       'receiving_first_downs', 'receiving_epa', 'receiving_2pt_conversions',
       'racr', 'target_share', 'air_yards_share', 'wopr', 'special_teams_tds',
       'fantasy_points', 'fantasy_points_ppr'],
      dtype='object')

In [51]:
# Select numerical columns only
numerical_columns = data.select_dtypes(include='number').columns

# Drop 'games_played' and 'season' from the numerical columns, if they exist
numerical_columns = numerical_columns.drop(['games_played', 'season'], errors='ignore')

# Define the original weights for the last three seasons
original_weights = np.array([0.2, 0.3, 0.5])

# Initialize the result DataFrame with player_display_name, position, and season
cumulative_df = data[['player_display_name', 'position', 'season']].copy()

# Calculate the rolling weighted average for each numerical column
for column in numerical_columns:
    def custom_weighted_average(x):
        available_weights = original_weights[-len(x):]  # Adjust weights based on available history
        available_weights = available_weights / available_weights.sum()  # Normalize weights to sum to 1
        return np.dot(x, available_weights)
    
    cumulative_df[f'avg_{column}'] = data.groupby(['player_display_name', 'position'])[column].apply(
        lambda x: x.rolling(window=3, min_periods=1).apply(custom_weighted_average, raw=True)
    ).reset_index(level=[0,1], drop=True)

# Calculate cumulative sum for 'games_played'
if 'games_played' in data.columns:
    cumulative_df['games_played_so_far'] = data.groupby(['player_display_name', 'position'])['games_played'].cumsum()







In [55]:
cumulative_df[cumulative_df['player_display_name']=='Sam LaPorta']

Unnamed: 0,player_display_name,position,season,cumulative_weighted_avg_completions,cumulative_weighted_avg_attempts,cumulative_weighted_avg_passing_yards,cumulative_weighted_avg_passing_tds,cumulative_weighted_avg_interceptions,cumulative_weighted_avg_sacks,cumulative_weighted_avg_sack_yards,...,cumulative_weighted_avg_receiving_epa,cumulative_weighted_avg_receiving_2pt_conversions,cumulative_weighted_avg_racr,cumulative_weighted_avg_target_share,cumulative_weighted_avg_air_yards_share,cumulative_weighted_avg_wopr,cumulative_weighted_avg_special_teams_tds,cumulative_weighted_avg_fantasy_points,cumulative_weighted_avg_fantasy_points_ppr,cumulative_sum_games_played
12317,Sam LaPorta,TE,2023,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,38.326895,2.0,27.272639,3.630836,3.624208,7.9832,0.0,153.3,239.3,17
