### **Notebook objective**: create a rating per team and then have a running mean for each team for season 22/23 <br> - 6 rates per team : passing, tackles, rushing, sacks, receiving, interceptions <br> - Running mean : the running mean for the last 5 games will define the rating features for the prediction model

In [50]:
import pandas as pd
import numpy as np
import requests
import os
import json

pd.options.mode.chained_assignment = None  # default='warn'

df = pd.read_json("espn_rosters (1).json")

#### DATA CLEANING

In [51]:
#Replace " " in columns' name by "_"
df.columns = df.columns.str.replace(' ','_')
df.head()

Unnamed: 0,game_id,team,player_name,player_url,pass_completion,pass_yds,pass_avg,pass_td,pass_int,sacks,...,kicking_pct,kicking_long,kicking_xp,kicking_pts,punting_no,punting_yds,punting_avg,punting_tb,punting_in_20,punting_long
0,401220131,Dolphins,Zach Sieler,https://www.espn.com/nfl/player/_/id/3057956/z...,,,,,,,...,,,,,,,,,,
1,401220131,Dolphins,Patrick Laird,https://www.espn.com/nfl/player/_/id/3127211/p...,,,,,,,...,,,,,,,,,,
2,401220313,Seahawks,Chris Carson,https://www.espn.com/nfl/player/_/id/3919596/c...,,,,,,,...,,,,,,,,,,
3,401220131,Dolphins,Jerome Baker,https://www.espn.com/nfl/player/_/id/3915507/j...,,,,,,,...,,,,,,,,,,
4,401220225,Texans,Randall Cobb,https://www.espn.com/nfl/player/_/id/14053/ran...,,,,,,,...,,,,,,,,,,


##### Missing values

In [52]:
#Verification of the columns with no missing value
clean_columns = []
for column in df.columns:
    if 100*df[column].isnull().sum()/df.shape[0] == 0.000000 :
        clean_columns.append(column)
display(clean_columns)

#Replace all the missing value with 0 because the NaN means the metrics is not relevent for the concerned player
df = df.fillna(0)

#Check of the percentage of missing values after cleaning
display(100*df.isnull().sum()/df.shape[0])

['game_id', 'team', 'player_name', 'player_url']

game_id              0.0
team                 0.0
player_name          0.0
player_url           0.0
pass_completion      0.0
pass_yds             0.0
pass_avg             0.0
pass_td              0.0
pass_int             0.0
sacks                0.0
qbr                  0.0
rtg                  0.0
rush_car             0.0
rush_yds             0.0
rush_avg             0.0
rush_td              0.0
rush_long            0.0
receptions           0.0
rec_yds              0.0
rec_avg              0.0
rec_td               0.0
rec_long             0.0
rec_tgs              0.0
fumbles              0.0
fumbles_lost         0.0
fumbles_rec          0.0
defense_tot          0.0
defense_solo         0.0
defense_sacks        0.0
defense_tfl          0.0
defense_pd           0.0
defense_qb_hits      0.0
defense_td           0.0
interceptions        0.0
interceptions_yds    0.0
interceptions_td     0.0
kicks_return_no      0.0
kicks_return_yds     0.0
kicks_return_avg     0.0
kicks_return_long    0.0


##### Columns' types

In [53]:
#Pass completion: transform "x/y" (str) into a ratio (float)
df['pc_1'] = df['pass_completion'].apply(lambda x : x.split('/')[0] if x!=0 else x)
df['pc_2'] = df['pass_completion'].apply(lambda x : x.split('/')[1] if x!=0 else x)

#Convert str to float in pc_1 and pc_2 before looping
df['pc_1'] = df['pc_1'].astype(float)
df['pc_2'] = df['pc_2'].astype(float)

for i in range(len(df)):
    if df['pc_2'].iloc[i] != 0 :
        df['pass_completion'].iloc[i] = df['pc_1'].iloc[i] / df['pc_2'].iloc[i]
    else: df['pass_completion'].iloc[i] = 0

#Sacks : transform "x-y" (str) into a ratio (float)
df['sacks_1'] = df['sacks'].apply(lambda x : x.split('-')[0] if x!=0 else x)
df['sacks_2'] = df['sacks'].apply(lambda x : x.split('-')[1] if x!=0 else x)

#Convert str to float in sacks_1 and sacks_2 before looping
df['sacks_1'] = df['sacks_1'].astype(float)
df['sacks_2'] = df['sacks_2'].astype(float)

for i in range(len(df)):
    if df['sacks_2'].iloc[i] != 0:
        df['sacks'].iloc[i] = df['sacks_1'].iloc[i] / df['sacks_2'].iloc[i]
    else: df['sacks'].iloc[i] = 0

In [57]:
df['pass_completion']=df.pass_completion.apply(lambda x : float(x))
df['sacks']=df.sacks.apply(lambda x : float(x))

In [59]:
df['qbr']=df.sacks.apply(lambda x : float(x))

In [60]:
#Check of the global datatypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 79606 entries, 0 to 79605
Data columns (total 61 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   game_id            79606 non-null  int64  
 1   team               79606 non-null  object 
 2   player_name        79606 non-null  object 
 3   player_url         79606 non-null  object 
 4   pass_completion    79606 non-null  float64
 5   pass_yds           79606 non-null  float64
 6   pass_avg           79606 non-null  float64
 7   pass_td            79606 non-null  float64
 8   pass_int           79606 non-null  float64
 9   sacks              79606 non-null  float64
 10  qbr                79606 non-null  float64
 11  rtg                79606 non-null  float64
 12  rush_car           79606 non-null  float64
 13  rush_yds           79606 non-null  float64
 14  rush_avg           79606 non-null  float64
 15  rush_td            79606 non-null  float64
 16  rush_long          796

In [61]:
#Drop useless preprocessing columns (pc_1, pc_2, sacks_1, sacks_2)
df = df.drop(columns=['pc_1', 'pc_2', 'sacks_1', 'sacks_2'], axis=1)

##### Total metrics by player

In [62]:
#Create a new column with the sum of all available metrics per player
metrics = ['pass_completion',
       'pass_yds', 'pass_avg', 'pass_td', 'pass_int', 'sacks', 'qbr', 'rtg',
       'rush_car', 'rush_yds', 'rush_avg', 'rush_td', 'rush_long',
       'receptions', 'rec_yds', 'rec_avg', 'rec_td', 'rec_long', 'rec_tgs',
       'fumbles', 'fumbles_lost', 'fumbles_rec', 'defense_tot', 'defense_solo',
       'defense_sacks', 'defense_tfl', 'defense_pd', 'defense_qb_hits',
       'defense_td', 'interceptions', 'interceptions_yds', 'interceptions_td',
       'kicks_return_no', 'kicks_return_yds', 'kicks_return_avg',
       'kicks_return_long', 'kicks_return_td', 'punt_return_no',
       'punt_return_yds', 'punt_return_avg', 'punt_return_long',
       'punt_return_td', 'kicking_fg', 'kicking_pct', 'kicking_long',
       'kicking_xp', 'kicking_pts', 'punting_no', 'punting_yds', 'punting_avg',
       'punting_tb', 'punting_in_20', 'punting_long']

df['rating_player'] = df[metrics].sum(axis=1)
df.head()

  df['rating_player'] = df[metrics].sum(axis=1)


Unnamed: 0,game_id,team,player_name,player_url,pass_completion,pass_yds,pass_avg,pass_td,pass_int,sacks,...,kicking_long,kicking_xp,kicking_pts,punting_no,punting_yds,punting_avg,punting_tb,punting_in_20,punting_long,rating_player
0,401220131,Dolphins,Zach Sieler,https://www.espn.com/nfl/player/_/id/3057956/z...,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,401220131,Dolphins,Patrick Laird,https://www.espn.com/nfl/player/_/id/3127211/p...,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23.5
2,401220313,Seahawks,Chris Carson,https://www.espn.com/nfl/player/_/id/3919596/c...,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,127.0
3,401220131,Dolphins,Jerome Baker,https://www.espn.com/nfl/player/_/id/3915507/j...,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31.0
4,401220225,Texans,Randall Cobb,https://www.espn.com/nfl/player/_/id/14053/ran...,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,56.5


##### RATING FUNCTION

#### First thing first: a rating taking into account all the metrics

In [67]:
#Define a function to calculate a rating per team
def calculate_rating_team(idgame, team):
    df_game=df[df['game_id'] == idgame]
    df_team=df_game[df_game['team'] == team]
    rating_team = df_team['rating_player'].sum()
    return team, rating_team

###### Test of the function with one game id

In [71]:
test1 = df[df['game_id'] == 401220131]
test1['team'].unique()

array(['Dolphins', 'Patriots'], dtype=object)

In [68]:
calculate_rating_team(401220131, 'Dolphins')

('Dolphins', 1364.9888888888888)

In [72]:
calculate_rating_team(401220131, 'Patriots')

('Patriots', 1346.5561403508773)

###### Looks like it is working, youpi ! Let's industrialize now !

#### A rating per metric

In [77]:
len(metrics) # The number of rating we will have at the end

53