In [15]:
import pandas as pd
import numpy as np
import requests
import os
import json

pd.options.mode.chained_assignment = None  # default='warn'

In [16]:
#Import and convert json to pd.df
df = pd.read_json("espn_rosters.json")

#Have a first view of the dataset
df.head()

Unnamed: 0,game_id,team,player_name,player_url,pass completion,pass yds,pass avg,pass td,pass int,sacks,...,kicking pct,kicking long,kicking xp,kicking pts,punting no,punting yds,punting avg,punting tb,punting in 20,punting long
0,401030690,Ravens,Kenny Young,https://www.espn.com/nfl/player/_/id/3134310/k...,,,,,,,...,,,,,,,,,,
1,401030690,Ravens,Willie Snead IV,https://www.espn.com/nfl/player/_/id/17258/wil...,,,,,,,...,,,,,,,,,,
2,400951566,Chiefs,Dee Ford,https://www.espn.com/nfl/player/_/id/16707/dee...,,,,,,,...,,,,,,,,,,
3,401030690,Ravens,Patrick Onwuasor,https://www.espn.com/nfl/player/_/id/2576761/p...,,,,,,,...,,,,,,,,,,
4,400951566,Chiefs,Eric Berry,https://www.espn.com/nfl/player/_/id/13252/eri...,,,,,,,...,,,,,,,,,,


In [17]:
#Dataset shape
df.shape

(79625, 57)

In [18]:
#Print the list of the columns name
df.columns

Index(['game_id', 'team', 'player_name', 'player_url', 'pass completion',
       'pass yds', 'pass avg', 'pass td', 'pass int', 'sacks', 'qbr', 'rtg',
       'rush car', 'rush yds', 'rush avg', 'rush td', 'rush long',
       'receptions', 'rec yds', 'rec avg', 'rec td', 'rec long', 'rec tgs',
       'fumbles', 'fumbles lost', 'fumbles rec', 'defense tot', 'defense solo',
       'defense sacks', 'defense tfl', 'defense pd', 'defense qb hits',
       'defense td', 'interceptions', 'interceptions yds', 'interceptions td',
       'kicks return no', 'kicks return yds', 'kicks return avg',
       'kicks return long', 'kicks return td', 'punt return no',
       'punt return yds', 'punt return avg', 'punt return long',
       'punt return td', 'kicking fg', 'kicking pct', 'kicking long',
       'kicking xp', 'kicking pts', 'punting no', 'punting yds', 'punting avg',
       'punting tb', 'punting in 20', 'punting long'],
      dtype='object')

In [19]:
#Basics statistics
data_desc = df.describe(include='all')
display(data_desc)
print()

#Percentage of missing values
display(100*df.isnull().sum()/df.shape[0])

Unnamed: 0,game_id,team,player_name,player_url,pass completion,pass yds,pass avg,pass td,pass int,sacks,...,kicking pct,kicking long,kicking xp,kicking pts,punting no,punting yds,punting avg,punting tb,punting in 20,punting long
count,79625.0,79625,79625,79625,1572,1572.0,1572.0,1572.0,1572.0,1572,...,1280.0,1280.0,1280,1280.0,1284.0,1284.0,1284.0,1284.0,1284.0,1284.0
unique,,33,3411,4619,495,,,,,199,...,,,23,,,,,,,
top,,Ravens,Chris Jones,https://www.espn.com/nfl/player/_/id/2574056/k...,1/1,,,,,0-0,...,,,2/2,,,,,,,
freq,,2596,148,81,75,,,,,373,...,,,295,,,,,,,
mean,401134600.0,,,,,203.461832,7.25827,1.267812,0.677481,,...,73.722031,33.5875,,6.989844,4.246106,193.543614,45.447586,0.264798,1.544393,53.780374
std,133924.1,,,,,110.372584,4.371449,1.180753,0.889676,,...,38.494789,17.935254,,3.702889,1.865348,88.828307,5.370626,0.516274,1.213868,7.304643
min,400950200.0,,,,,0.0,0.0,0.0,0.0,,...,0.0,0.0,,0.0,1.0,16.0,16.0,0.0,0.0,16.0
25%,401030800.0,,,,,133.75,5.5,0.0,0.0,,...,50.0,27.0,,4.0,3.0,127.0,42.0,0.0,1.0,49.0
50%,401128000.0,,,,,216.5,6.9,1.0,0.0,,...,100.0,39.0,,7.0,4.0,185.0,45.6,0.0,1.0,54.0
75%,401220300.0,,,,,282.0,8.5,2.0,1.0,,...,100.0,47.0,,9.0,5.25,251.25,49.0,0.0,2.0,58.0





game_id               0.000000
team                  0.000000
player_name           0.000000
player_url            0.000000
pass completion      98.025746
pass yds             98.025746
pass avg             98.025746
pass td              98.025746
pass int             98.025746
sacks                98.025746
qbr                  98.025746
rtg                  98.025746
rush car             93.342543
rush yds             93.342543
rush avg             93.342543
rush td              93.342543
rush long            93.342543
receptions           86.840816
rec yds              86.840816
rec avg              86.840816
rec td               86.840816
rec long             86.840816
rec tgs              86.840816
fumbles              97.319937
fumbles lost         97.319937
fumbles rec          97.319937
defense tot          68.094192
defense solo         68.094192
defense sacks        68.094192
defense tfl          68.094192
defense pd           68.094192
defense qb hits      68.094192
defense 

### DATA CLEANING

#### Missing values

In [20]:
#Verification of the columns with no missing value
clean_columns = []
for column in df.columns:
    if 100*df[column].isnull().sum()/df.shape[0] == 0.000000 :
        clean_columns.append(column)
clean_columns

['game_id', 'team', 'player_name', 'player_url']

In [21]:
#Replace all the missing value with 0 because the NaN means the metrics is not relevent for the concerned player
df = df.fillna(0)
df.head()


Unnamed: 0,game_id,team,player_name,player_url,pass completion,pass yds,pass avg,pass td,pass int,sacks,...,kicking pct,kicking long,kicking xp,kicking pts,punting no,punting yds,punting avg,punting tb,punting in 20,punting long
0,401030690,Ravens,Kenny Young,https://www.espn.com/nfl/player/_/id/3134310/k...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,401030690,Ravens,Willie Snead IV,https://www.espn.com/nfl/player/_/id/17258/wil...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,400951566,Chiefs,Dee Ford,https://www.espn.com/nfl/player/_/id/16707/dee...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,401030690,Ravens,Patrick Onwuasor,https://www.espn.com/nfl/player/_/id/2576761/p...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,400951566,Chiefs,Eric Berry,https://www.espn.com/nfl/player/_/id/13252/eri...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [22]:
#Check of the percentage of missing values after cleaning
display(100*df.isnull().sum()/df.shape[0])

game_id              0.0
team                 0.0
player_name          0.0
player_url           0.0
pass completion      0.0
pass yds             0.0
pass avg             0.0
pass td              0.0
pass int             0.0
sacks                0.0
qbr                  0.0
rtg                  0.0
rush car             0.0
rush yds             0.0
rush avg             0.0
rush td              0.0
rush long            0.0
receptions           0.0
rec yds              0.0
rec avg              0.0
rec td               0.0
rec long             0.0
rec tgs              0.0
fumbles              0.0
fumbles lost         0.0
fumbles rec          0.0
defense tot          0.0
defense solo         0.0
defense sacks        0.0
defense tfl          0.0
defense pd           0.0
defense qb hits      0.0
defense td           0.0
interceptions        0.0
interceptions yds    0.0
interceptions td     0.0
kicks return no      0.0
kicks return yds     0.0
kicks return avg     0.0
kicks return long    0.0


#### Columns' name

In [23]:
#Replace " " in columns' name by "_"
df.columns = df.columns.str.replace(' ','_')
df.head()

Unnamed: 0,game_id,team,player_name,player_url,pass_completion,pass_yds,pass_avg,pass_td,pass_int,sacks,...,kicking_pct,kicking_long,kicking_xp,kicking_pts,punting_no,punting_yds,punting_avg,punting_tb,punting_in_20,punting_long
0,401030690,Ravens,Kenny Young,https://www.espn.com/nfl/player/_/id/3134310/k...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,401030690,Ravens,Willie Snead IV,https://www.espn.com/nfl/player/_/id/17258/wil...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,400951566,Chiefs,Dee Ford,https://www.espn.com/nfl/player/_/id/16707/dee...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,401030690,Ravens,Patrick Onwuasor,https://www.espn.com/nfl/player/_/id/2576761/p...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,400951566,Chiefs,Eric Berry,https://www.espn.com/nfl/player/_/id/13252/eri...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


#### Columns with metrics type (str to int)

In [24]:
#Create a list with all the columns with metrics
metric_columns = [c for c in df.columns if c!=clean_columns]
metric_columns = metric_columns[4:]
metric_columns

['pass_completion',
 'pass_yds',
 'pass_avg',
 'pass_td',
 'pass_int',
 'sacks',
 'qbr',
 'rtg',
 'rush_car',
 'rush_yds',
 'rush_avg',
 'rush_td',
 'rush_long',
 'receptions',
 'rec_yds',
 'rec_avg',
 'rec_td',
 'rec_long',
 'rec_tgs',
 'fumbles',
 'fumbles_lost',
 'fumbles_rec',
 'defense_tot',
 'defense_solo',
 'defense_sacks',
 'defense_tfl',
 'defense_pd',
 'defense_qb_hits',
 'defense_td',
 'interceptions',
 'interceptions_yds',
 'interceptions_td',
 'kicks_return_no',
 'kicks_return_yds',
 'kicks_return_avg',
 'kicks_return_long',
 'kicks_return_td',
 'punt_return_no',
 'punt_return_yds',
 'punt_return_avg',
 'punt_return_long',
 'punt_return_td',
 'kicking_fg',
 'kicking_pct',
 'kicking_long',
 'kicking_xp',
 'kicking_pts',
 'punting_no',
 'punting_yds',
 'punting_avg',
 'punting_tb',
 'punting_in_20',
 'punting_long']

In [25]:
len(metric_columns)

53

In [26]:
#Identify metrics that are not float or integer
cat_metrics = [] #Metrics that are categorical
for col in metric_columns:
    if type(col) != 
df.dtypes

SyntaxError: invalid syntax (3670693841.py, line 4)

#### Replace team Redskins by Washington (same team)

In [27]:
df["team"].replace("Redskins", "Washington", inplace=True)
df["team"].unique()

array(['Ravens', 'Chiefs', 'Vikings', 'Texans', 'Panthers', 'Packers',
       'Chargers', 'Dolphins', 'Falcons', 'Browns', 'Jets', 'Cardinals',
       'Eagles', 'Patriots', 'Titans', 'Buccaneers', 'Colts', 'Steelers',
       'Bengals', 'Bears', 'Saints', 'Bills', 'Washington', 'Rams',
       'Raiders', '49ers', 'Lions', 'Seahawks', 'Cowboys', 'Jaguars',
       'Broncos', 'Giants'], dtype=object)

### FIRST EXPLORATION - UNDERSTAND THE DATAS

In [28]:
nb_game = df.game_id.unique().shape[0]
nb_team = df.team.unique().shape[0]
list_team = [df.team.unique()]
nb_players = df.player_name.unique().shape[0]

print('Total games:', nb_game)
print('Total teams:', nb_team)
print('Teams list:', list_team)
print('Total players:', nb_players)

Total games: 1294
Total teams: 32
Teams list: [array(['Ravens', 'Chiefs', 'Vikings', 'Texans', 'Panthers', 'Packers',
       'Chargers', 'Dolphins', 'Falcons', 'Browns', 'Jets', 'Cardinals',
       'Eagles', 'Patriots', 'Titans', 'Buccaneers', 'Colts', 'Steelers',
       'Bengals', 'Bears', 'Saints', 'Bills', 'Washington', 'Rams',
       'Raiders', '49ers', 'Lions', 'Seahawks', 'Cowboys', 'Jaguars',
       'Broncos', 'Giants'], dtype=object)]
Total players: 3411


In [29]:
df.groupby(df.team).sum()

Unnamed: 0_level_0,game_id,pass_yds,pass_avg,pass_td,pass_int,rtg,rush_car,rush_yds,rush_avg,rush_td,...,punt_return_td,kicking_pct,kicking_long,kicking_pts,punting_no,punting_yds,punting_avg,punting_tb,punting_in_20,punting_long
team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
49ers,1015668135744,10835.0,449.2,59.0,42.0,4708.5,1183.0,5173.0,792.8,42.0,...,0.0,3397.4,1606.0,342.0,158.0,6996.0,1759.6,5.0,54.0,2016.0
Bears,991209614341,9342.0,321.3,48.0,41.0,4109.6,1079.0,4369.0,734.6,32.0,...,2.0,3033.7,1299.0,273.0,173.0,8149.0,1910.4,12.0,57.0,2271.0
Bengals,998817007043,9430.0,373.9,65.0,29.0,4264.2,908.0,3573.0,608.0,23.0,...,0.0,2941.7,1359.0,246.0,197.0,9099.0,1830.0,9.0,67.0,2133.0
Bills,1001630580988,9994.0,383.0,66.0,35.0,4518.6,1069.0,4608.0,637.7,33.0,...,0.0,3158.4,1397.0,290.0,166.0,7586.0,1779.5,14.0,65.0,2110.0
Broncos,984382664896,9150.0,372.5,46.0,45.0,3875.9,1058.0,4667.0,699.7,29.0,...,1.0,2900.4,1388.0,255.0,179.0,7925.0,1799.1,9.0,63.0,2133.0
Browns,1003239509306,9888.0,397.4,63.0,48.0,4209.9,1003.0,4963.0,687.2,38.0,...,0.0,2708.7,1207.0,230.0,172.0,7857.0,1873.6,10.0,53.0,2215.0
Buccaneers,989999385827,13008.0,389.6,92.0,38.0,4810.3,1011.0,3772.0,611.8,37.0,...,0.0,3101.8,1362.0,324.0,143.0,6113.0,1716.9,7.0,50.0,2034.0
Cardinals,989195203601,9383.0,390.7,51.0,28.0,4383.6,1102.0,4609.0,635.8,43.0,...,0.0,2604.7,1249.0,291.0,185.0,8630.0,1864.6,16.0,59.0,2220.0
Chargers,994418363617,10760.0,337.4,78.0,33.0,4208.3,1024.0,4195.0,632.0,34.0,...,1.0,2208.3,990.0,214.0,146.0,6848.0,1629.4,15.0,32.0,1917.0
Chiefs,1004448781496,12735.0,378.5,106.0,23.0,5083.6,1015.0,4797.0,973.8,34.0,...,3.0,3113.7,1358.0,345.0,123.0,5438.0,1718.1,8.0,48.0,2033.0


#### Identification of the most differentiating metrics (highest STD)

In [30]:
data_desc

Unnamed: 0,game_id,team,player_name,player_url,pass completion,pass yds,pass avg,pass td,pass int,sacks,...,kicking pct,kicking long,kicking xp,kicking pts,punting no,punting yds,punting avg,punting tb,punting in 20,punting long
count,79625.0,79625,79625,79625,1572,1572.0,1572.0,1572.0,1572.0,1572,...,1280.0,1280.0,1280,1280.0,1284.0,1284.0,1284.0,1284.0,1284.0,1284.0
unique,,33,3411,4619,495,,,,,199,...,,,23,,,,,,,
top,,Ravens,Chris Jones,https://www.espn.com/nfl/player/_/id/2574056/k...,1/1,,,,,0-0,...,,,2/2,,,,,,,
freq,,2596,148,81,75,,,,,373,...,,,295,,,,,,,
mean,401134600.0,,,,,203.461832,7.25827,1.267812,0.677481,,...,73.722031,33.5875,,6.989844,4.246106,193.543614,45.447586,0.264798,1.544393,53.780374
std,133924.1,,,,,110.372584,4.371449,1.180753,0.889676,,...,38.494789,17.935254,,3.702889,1.865348,88.828307,5.370626,0.516274,1.213868,7.304643
min,400950200.0,,,,,0.0,0.0,0.0,0.0,,...,0.0,0.0,,0.0,1.0,16.0,16.0,0.0,0.0,16.0
25%,401030800.0,,,,,133.75,5.5,0.0,0.0,,...,50.0,27.0,,4.0,3.0,127.0,42.0,0.0,1.0,49.0
50%,401128000.0,,,,,216.5,6.9,1.0,0.0,,...,100.0,39.0,,7.0,4.0,185.0,45.6,0.0,1.0,54.0
75%,401220300.0,,,,,282.0,8.5,2.0,1.0,,...,100.0,47.0,,9.0,5.25,251.25,49.0,0.0,2.0,58.0


In [31]:
std_analysis = data_desc.drop(clean_columns, axis=1)
std_analysis

Unnamed: 0,pass completion,pass yds,pass avg,pass td,pass int,sacks,qbr,rtg,rush car,rush yds,...,kicking pct,kicking long,kicking xp,kicking pts,punting no,punting yds,punting avg,punting tb,punting in 20,punting long
count,1572,1572.0,1572.0,1572.0,1572.0,1572,1572,1572.0,5301.0,5301.0,...,1280.0,1280.0,1280,1280.0,1284.0,1284.0,1284.0,1284.0,1284.0,1284.0
unique,495,,,,,199,768,,,,...,,,23,,,,,,,
top,1/1,,,,,0-0,--,,,,...,,,2/2,,,,,,,
freq,75,,,,,373,30,,,,...,,,295,,,,,,,
mean,,203.461832,7.25827,1.267812,0.677481,,,87.468511,6.403697,27.314469,...,73.722031,33.5875,,6.989844,4.246106,193.543614,45.447586,0.264798,1.544393,53.780374
std,,110.372584,4.371449,1.180753,0.889676,,,31.342696,6.240587,31.937518,...,38.494789,17.935254,,3.702889,1.865348,88.828307,5.370626,0.516274,1.213868,7.304643
min,,0.0,0.0,0.0,0.0,,,0.0,0.0,-12.0,...,0.0,0.0,,0.0,1.0,16.0,16.0,0.0,0.0,16.0
25%,,133.75,5.5,0.0,0.0,,,68.1,1.0,5.0,...,50.0,27.0,,4.0,3.0,127.0,42.0,0.0,1.0,49.0
50%,,216.5,6.9,1.0,0.0,,,88.1,4.0,16.0,...,100.0,39.0,,7.0,4.0,185.0,45.6,0.0,1.0,54.0
75%,,282.0,8.5,2.0,1.0,,,110.3,10.0,40.0,...,100.0,47.0,,9.0,5.25,251.25,49.0,0.0,2.0,58.0


#### First team rating attempt & issue detection in the dataset

In [32]:
game_test = df[df['game_id'] == 401030690]
game_test

Unnamed: 0,game_id,team,player_name,player_url,pass_completion,pass_yds,pass_avg,pass_td,pass_int,sacks,...,kicking_pct,kicking_long,kicking_xp,kicking_pts,punting_no,punting_yds,punting_avg,punting_tb,punting_in_20,punting_long
0,401030690,Ravens,Kenny Young,https://www.espn.com/nfl/player/_/id/3134310/k...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,401030690,Ravens,Willie Snead IV,https://www.espn.com/nfl/player/_/id/17258/wil...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,401030690,Ravens,Patrick Onwuasor,https://www.espn.com/nfl/player/_/id/2576761/p...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,401030690,Ravens,Brent Urban,https://www.espn.com/nfl/player/_/id/16831/bre...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,401030690,Ravens,Za'Darius Smith,https://www.espn.com/nfl/player/_/id/3043168/z...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1489,401030690,Bengals,Tyler Kroft,https://www.espn.com/nfl/player/_/id/2582410/t...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1518,401030690,Bengals,Alex Erickson,https://www.espn.com/nfl/player/_/id/2977800/a...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1547,401030690,Bengals,Tyler Boyd,https://www.espn.com/nfl/player/_/id/3045144/t...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1575,401030690,Bengals,John Ross,https://www.espn.com/nfl/player/_/id/3052177/j...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
team1 = game_test.team.unique()[0]
team2 = game_test.team.unique()[1]

In [33]:
team1

NameError: name 'team1' is not defined

In [None]:
game_test.loc[(game_test.team==team2)].sum()

game_id                                                    12431951390
team                 BengalsBengalsBengalsBengalsBengalsBengalsBeng...
player_name          Vincent ReyKevin HuberC.J. UzomahRyan GlasgowS...
player_url           https://www.espn.com/nfl/player/_/id/13766/vin...
pass_completion                                                      0
pass_yds                                                           0.0
pass_avg                                                           0.0
pass_td                                                            0.0
pass_int                                                           0.0
sacks                                                                0
qbr                                                                  0
rtg                                                                0.0
rush_car                                                           0.0
rush_yds                                                           0.0
rush_a

In [34]:
game_test.loc[(game_test.team==team1)].sum()

NameError: name 'team1' is not defined

In [None]:
rating_team1 = 0
rating_team2 = 0

for i in range(len(game_test)):
    if game_test['team'][i] == team1:
        rating_team1 =+ game_test.loc[i].sum()
    else: rating_team2 =+ game_test.loc[i].sum()
print(team1, ":",rating_team1)
print(team2, ":",rating_team2)

UFuncTypeError: ufunc 'add' did not contain a loop with signature matching types (dtype('int64'), dtype('<U6')) -> None

In [35]:
game_test['team'][1]

'Ravens'

In [None]:
#Standardazing the metrics to compare them
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

data = numre
scaler = StandardScaler
scaler.fit(data)
num_metrics = []


numeric_features = [0, 1, 4, 5, 6] 
numeric_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features) ])

In [36]:
game_test2 = df[df['game_id'] == 400951566]
game_test2

Unnamed: 0,game_id,team,player_name,player_url,pass_completion,pass_yds,pass_avg,pass_td,pass_int,sacks,...,kicking_pct,kicking_long,kicking_xp,kicking_pts,punting_no,punting_yds,punting_avg,punting_tb,punting_in_20,punting_long
2,400951566,Chiefs,Dee Ford,https://www.espn.com/nfl/player/_/id/16707/dee...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,400951566,Chiefs,Eric Berry,https://www.espn.com/nfl/player/_/id/13252/eri...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,400951566,Chiefs,Frank Zombo,https://www.espn.com/nfl/player/_/id/13779/fra...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,400951566,Chiefs,Chris Conley,http://www.espn.com/nfl/player/_/id/2578533/ch...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
20,400951566,Chiefs,Travis Kelce,http://www.espn.com/nfl/player/_/id/15847/trav...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1482,400951566,Patriots,Brandon King,https://www.espn.com/nfl/player/_/id/3051905/b...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1511,400951566,Patriots,David Harris,https://www.espn.com/nfl/player/_/id/10491/dav...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1540,400951566,Patriots,Dwayne Allen,https://www.espn.com/nfl/player/_/id/14901/dwa...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1569,400951566,Patriots,Ryan Allen,https://www.espn.com/nfl/player/_/id/16382/rya...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
game_test2.loc[(game_test2.team=='Chiefs')].sum()

  game_test2.loc[(game_test2.team=='Chiefs')].sum()


game_id                                                    11226643848
team                 ChiefsChiefsChiefsChiefsChiefsChiefsChiefsChie...
player_name          Dee FordEric BerryFrank ZomboChris ConleyTravi...
player_url           https://www.espn.com/nfl/player/_/id/16707/dee...
pass_yds                                                         368.0
pass_avg                                                          10.5
pass_td                                                            4.0
pass_int                                                           0.0
rtg                                                              148.6
rush_car                                                          27.0
rush_yds                                                         185.0
rush_avg                                                          40.8
rush_td                                                            2.0
rush_long                                                         98.0
recept

In [37]:
game_test2.loc[(game_test2.team=='Patriots')].sum()

game_id                                                    13231401678
team                 PatriotsPatriotsPatriotsPatriotsPatriotsPatrio...
player_name          Chris HoganDont'a HightowerMalcolm ButlerMike ...
player_url           https://www.espn.com/nfl/player/_/id/14402/chr...
pass_completion                                                      0
pass_yds                                                           0.0
pass_avg                                                           0.0
pass_td                                                            0.0
pass_int                                                           0.0
sacks                                                                0
qbr                                                                  0
rtg                                                                0.0
rush_car                                                           0.0
rush_yds                                                           0.0
rush_a

In [None]:
game_test2 = df[df['game_id'] == 401326597]
game_test2

Unnamed: 0,game_id,team,player_name,player_url,pass_completion,pass_yds,pass_avg,pass_td,pass_int,sacks,...,kicking_pct,kicking_long,kicking_xp,kicking_pts,punting_no,punting_yds,punting_avg,punting_tb,punting_in_20,punting_long
78684,401326597,Seahawks,Cody Thompson,http://www.espn.com/nfl/player/_/id/3126115/co...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78712,401326597,Seahawks,Bryan Mone,http://www.espn.com/nfl/player/_/id/3115979/br...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78740,401326597,Seahawks,Robert Nkemdiche,http://www.espn.com/nfl/player/_/id/3051886/ro...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78768,401326597,Seahawks,Travis Homer,https://www.espn.com/nfl/player/_/id/4037457/t...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
78794,401326597,Seahawks,Michael Dickson,https://www.espn.com/nfl/player/_/id/3929851/m...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,2.0,78.0,39.0,0.0,2.0,41.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79602,401326597,Cardinals,Dennis Gardeck,https://www.espn.com/nfl/player/_/id/4334300/d...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
79607,401326597,Cardinals,Christian Kirk,http://www.espn.com/nfl/player/_/id/3895856/ch...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
79611,401326597,Cardinals,Jonathan Ward,https://www.espn.com/nfl/player/_/id/4039274/j...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
79614,401326597,Cardinals,Jalen Thompson,https://www.espn.com/nfl/player/_/id/4043089/j...,0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [38]:
game_test2.loc[(game_test2.team=='Seahawks')].sum()

game_id              0.0
team                 0.0
player_name          0.0
player_url           0.0
pass_completion      0.0
pass_yds             0.0
pass_avg             0.0
pass_td              0.0
pass_int             0.0
sacks                0.0
qbr                  0.0
rtg                  0.0
rush_car             0.0
rush_yds             0.0
rush_avg             0.0
rush_td              0.0
rush_long            0.0
receptions           0.0
rec_yds              0.0
rec_avg              0.0
rec_td               0.0
rec_long             0.0
rec_tgs              0.0
fumbles              0.0
fumbles_lost         0.0
fumbles_rec          0.0
defense_tot          0.0
defense_solo         0.0
defense_sacks        0.0
defense_tfl          0.0
defense_pd           0.0
defense_qb_hits      0.0
defense_td           0.0
interceptions        0.0
interceptions_yds    0.0
interceptions_td     0.0
kicks_return_no      0.0
kicks_return_yds     0.0
kicks_return_avg     0.0
kicks_return_long    0.0


In [None]:
game_test2.loc[(game_test2.team=='Cardinals')].sum()

game_id                                                    12441124507
team                 CardinalsCardinalsCardinalsCardinalsCardinalsC...
player_name          Tahir WhiteheadChandler JonesBreon BordersDarr...
player_url           https://www.espn.com/nfl/player/_/id/15070/tah...
pass_completion                                                      0
pass_yds                                                           0.0
pass_avg                                                           0.0
pass_td                                                            0.0
pass_int                                                           0.0
sacks                                                                0
qbr                                                                  0
rtg                                                                0.0
rush_car                                                           0.0
rush_yds                                                           0.0
rush_a

#### Creation of a reduce dataset for rating

In [39]:
df.columns

Index(['game_id', 'team', 'player_name', 'player_url', 'pass_completion',
       'pass_yds', 'pass_avg', 'pass_td', 'pass_int', 'sacks', 'qbr', 'rtg',
       'rush_car', 'rush_yds', 'rush_avg', 'rush_td', 'rush_long',
       'receptions', 'rec_yds', 'rec_avg', 'rec_td', 'rec_long', 'rec_tgs',
       'fumbles', 'fumbles_lost', 'fumbles_rec', 'defense_tot', 'defense_solo',
       'defense_sacks', 'defense_tfl', 'defense_pd', 'defense_qb_hits',
       'defense_td', 'interceptions', 'interceptions_yds', 'interceptions_td',
       'kicks_return_no', 'kicks_return_yds', 'kicks_return_avg',
       'kicks_return_long', 'kicks_return_td', 'punt_return_no',
       'punt_return_yds', 'punt_return_avg', 'punt_return_long',
       'punt_return_td', 'kicking_fg', 'kicking_pct', 'kicking_long',
       'kicking_xp', 'kicking_pts', 'punting_no', 'punting_yds', 'punting_avg',
       'punting_tb', 'punting_in_20', 'punting_long'],
      dtype='object')

In [43]:
key_columns = ['game_id', 'team', 'player_name']
main_metrics = ['pass_completion', 'pass_yds', 'rush_yds', 'sacks', 'qbr', 'rtg']
df_focus = df[['game_id', 'team', 'player_name','pass_completion', 'pass_yds', 'rush_yds', 'sacks', 'qbr', 'rtg']]
df_focus.head()

Unnamed: 0,game_id,team,player_name,pass_completion,pass_yds,rush_yds,sacks,qbr,rtg
0,401030690,Ravens,Kenny Young,0,0.0,0.0,0,0,0.0
1,401030690,Ravens,Willie Snead IV,0,0.0,0.0,0,0,0.0
2,400951566,Chiefs,Dee Ford,0,0.0,0.0,0,0,0.0
3,401030690,Ravens,Patrick Onwuasor,0,0.0,0.0,0,0,0.0
4,400951566,Chiefs,Eric Berry,0,0.0,0.0,0,0,0.0


In [44]:
zoom1 = df_focus[df_focus['game_id'] == 401326597]
zoom1['team'].unique()

array(['Seahawks', 'Cardinals'], dtype=object)

In [49]:
zoom1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 61 entries, 78684 to 79617
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   game_id          61 non-null     int64  
 1   team             61 non-null     object 
 2   player_name      61 non-null     object 
 3   pass_completion  61 non-null     object 
 4   pass_yds         61 non-null     float64
 5   rush_yds         61 non-null     float64
 6   sacks            61 non-null     object 
 7   qbr              61 non-null     object 
 8   rtg              61 non-null     float64
dtypes: float64(3), int64(1), object(5)
memory usage: 4.8+ KB


In [None]:
zoom1 = zoom1.astype({'pass_completion':'float', 'sacks':'float', 'qbr':'float'})
zoom1.info()

In [56]:
zoom1_focus = zoom1[['game_id', 'team', 'player_name','pass_yds', 'rush_yds']]
zoom1_focus.head()

Unnamed: 0,game_id,team,player_name,pass_yds,rush_yds
78684,401326597,Seahawks,Cody Thompson,0.0,0.0
78712,401326597,Seahawks,Bryan Mone,0.0,0.0
78740,401326597,Seahawks,Robert Nkemdiche,0.0,0.0
78768,401326597,Seahawks,Travis Homer,0.0,4.0
78794,401326597,Seahawks,Michael Dickson,0.0,0.0


In [58]:
yds = ['pass_yds', 'rush_yds']
zoom1_focus['total_yds'] = zoom1_focus[yds].sum(axis=1)
zoom1_focus.head()

Unnamed: 0,game_id,team,player_name,pass_yds,rush_yds,total_yds
78684,401326597,Seahawks,Cody Thompson,0.0,0.0,0.0
78712,401326597,Seahawks,Bryan Mone,0.0,0.0,0.0
78740,401326597,Seahawks,Robert Nkemdiche,0.0,0.0,0.0
78768,401326597,Seahawks,Travis Homer,0.0,4.0,4.0
78794,401326597,Seahawks,Michael Dickson,0.0,0.0,0.0


In [None]:
rating_team1 = 0
rating_team2 = 0

for i in range(len(zoom1_focus)):
    if zoom1_focus['team'][i] == 'Seahawks':
        rating_team1 =+ zoom1_focus['total_yds'].loc[i]
    else: rating_team2 =+ zoom1_focus['total_yds'].loc[i]
print(team1, ":",rating_team1)
print(team2, ":",rating_team2)

In [67]:
team1 = zoom1_focus[zoom1_focus['team'] == 'Seahawks']
team1=team1['total_yds']. sum()


In [68]:
team1

440.0

In [82]:
# Generalisation of the rating function 
def rating_function(df, game_id, team):
    df_rating = df[df['game_id'] == game_id] # zoom on the match
    df_rating = df_rating[df_rating['team'] == team] # zoom on the team
    df_rating['total_yds'] = df_rating[yds].sum(axis=1) # creation of the total yds column
    rating = df_rating['total_yds'].sum() # sum of the ratings
    return team, rating

In [83]:
rating_function(zoom1_focus, '401326597', 'Seahawks')

('Seahawks', 0.0)

In [88]:
zoom1_focus['total_yds'].sum()

440.0