### References

- API creator: https://medium.com/clarktech-sports/python-sports-analytics-made-simple-part-2-40e591a7f3db

- Sports Reference site: https://www.sports-reference.com/

- Code example: https://towardsdatascience.com/sports-reference-api-intro-dbce09e89e52

- Code example: https://towardsdatascience.com/beating-the-odds-8d26b1a83f1b

In [1]:
#!pip install sportsreference
import sqlite3
from contextlib import closing

import pandas as pd

### Team-level data

In [2]:
from sportsreference.nfl.teams import Teams

# Build the Teams container for the 2020 season, then print the names of
# the attributes it exposes.
teams2020 = Teams(year='2020')
team_attributes = dir(teams2020)
print(team_attributes)

['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_add_stats_data', '_retrieve_all_teams', '_teams', 'dataframes']


In [3]:
# Team stats table for the 2020 season, taken from the `dataframes`
# attribute of the Teams object created for year '2020'.
nfl_teams = teams2020.dataframes

In [4]:
# Show the first row transposed, so each column name appears as a row label.
nfl_teams.iloc[:1].transpose()

Unnamed: 0,GNB
abbreviation,GNB
defensive_simple_rating_system,1.8
first_downs,358
first_downs_from_penalties,28
fumbles,6
games_played,16
interceptions,5
losses,3
margin_of_victory,8.8
name,Green Bay Packers


In [5]:
# Stamp the 2020 rows with their season and an "<abbreviation>_<season>" key.
nfl_teams['year'] = 2020
nfl_teams['id'] = [abbr + "_" + '2020' for abbr in nfl_teams['abbreviation']]

In [6]:
# Extend the 2020 team table with seasons 2019 back to 1970.
# Each season's frame is collected in a list and concatenated once at the
# end; calling pd.concat inside the loop re-copies the growing table on
# every iteration.
season_frames = [nfl_teams]
for season in range(2019, 1969, -1):
    year = str(season)
    temp_df = Teams(year=year).dataframes
    # Same "<abbreviation>_<season>" key and integer season as the 2020 rows.
    temp_df['id'] = temp_df['abbreviation'] + "_" + year
    temp_df['year'] = season
    season_frames.append(temp_df)
    # Progress marker every fifth season (2019, 2014, ...).
    if (2019 - season) % 5 == 0:
        print(season)

nfl_teams = pd.concat(season_frames, axis=0)

2019
2014
2009
2004
1999
1994
1989
1984
1979
1974


In [7]:
# Key the combined table by the "<abbreviation>_<season>" id, then report
# its (rows, columns).
nfl_teams = nfl_teams.set_index('id')
nfl_teams.shape

(1509, 41)

In [12]:
# (Re)create the Teams table with explicit column types.
# Using a sqlite3 connection as a context manager only commits or rolls
# back; it does not close the connection, so `conn` remains usable by the
# following cell, which writes the DataFrame through it.
with sqlite3.connect('D:\\Data\\NFL.sqlite') as conn:
    cur = conn.cursor()
    # The net-yards-per-attempt column is spelled `pass_net_yards_per_attempt`
    # (it previously read `pass_net_yars_per_attempt`).
    # NOTE(review): presumably this must match the DataFrame column name - confirm.
    cur.executescript(
    '''
    DROP TABLE IF EXISTS Teams;
    CREATE TABLE Teams (
        id TEXT, abbreviation TEXT, defensive_simple_rating_system FLOAT, first_downs INTEGER, 
        first_downs_from_penalties INTEGER, fumbles INTEGER, games_played INTEGER, interceptions INTEGER,
        losses INTEGER, margin_of_victory FLOAT, name TEXT, offensive_simple_rating_system FLOAT,
        pass_attempts INTEGER, pass_completions INTEGER, pass_first_downs INTEGER, 
        pass_net_yards_per_attempt FLOAT, pass_touchdowns INTEGER, pass_yards INTEGER, penalties INTEGER,
        percent_drives_with_points FLOAT, percent_drives_with_turnovers FLOAT, plays INTEGER,
        points_against INTEGER, points_contributed_by_offense FLOAT, points_difference INTEGER, 
        points_for INTEGER, post_season_result TEXT, rank INTEGER, rush_attempts INTEGER,
        rush_first_downs INTEGER, rush_touchdowns INTEGER, rush_yards INTEGER, rush_yards_per_attempt FLOAT,
        simple_rating_system FLOAT, strength_of_schedule FLOAT, turnovers INTEGER, win_percentage FLOAT,
        wins INTEGER, yards INTEGER, yards_from_penalties INTEGER, yards_per_play FLOAT, year INTEGER
        );    
    '''
    )

In [13]:
# Write the team table through the connection opened in the previous cell.
# NOTE(review): if_exists='replace' drops any existing Teams table before
# inserting, so the stored column types are the ones pandas infers.
nfl_teams.to_sql(name='Teams', con=conn, if_exists='replace')

### Schedule

In [25]:
# Imported here so this cell runs on a fresh kernel: the only other import
# of Schedule sits in a later cell.
from sportsreference.nfl.schedule import Schedule

# Preview one team-season: the New Orleans ('NOR') 2020 schedule, showing
# the first game transposed so each column name appears as a row label.
no2020 = Schedule('NOR', year='2020').dataframe
no2020.head(1).T

Unnamed: 0,202009130nor
boxscore_index,202009130nor
date,September 13
datetime,2020-09-13 00:00:00
day,Sun
extra_points_attempted,4
extra_points_made,4
field_goals_attempted,2
field_goals_made,2
fourth_down_attempts,1
fourth_down_conversions,0


In [50]:
# Team abbreviations passed to Schedule(...) and Roster(...) in later cells.
# The order is kept as-is because those loops iterate the list as written.
# NOTE(review): this rebinds `nfl_teams`, which held the team-stats
# DataFrame in the Team-level section.
nfl_teams = [
    'GNB', 'BUF', 'TAM', 'OTI', 'NOR', 'KAN', 'RAV', 'SEA',
    'CLT', 'RAI', 'MIN', 'PIT', 'CRD', 'CLE', 'MIA', 'ATL',
    'DAL', 'HTX', 'SDG', 'DET', 'SFO', 'RAM', 'CHI', 'CAR',
    'WAS', 'PHI', 'NWE', 'DEN', 'CIN', 'JAX', 'NYG', 'NYJ',
]

In [20]:
from sportsreference.nfl.schedule import Schedule

# Column layout of the combined game log. An empty frame with these columns
# is placed first in the list so the final concat keeps this column order.
game_columns = [
    'boxscore_index', 'date', 'datetime', 'day', 'extra_points_attempted',
    'extra_points_made', 'field_goals_attempted', 'field_goals_made',
    'fourth_down_attempts', 'fourth_down_conversions', 'interceptions',
    'location', 'opponent_abbr', 'opponent_name', 'overtime',
    'pass_attempts', 'pass_completion_rate', 'pass_completions',
    'pass_touchdowns', 'pass_yards', 'pass_yards_per_attempt',
    'points_allowed', 'points_scored', 'punt_yards', 'punts',
    'quarterback_rating', 'result', 'rush_attempts', 'rush_touchdowns',
    'rush_yards', 'rush_yards_per_attempt', 'third_down_attempts',
    'third_down_conversions', 'time_of_possession', 'times_sacked', 'type',
    'week', 'yards_lost_from_sacks', 'season',
]
game_frames = [pd.DataFrame(columns=game_columns)]

# (team, year, error) for every team-season that could not be loaded, so
# gaps in the data can be inspected afterwards.
skipped_team_seasons = []

for season in range(2020, 1969, -1):
    year = str(season)
    # Progress marker every fifth season (2020, 2015, ...).
    if (2020 - season) % 5 == 0:
        print(season)
    for team in nfl_teams:
        try:
            temp_df = Schedule(team, year).dataframe
            temp_df['season'] = season
        except Exception as err:
            # Best effort: skip team-seasons that fail to load. Catching
            # Exception (not a bare except) lets KeyboardInterrupt stop the
            # long-running scrape.
            skipped_team_seasons.append((team, year, repr(err)))
            continue
        game_frames.append(temp_df)

# One concat at the end instead of re-copying the growing frame per team.
nfl_games = pd.concat(game_frames, axis=0)
print(f"{len(skipped_team_seasons)} team-seasons skipped")

2020
2015
2010
2005
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
2000
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be fo

The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun

The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun, and is the data available on www.sports-reference.com?
The requested page returned a valid response, but no data could be found. Has the season begun

In [21]:
# (Re)create the Games table with explicit column types.
# The connection context manager commits but does not close, so `conn`
# stays available to the following cell.
with sqlite3.connect('D:\\Data\\NFL.sqlite') as conn:
    # Connection.executescript creates a cursor, runs the script on it and
    # returns that cursor.
    cur = conn.executescript(
    '''
    DROP TABLE IF EXISTS Games;
    CREATE TABLE Games (
       boxscore_index TEXT, date TEXT, datetime DATETIME, day TEXT, extra_points_attempted INTEGER,
       extra_points_made INTEGER, field_goals_attempted INTEGER, field_goals_made INTEGER,
       fourth_down_attempts INTEGER, fourth_down_conversions INTEGER, interceptions INTEGER,
       location TEXT, opponent_abbr TEXT, opponent_name TEXT, overtime BOOLEAN,
       pass_attempts INTEGER, pass_completion_rate FLOAT, pass_completions INTEGER,
       pass_touchdowns INTEGER, pass_yards INTEGER, pass_yards_per_attempt FLOAT,
       points_allowed INTEGER, points_scored INTEGER, punt_yards INTEGER, punts INTEGER,
       quarterback_rating FLOAT, result TEXT, rush_attempts INTEGER, rush_touchdowns INTEGER,
       rush_yards INTEGER, rush_yards_per_attempt FLOAT, third_down_attempts INTEGER,
       third_down_conversions INTEGER, time_of_possession, times_sacked INTEGER, type TEXT,
       week INTEGER, yards_lost_from_sacks INTEGER, season INTEGER
        );    
    '''
    )

In [22]:
# Write the game log (without its index) through the connection opened in
# the previous cell.
# NOTE(review): if_exists='replace' drops any existing Games table before
# inserting, so the stored column types are the ones pandas infers.
nfl_games.to_sql(name='Games', con=conn, if_exists='replace', index=False)

### Boxscore

In [23]:
from sportsreference.nfl.boxscore import Boxscore

# Load a single boxscore by id and preview its frame transposed, so each
# column name appears as a row label.
game_data = Boxscore('202009130nor')
gameplay_data = game_data.dataframe
gameplay_data.head().transpose()

Unnamed: 0,202009130nor
attendance,
away_first_downs,23
away_fourth_down_attempts,1
away_fourth_down_conversions,1
away_fumbles,2
away_fumbles_lost,1
away_interceptions,2
away_net_pass_yards,224
away_pass_attempts,36
away_pass_completions,23


- Player game stats

In [2]:
# List of boxscore ids, restricted to ids whose first four characters
# (parsed as an integer year) are 2016 or later. The temporary `year`
# column exists only for the filter and is dropped again.
games = (
    pd.read_csv('D:/Data/game_ids.csv', index_col=0)
    .assign(year=lambda frame: [int(game_id[:4]) for game_id in frame.id])
    .loc[lambda frame: frame.year >= 2016]
    .reset_index(drop=True)
    .drop(columns=['year'])
)
games

Unnamed: 0,id
0,201601030atl
1,201601030buf
2,201601030car
3,201601030chi
4,201601030cin
...,...
1359,202101170kan
1360,202101170nor
1361,202101240gnb
1362,202101240kan


In [3]:
from sportsreference.nfl.boxscore import Boxscore

# Collect one frame per player per game, then concatenate once.
# DataFrame.append was removed in pandas 2.0 and re-copied the growing
# frame on every call; building a list and calling pd.concat once avoids both.
player_frames = []

for game in games.id:
    game_data = Boxscore(game)
    # Away players first, then home players, matching the original row order.
    for side in (game_data.away_players, game_data.home_players):
        for player in side:
            p = player.dataframe
            p['id'] = game  # tag each stat line with its boxscore id
            player_frames.append(p)

# pd.concat raises on an empty list, so fall back to an empty frame.
players = pd.concat(player_frames) if player_frames else pd.DataFrame()

players.tail()

Unnamed: 0,completed_passes,attempted_passes,passing_yards,passing_touchdowns,interceptions_thrown,times_sacked,yards_lost_from_sacks,longest_pass,quarterback_rating,rush_attempts,...,longest_punt_return,extra_points_made,extra_points_attempted,field_goals_made,field_goals_attempted,punts,total_punt_yards,yards_per_punt,longest_punt,id
SherAn00,,,,,,,,,,,...,,,,,,,,,,202102070tam
ThorJu00,,,,,,,,,,,...,,,,,,,,,,202102070tam
WattAr00,,,,,,,,,,,...,,,,,,,,,,202102070tam
TownTo01,,,,,,,,,,,...,,,,,,3.0,107.0,35.7,51.0,202102070tam
ButkHa00,,,,,,,,,,,...,,,,3.0,3.0,0.0,0.0,,0.0,202102070tam


In [11]:
# Summary statistics for the three fumble-recovery columns, with missing
# values replaced by 0 and the columns cast to int64 first.
# NOTE(review): in the saved output the last two columns show identical
# statistics - worth checking whether the upstream data duplicates them.
fumble_recovery_cols = [
    'fumbles_recovered',
    'yards_recovered_from_fumble',
    'fumbles_recovered_for_touchdown',
]
fumble_recovery = players[fumble_recovery_cols].fillna(0).astype('int64')
fumble_recovery.describe()

Unnamed: 0,fumbles_recovered,yards_recovered_from_fumble,fumbles_recovered_for_touchdown
count,82231.0,82231.0,82231.0
mean,0.029466,0.089455,0.089455
std,0.174767,2.003985,2.003985
min,0.0,-28.0,-28.0
25%,0.0,0.0,0.0
50%,0.0,0.0,0.0
75%,0.0,0.0,0.0
max,4.0,100.0,100.0


In [83]:
# (Re)create the Gameplay table and write the per-player game stats to it.
# closing() releases the connection when the block exits; using the
# connection itself as a context manager would only commit and leave it open.
with closing(sqlite3.connect('D:\\Data\\NFL.sqlite')) as conn:
    # `yards_per_punt_return` is declared FLOAT, matching the Rosters schema,
    # and `game_id` is given an explicit TEXT type.
    conn.executescript(
    '''
    DROP TABLE IF EXISTS Gameplay;
    CREATE TABLE Gameplay (
       player_id TEXT, completed_passes INTEGER, attempted_passes INTEGER, passing_yards INTEGER,
       passing_touchdowns INTEGER, interceptions_thrown INTEGER, times_sacked INTEGER,
       yards_lost_from_sacks INTEGER, longest_pass INTEGER, quarterback_rating FLOAT,
       rush_attempts INTEGER, rush_yards INTEGER, rush_touchdowns INTEGER, longest_rush INTEGER,
       times_pass_target INTEGER, receptions INTEGER, receiving_yards INTEGER,
       receiving_touchdowns INTEGER, longest_reception INTEGER, fumbles INTEGER, fumbles_lost INTEGER,
       interceptions INTEGER, yards_returned_from_interception INTEGER,
       interceptions_returned_for_touchdown INTEGER, longest_interception_return INTEGER,
       passes_defended INTEGER, sacks INTEGER, combined_tackles INTEGER, solo_tackles INTEGER,
       assists_on_tackles INTEGER, tackles_for_loss INTEGER, quarterback_hits INTEGER,
       fumbles_recovered INTEGER, yards_recovered_from_fumble INTEGER,
       fumbles_recovered_for_touchdown INTEGER, fumbles_forced INTEGER, kickoff_returns INTEGER,
       kickoff_return_yards INTEGER, average_kickoff_return_yards FLOAT,
       kickoff_return_touchdown INTEGER, longest_kickoff_return INTEGER, punt_returns INTEGER,
       punt_return_yards INTEGER, yards_per_punt_return FLOAT, punt_return_touchdown INTEGER,
       longest_punt_return INTEGER, extra_points_made INTEGER, extra_points_attempted INTEGER,
       field_goals_made INTEGER, field_goals_attempted INTEGER, punts INTEGER,
       total_punt_yards INTEGER, yards_per_punt FLOAT, longest_punt INTEGER, game_id TEXT
        );    
    '''
    )

    # NOTE(review): if_exists='replace' drops the table created just above,
    # so the stored table takes pandas-inferred types and the frame's own
    # labels (the boxscore column is `id`, not `game_id`). Decide whether to
    # rename and append into the declared schema instead.
    players.to_sql('Gameplay', conn, if_exists='replace')
    # closing() does not commit, so commit explicitly before the close.
    conn.commit()

### Roster

In [80]:
from sportsreference.nfl.roster import Player, Roster

# Build one table of season rows for every rostered player, 2020 back to 2016.
# Frames are collected in a list and concatenated once: DataFrame.append
# was removed in pandas 2.0 and re-copied the growing frame on every call.
roster_frames = []

# (player_id, year, error) for every player-season that could not be
# processed, so gaps can be inspected afterwards.
skipped_players = []

for season in range(2020, 2015, -1):
    year = str(season)
    for team in nfl_teams:
        t = Roster(team, year, slim=True)
        # Only the keys of `players` are used; each is passed to Player(...).
        for player_id in t.players:
            try:
                # .copy() so the added columns are written to an independent
                # object rather than a slice of the player's full frame.
                temp = Player(player_id).dataframe.loc[year].copy()
                temp['season'] = year
                temp['roster_id'] = temp.player_id + '_' + temp.season
                roster_frames.append(temp.set_index('roster_id'))
            except Exception as err:
                # Best effort: skip players whose season row cannot be built.
                # Catching Exception (not a bare except) lets
                # KeyboardInterrupt stop the long-running scrape.
                skipped_players.append((player_id, year, repr(err)))

# pd.concat raises on an empty list, so fall back to an empty frame.
rosters = pd.concat(roster_frames) if roster_frames else pd.DataFrame()
print(f"{len(skipped_players)} player-seasons skipped")

In [84]:
# (Re)create the Rosters table and write the player-season frame to it.
# closing() releases the connection when the block exits; using the
# connection itself as a context manager would only commit and leave it open.
with closing(sqlite3.connect('D:\\Data\\NFL.sqlite')) as conn:
    # `fifty_plus_yard_field_goal_attempts` carries its own `INTEGER,` so
    # that `fifty_plus_yard_field_goals_made` is declared as a separate
    # column; without the comma SQLite reads the second name as part of the
    # first column's type.
    conn.executescript(
    '''
    DROP TABLE IF EXISTS Rosters;
    CREATE TABLE Rosters (
        roster_id TEXT,
        adjusted_net_yards_per_attempt_index FLOAT, adjusted_net_yards_per_pass_attempt FLOAT,
        adjusted_yards_per_attempt FLOAT, adjusted_yards_per_attempt_index FLOAT, all_purpose_yards INTEGER,
        approximate_value INTEGER, assists_on_tackles INTEGER, attempted_passes INTEGER,
        birth_date DATE, blocked_punts INTEGER, catch_percentage FLOAT, completed_passes INTEGER,
        completion_percentage_index FLOAT, espn_qbr FLOAT, extra_point_percentage FLOAT,
        extra_points_attempted INTEGER, extra_points_made INTEGER, field_goal_percentage FLOAT,
        field_goals_attempted INTEGER, field_goals_made INTEGER, fifty_plus_yard_field_goal_attempts INTEGER,
        fifty_plus_yard_field_goals_made INTEGER, fourth_quarter_comebacks INTEGER, 
        fourty_to_fourty_nine_yard_field_goal_attempts INTEGER, 
        fourty_to_fourty_nine_yard_field_goals_made INTEGER, fumbles INTEGER, fumbles_forced INTEGER,
        fumbles_recovered INTEGER, fumbles_recovered_for_touchdown INTEGER, game_winning_drives INTEGER,
        games INTEGER, games_started INTEGER, height FLOAT, interception_percentage FLOAT,
        interception_percentage_index FLOAT, interceptions INTEGER,
        interceptions_returned_for_touchdown INTEGER, interceptions_thrown INTEGER,
        kickoff_return_touchdown INTEGER, kickoff_return_yards INTEGER,
        kickoff_returns INTEGER, less_than_nineteen_yards_field_goal_attempts INTEGER,
        less_than_nineteen_yards_field_goals_made INTEGER, longest_field_goal_made INTEGER,
        longest_interception_return INTEGER, longest_kickoff_return INTEGER, longest_pass INTEGER,
        longest_punt INTEGER, longest_punt_return INTEGER, longest_reception INTEGER,
        longest_rush INTEGER, name TEXT, net_yards_per_attempt_index FLOAT,
        net_yards_per_pass_attempt FLOAT, passer_rating_index FLOAT, passes_defended INTEGER,
        passing_completion FLOAT, passing_touchdown_percentage FLOAT, passing_touchdowns INTEGER,
        passing_yards INTEGER, passing_yards_per_attempt FLOAT, player_id TEXT,
        position, punt_return_touchdown INTEGER, punt_return_yards INTEGER,
        punt_returns INTEGER, punts INTEGER, qb_record, quarterback_rating FLOAT,
        receiving_touchdowns INTEGER, receiving_yards INTEGER, receiving_yards_per_game FLOAT,
        receiving_yards_per_reception FLOAT, receptions INTEGER, receptions_per_game FLOAT,
        rush_attempts INTEGER, rush_attempts_per_game FLOAT, rush_touchdowns INTEGER,
        rush_yards INTEGER, rush_yards_per_attempt FLOAT, rush_yards_per_game FLOAT,
        rushing_and_receiving_touchdowns INTEGER, sack_percentage FLOAT,
        sack_percentage_index FLOAT, sacks INTEGER, safeties INTEGER, season,
        tackles INTEGER, team_abbreviation TEXT, thirty_to_thirty_nine_yard_field_goal_attempts INTEGER,
        thirty_to_thirty_nine_yard_field_goals_made INTEGER, times_pass_target INTEGER,
        times_sacked INTEGER, total_punt_yards INTEGER, touchdown_percentage_index FLOAT,
        touches INTEGER, twenty_to_twenty_nine_yard_field_goal_attempts INTEGER,
        twenty_to_twenty_nine_yard_field_goals_made INTEGER,
        weight,
        yards_from_scrimmage INTEGER, yards_lost_to_sacks INTEGER, yards_per_attempt_index FLOAT,
        yards_per_completed_pass FLOAT, yards_per_game_played FLOAT, yards_per_kickoff_return FLOAT,
        yards_per_punt FLOAT, yards_per_punt_return FLOAT, yards_per_touch FLOAT,
        yards_recovered_from_fumble INTEGER, yards_returned_from_interception INTEGER
        );    
    '''
    )

    # NOTE(review): if_exists='replace' drops the table created just above,
    # so the stored column types are the ones pandas infers from `rosters`.
    rosters.to_sql('Rosters', conn, if_exists='replace')
    # closing() does not commit, so commit explicitly before the close.
    conn.commit()