In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

import sqlalchemy

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()

db_user = os.getenv("user")
db_pass = os.getenv("pass")
db_name = os.getenv("db_name")
db_port = os.getenv("port")

In [3]:
# connect to nfldb
connection_uri = 'postgresql://{username}:{password}@localhost:{port}/nfldb'
engine = sqlalchemy.create_engine(connection_uri.format(username=db_user, password=db_pass, port=db_port))
conn = engine.connect()

In [7]:
import unittest

class TestDbConn(unittest.TestCase):
    def test_conn(self):
        # engine connection test
        results = conn.execute("SELECT player_id FROM player WHERE full_name='Tom Brady';")
        player_id = results.fetchone().values()[0]
        self.assertIsNotNone(player_id)
        self.assertEqual(player_id, '00-0019596')
        
unittest.main(argv=['first-arg-is-ignored'], exit=False)

E
ERROR: test_conn (__main__.TestDbConn)
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/Users/davidlin/workspace/sport-site/ss/lib/python3.9/site-packages/sqlalchemy/engine/base.py", line 1705, in _execute_context
    self.dialect.do_execute(
  File "/Users/davidlin/workspace/sport-site/ss/lib/python3.9/site-packages/sqlalchemy/engine/default.py", line 716, in do_execute
    cursor.execute(statement, parameters)
psycopg2.OperationalError: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.


The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "<ipython-input-7-686e0e0a8158>", line 6, in test_conn
    results = conn.execute("SELECT player_id FROM player WHERE full_name='Tom Brady';")
  File "/Users/davidlin/workspace/sport-site/ss/lib/python3.9/site-packages/sqlalchemy/engine/b

<unittest.main.TestProgram at 0x1323d2d60>

# Calculate Fantasy Points Averages

In the following cells, I query the nfldb postgresql database using sqlalchemy's expression language to get the relevant statistics for calculating the total fantasy points per season (and then subsequently the average points per game) for each player. 

(currently only for QB)

In [5]:
qb_stats = """select g.player_id, team, season_year, full_name, position,
games_played, total_passing_twopt, total_rushing_twopt,
total_rushing_yds, total_rushing_tds, total_fum_lost, total_rushing_att,
total_passing_att, total_passing_yds, total_passing_tds, total_ints,
total_rec_yds, total_rec_tds, total_recs, total_targets
from (select player_id, season_year, count(distinct(play_player.gsis_id)) as games_played,
    sum(passing_twoptm) as total_passing_twopt, sum(rushing_twoptm) as total_rushing_twopt,
    sum(rushing_yds) as total_rushing_yds, sum(rushing_tds) as total_rushing_tds,
    sum(receiving_yds) as total_rec_yds, sum(receiving_tds) as total_rec_tds, sum(receiving_rec) as total_recs,
    sum(passing_yds) as total_passing_yds, sum(passing_tds) as total_passing_tds,
    sum(fumbles_lost) as total_fum_lost, sum(passing_int) as total_ints,
    sum(rushing_att) as total_rushing_att,
    sum(receiving_tar) as total_targets,
    sum(passing_att) as total_passing_att
    from play_player
    join game on play_player.gsis_id = game.gsis_id 
    where season_type=\'Regular\'
    group by season_year, player_id) as g join player on player.player_id = g.player_id where position in {};"""
all_stats = qb_stats.format(("QB", "RB", "TE", "WR"))

In [6]:
query = conn.execute(all_stats)
qb_agg = pd.DataFrame(query)
qb_agg.columns = query.keys()

## Point Breakdown
- Passing Yards (PY)0.04
- TD Pass (PTD)4
- Interceptions Thrown (INT)-2
- 2pt Passing Conversion (2PC)2
- 2pt Rushing Conversion (2PC)2
- Rushing/Receiving Yards 0.1
- Rushing/Receiving TDs 6

### Additional Points Depending on League Settings
- 300-399 yard passing game (P300)1
- 400+ yard passing game (P400)2
- 100+ yard rushing/receiving 1
- 200+ yard rushing/receiving 2

In [45]:
def calc_z_score(position):
    if position not in ("QB", "RB", "WR", "TE"):
        raise ValueError("Position needs to be QB, RB, WR, or TE")
        
    df = qb_agg_2015.loc[qb_agg_2015["position"] == position].sort_values(by="y", ascending=False)
    df["z_score"] = (df["y"] - df["y"].mean()) / df["y"].std()
    df["p-score"] = st.norm.cdf(df["z_score"])
    return df.rename(columns={"y": "pred_avg_pts"})

In [8]:
# initialize tables and metadata for sqlalchemy expression language
metadata = sqlalchemy.MetaData(bind=None)
player = sqlalchemy.Table("player", metadata, autoload=True, autoload_with=engine)
game = sqlalchemy.Table("game", metadata, autoload=True, autoload_with=engine)
play_player = sqlalchemy.Table("play_player", metadata, autoload=True, autoload_with=engine)

  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(
  util.warn(


In [9]:
# calculate "big passing games" of >300 and >400 yards passing
# TODO: >100, >200 receiving/rushing

ypg = sqlalchemy.sql.select([player.c.player_id, game.c.season_year, \
                             sqlalchemy.func.sum(play_player.c.passing_yds).label('total_passing_yds_game')]) \
.select_from(
    game.join(play_player, game.c.gsis_id == play_player.c.gsis_id)
    .join(player, player.c.player_id == play_player.c.player_id)
).where(
    sqlalchemy.and_(player.c.player_id.in_(qb_agg["player_id"].unique()),
                    game.c.season_type == 'Regular'
                   )
).group_by(game.c.gsis_id, game.c.season_year, player.c.player_id)

ypg_subq = ypg.alias()
over_300 = sqlalchemy.sql.select([
    sqlalchemy.sql.column("player_id"),
    sqlalchemy.sql.column("season_year"),
    sqlalchemy.func.count(sqlalchemy.case([
        (sqlalchemy.sql.column("total_passing_yds_game").between(300, 399), 1)
    ])).label("over_300"),
    sqlalchemy.func.count(sqlalchemy.case([
        (sqlalchemy.sql.column("total_passing_yds_game") >= 400, 1)
    ])).label("over_400")
]).select_from(ypg_subq).group_by(sqlalchemy.sql.column("player_id"), sqlalchemy.sql.column("season_year"))

result = conn.execute(over_300)
big_passing_games = result.fetchall()
big_passing_games = pd.DataFrame(big_passing_games)
big_passing_games.columns = result.keys()


In [10]:
rpg = sqlalchemy.sql.select([player.c.player_id, game.c.season_year, \
                             sqlalchemy.func.sum(play_player.c.receiving_yds).label('total_receiving_yds_game')]) \
.select_from(
    game.join(play_player, game.c.gsis_id == play_player.c.gsis_id)
    .join(player, player.c.player_id == play_player.c.player_id)
).where(
    sqlalchemy.and_(player.c.player_id.in_(qb_agg["player_id"].unique()),
                    game.c.season_type == 'Regular'
                   )
).group_by(game.c.gsis_id, game.c.season_year, player.c.player_id)

rpg_subq = rpg.alias()
over_100 = sqlalchemy.sql.select([
    sqlalchemy.sql.column("player_id"),
    sqlalchemy.sql.column("season_year"),
    sqlalchemy.func.count(sqlalchemy.case([
        (sqlalchemy.sql.column("total_receiving_yds_game").between(100, 199), 1)
    ])).label("over_100"),
    sqlalchemy.func.count(sqlalchemy.case([
        (sqlalchemy.sql.column("total_receiving_yds_game") >= 200, 1)
    ])).label("over_200")
]).select_from(rpg_subq).group_by(sqlalchemy.sql.column("player_id"), sqlalchemy.sql.column("season_year"))

result = conn.execute(over_100)
big_rec_games = result.fetchall()
big_rec_games = pd.DataFrame(big_rec_games)
big_rec_games.columns = result.keys()


In [11]:
rupg = sqlalchemy.sql.select([player.c.player_id, game.c.season_year, \
                             sqlalchemy.func.sum(play_player.c.rushing_yds).label('total_rushing_yds_game')]) \
.select_from(
    game.join(play_player, game.c.gsis_id == play_player.c.gsis_id)
    .join(player, player.c.player_id == play_player.c.player_id)
).where(
    sqlalchemy.and_(player.c.player_id.in_(qb_agg["player_id"].unique()),
                    game.c.season_type == 'Regular'
                   )
).group_by(game.c.gsis_id, game.c.season_year, player.c.player_id)

rupg_subq = rupg.alias()
over_100_rush = sqlalchemy.sql.select([
    sqlalchemy.sql.column("player_id"),
    sqlalchemy.sql.column("season_year"),
    sqlalchemy.func.count(sqlalchemy.case([
        (sqlalchemy.sql.column("total_rushing_yds_game").between(100, 199), 1)
    ])).label("over_100_rush"),
    sqlalchemy.func.count(sqlalchemy.case([
        (sqlalchemy.sql.column("total_rushing_yds_game") >= 200, 1)
    ])).label("over_200_rush")
]).select_from(rupg_subq).group_by(sqlalchemy.sql.column("player_id"), sqlalchemy.sql.column("season_year"))

result = conn.execute(over_100_rush)
big_rush_games = result.fetchall()
big_rush_games = pd.DataFrame(big_rush_games)
big_rush_games.columns = result.keys()

In [12]:
for df in [big_passing_games, big_rec_games, big_rush_games]:
    qb_agg = qb_agg.merge(df, on=["player_id", "season_year"])

In [13]:
# 2pt pass, 2pt rush, rush yds, rush td, pass yd, pass td, fumble lost, int, >300, >400, TODO: >100, >200 receiving/rushing
pt_multipliers = (2, 2, 0.1, 6, 0.1, 6, 0.04, 4, -2, -2, 1, 2)

In [14]:
qb_agg["avg_fantasy"] = (qb_agg.loc[:, ("total_passing_twopt", "total_rushing_twopt", "total_rushing_yds", \
                                        "total_rushing_tds", "total_rec_yds", "total_rec_tds", \
                                        "total_passing_yds", "total_passing_tds", "total_fum_lost", \
                                        "total_ints", "over_300", "over_400")]\
                                * pt_multipliers).sum(axis=1) / qb_agg["games_played"]

In [29]:
def prepare_df(df):
    """
    This function should return a ready-to-train dataframe with the appropriate features for 
    all offensive positions (WR, RB, QB, TE)
    """
    
    # fill in target value with next year's actual values 
    past_ssn = df["season_year"].unique()[:-1]
    data = df.loc[(df["season_year"] >= np.min(past_ssn)) & (df["season_year"] <= np.max(past_ssn))]
    val = pd.DataFrame()
    # for player in df["player_id"].unique():
    for year in past_ssn:
        val = val.append(data.loc[data["season_year"] == year].merge(df.loc[df["season_year"] == year + 1, ("avg_fantasy","player_id")], on="player_id"))
    val = val.rename(columns={"avg_fantasy_y": "y", "avg_fantasy_x": "avg_fantasy"})
    data = val.dropna()

    y, x = data["y"], data.drop("y", axis=1)
    return x, y
            

In [16]:
x, y = prepare_df(qb_agg)

In [17]:
x.shape, y.shape

((1081, 27), (1081,))

# Training Models

In [20]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# models
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.svm import SVR

# metric
from sklearn.metrics import mean_squared_error, mean_squared_log_error

# z-score to probability
import scipy.stats as st

In [21]:
# train test split
x_train, x_val, y_train, y_val = train_test_split(x.iloc[:,5:], y, test_size=0.33)

scaler = StandardScaler()
norm_x_train = scaler.fit_transform(x_train)
norm_x_val = scaler.transform(x_val)

In [22]:
final_mdl = None
min_mse = float('inf')
models = [DecisionTreeRegressor(max_depth=3), RandomForestRegressor(n_estimators=100), AdaBoostRegressor()]

for model in models:
    model.fit(x_train, y_train)
    y_pred = model.predict(x_val)
    mse = mean_squared_error(y_val, y_pred)
    print(model.__class__, mse)
    if mse < min_mse:
        min_mse = mse
        final_mdl = model

<class 'sklearn.tree._classes.DecisionTreeRegressor'> 12.06775580751657
<class 'sklearn.ensemble._forest.RandomForestRegressor'> 12.494487485433455
<class 'sklearn.ensemble._weight_boosting.AdaBoostRegressor'> 15.034148286995702


In [23]:
y_pred_2016 = model.predict(qb_agg.loc[qb_agg["season_year"] == 2015].iloc[:,5:])

In [41]:
qb_agg_2015 = qb_agg.loc[qb_agg["season_year"] == 2015].copy()
qb_agg_2015.loc[:, "y"] = y_pred_2016

In [46]:
calc_z_score("QB")

Unnamed: 0,player_id,team,season_year,full_name,position,games_played,total_passing_twopt,total_rushing_twopt,total_rushing_yds,total_rushing_tds,...,over_300,over_400,over_100,over_200,over_100_rush,over_200_rush,avg_fantasy,pred_avg_pts,z_score,p-score
1165,00-0022942,SD,2015,Philip Rivers,QB,16,2,0,28,0,...,6,2,0,0,0,0,18.407500,19.060718,1.403408,0.919752
1164,00-0022924,PIT,2015,Ben Roethlisberger,QB,12,8,0,29,0,...,7,1,0,0,0,0,19.701667,18.588717,1.308699,0.904682
1151,00-0020531,NO,2015,Drew Brees,QB,15,0,0,14,1,...,8,2,0,0,0,0,21.234667,18.588717,1.308699,0.904682
1154,00-0021429,ARI,2015,Carson Palmer,QB,16,0,0,24,1,...,8,1,0,0,0,0,19.952500,18.588717,1.308699,0.904682
1324,00-0029263,SEA,2015,Russell Wilson,QB,16,0,0,554,1,...,1,0,0,0,0,0,21.080000,18.012751,1.193128,0.883590
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1336,00-0029567,KC,2015,Nick Foles,QB,11,1,0,20,1,...,0,0,0,0,0,0,8.734545,6.919075,-1.032874,0.150831
1256,00-0027796,CAR,2015,Joe Webb,QB,9,0,0,-1,0,...,0,0,0,0,0,0,-0.011111,6.723838,-1.072049,0.141849
1160,00-0022787,ATL,2015,Matt Schaub,QB,2,0,0,10,0,...,1,0,0,0,0,0,13.800000,6.723838,-1.072049,0.141849
1173,00-0023578,DET,2015,Dan Orlovsky,QB,2,0,0,0,0,...,0,0,0,0,0,0,5.020000,6.723838,-1.072049,0.141849


In [47]:
calc_z_score("RB")

Unnamed: 0,player_id,team,season_year,full_name,position,games_played,total_passing_twopt,total_rushing_twopt,total_rushing_yds,total_rushing_tds,...,over_300,over_400,over_100,over_200,over_100_rush,over_200_rush,avg_fantasy,pred_avg_pts,z_score,p-score
1204,00-0026213,KC,2015,Jamaal Charles,RB,5,0,0,364,4,...,0,0,0,0,1,0,16.020000,14.335784,3.177514,0.999257
1188,00-0025394,MIN,2015,Adrian Peterson,RB,16,0,0,1485,11,...,0,0,0,0,6,1,14.418750,14.028300,3.028027,0.998769
1403,00-0030496,PIT,2015,Le'Veon Bell,RB,6,0,0,556,3,...,0,0,0,0,3,0,14.533333,12.291364,2.183596,0.985504
1552,00-0032241,UNK,2015,Todd Gurley,RB,13,0,0,1108,10,...,0,0,0,0,5,0,14.430769,12.291364,2.183596,0.985504
1465,00-0031285,ATL,2015,Devonta Freeman,RB,15,0,0,1061,11,...,0,0,0,0,4,0,16.260000,12.291364,2.183596,0.985504
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1284,00-0028063,OAK,2015,Taiwan Jones,RB,12,0,0,72,0,...,0,0,0,0,0,0,1.816667,5.273981,-1.227985,0.109726
1384,00-0030282,ARI,2015,Stepfan Taylor,RB,9,0,0,58,0,...,0,0,0,0,0,0,0.688889,5.273981,-1.227985,0.109726
1221,00-0026932,CIN,2015,Cedric Peerman,RB,12,0,0,0,0,...,0,0,0,0,0,0,0.000000,5.193866,-1.266934,0.102589
1167,00-0022999,NO,2015,John Kuhn,RB,12,0,0,28,2,...,0,0,0,0,0,0,1.700000,5.193866,-1.266934,0.102589


In [48]:
calc_z_score("WR")

Unnamed: 0,player_id,team,season_year,full_name,position,games_played,total_passing_twopt,total_rushing_twopt,total_rushing_yds,total_rushing_tds,...,over_300,over_400,over_100,over_200,over_100_rush,over_200_rush,avg_fantasy,pred_avg_pts,z_score,p-score
1185,00-0024334,NYJ,2015,Brandon Marshall,WR,16,0,0,0,0,...,0,0,10,0,0,0,14.387500,10.729944,3.045159,0.998837
1255,00-0027793,PIT,2015,Antonio Brown,WR,16,0,0,28,0,...,0,0,8,1,0,0,15.137500,10.729944,3.045159,0.998837
1488,00-0031428,JAC,2015,Allen Robinson,WR,16,0,0,0,0,...,0,0,6,0,0,0,14.000000,9.588011,2.158759,0.984566
1456,00-0031235,NYG,2015,Odell Beckham,WR,15,0,0,3,0,...,0,0,8,0,0,0,14.880000,9.588011,2.158759,0.984566
1269,00-0027944,ATL,2015,Julio Jones,WR,16,0,0,0,0,...,0,0,9,0,0,0,14.568750,9.588011,2.158759,0.984566
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1229,00-0027057,BUF,2015,Brandon Tate,WR,15,0,0,0,0,...,0,0,0,0,0,0,0.793333,5.193866,-1.252099,0.105267
1187,00-0024535,ATL,2015,Eric Weems,WR,12,0,0,4,0,...,0,0,0,0,0,0,0.125000,4.713932,-1.624637,0.052120
1209,00-0026293,NE,2015,Matthew Slater,WR,12,0,0,0,0,...,0,0,0,0,0,0,0.000000,4.572954,-1.734068,0.041453
1513,00-0031818,UNK,2015,Bradley Marquez,WR,14,0,0,0,0,...,0,0,0,0,0,0,0.485714,4.572954,-1.734068,0.041453


In [49]:
calc_z_score("TE")

Unnamed: 0,player_id,team,season_year,full_name,position,games_played,total_passing_twopt,total_rushing_twopt,total_rushing_yds,total_rushing_tds,...,over_300,over_400,over_100,over_200,over_100_rush,over_200_rush,avg_fantasy,pred_avg_pts,z_score,p-score
1401,00-0030472,WAS,2015,Jordan Reed,TE,14,0,0,0,0,...,0,0,2,0,0,0,11.228571,9.844405,3.499794,0.999767
1241,00-0027656,NE,2015,Rob Gronkowski,TE,15,0,0,0,0,...,0,0,5,0,0,0,12.240000,9.011904,2.716610,0.996702
1207,00-0026281,CLE,2015,Gary Barnidge,TE,16,0,0,0,0,...,0,0,3,0,0,0,9.893750,7.795071,1.571861,0.942009
1415,00-0030549,CIN,2015,Tyler Eifert,TE,13,0,0,0,0,...,0,0,1,0,0,0,10.730769,7.792543,1.569483,0.941732
1190,00-0025418,CAR,2015,Greg Olsen,TE,16,0,0,0,0,...,0,0,3,0,0,0,9.400000,7.792543,1.569483,0.941732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1222,00-0026957,PIT,2015,David Johnson,TE,4,0,0,4,0,...,0,0,0,0,0,0,-0.300000,4.572954,-1.459380,0.072230
1375,00-0030110,NE,2015,Michael Williams,TE,9,0,0,0,0,...,0,0,0,0,0,0,0.288889,4.572954,-1.459380,0.072230
1313,00-0028987,TEN,2015,Phillip Supernaw,TE,10,0,0,0,0,...,0,0,0,0,0,0,0.120000,4.572954,-1.459380,0.072230
1311,00-0028958,UNK,2015,Cory Harkey,TE,9,0,0,0,0,...,0,0,0,0,0,0,0.288889,4.572954,-1.459380,0.072230


## Features for Consideration

* Average time of possession
* number of/percentage of scoring drives (adjusted for garbage time)
* opponents W-L records (strength of schedule)
* Teammate rankings

Need 3 models: 
1. QB
1. WR/TE
1. RB

### Avg Time of Possession + Points per Possession

In [None]:
def time_of_pos(team, season_year, season_type='regular'):
    """
    Get the average time of possession per game for given team, season year and type
    """
    
    "select SUM(CAST(regexp_replace(CAST(pos_time as TEXT), '\(|\)', '', 'g') as INT)) as time_of_possession, \
            a.gsis_id, result, pos_team from (select gsis_id, season_year, season_type, home_team, away_team \
                        from game where (home_team='NE' or away_team='NE') \
                        and season_year=2009 and season_type='Regular') as a join drive \
                        on a.gsis_id = drive.gsis_id where pos_team='NE' \
                        group by a.gsis_id, pos_team, result;"
    pass

In [None]:
def pts_per_pos(team, season_year, season_type='regular'):
    """
    points per possession.  
    """
    pass

### Strength of Schedule

Here, we're starting off with a preliminary representation of strength of schedule. We simply look at the win-loss records for teams that are on the schedule (by total number of wins)

In [30]:
def get_sched(team_name, year):
    """
    Takes in team_name and year of season and returns query for games scheduled and scores
    if the games have been played
    """
    return \
        sqlalchemy.sql.select([sqlalchemy.column("home_team"), sqlalchemy.column("away_team"),
                               sqlalchemy.column("home_score"), sqlalchemy.column("away_score")
                              ])\
        .select_from(game) \
        .where(
            sqlalchemy.and_(
                sqlalchemy.column("season_year") == year,
                sqlalchemy.or_(sqlalchemy.column("home_team") == team_name, \
                               sqlalchemy.column("away_team") == team_name),
                sqlalchemy.column("season_type") == "Regular"
            )
        )

In [31]:
def get_team_record(team_name, year):
    """
    Get the record of a specific team during a specific year (regular season only)
    
    Returns wins, losses (16-wins)
    """
    a = get_sched(team_name, year).alias()
    query = sqlalchemy.sql.select([
        sqlalchemy.func.sum(
            sqlalchemy.case([(sqlalchemy.column("winner") == team_name, 1)])
        )
    ]).select_from(
        sqlalchemy.sql.select([
            sqlalchemy.case([(a.c.home_score > a.c.away_score, sqlalchemy.column("home_team"))], \
                            else_=sqlalchemy.column("away_team")).label("winner")
        ])\
        .select_from(a).alias()
    )
    wins = conn.execute(query).fetchall()[0][0]
    return wins, 16-wins


In [32]:
get_team_record("NE", 2009)

(10, 6)

In [33]:
teams = (
    # NFC North
    "MIN", "GB", "DET", "CHI",
    
    # NFC South
    "TB", "CAR", "ATL", "NO",
    
    # NFC West
    "SF", "SEA", "STL", "ARI",
    
    # NFC East
    "DAL", "NYG", "WAS", "PHI",
    
    # AFC South
    "IND", "HOU", "JAC", "TEN",
    
    # AFC East
    "BAL", "CLE", "PIT", "CIN",
    
    # AFC West
    "OAK", "DEN", "KC", "SD",
    
    # AFC North
    "NE", "NYJ", "MIA", "BUF"
)

In [34]:
def team_records(season_year: int) -> dict:
    """
    Takes in a season year and returns a dictionary mapping teams to their records for that season
    """
    
    return {team: get_team_record(team, season_year) for team in teams}

In [35]:
season_year = 2010
team_records_2009 = team_records(season_year-1)
team_records_2009

{'MIN': (12, 4),
 'GB': (11, 5),
 'DET': (2, 14),
 'CHI': (7, 9),
 'TB': (3, 13),
 'CAR': (8, 8),
 'ATL': (9, 7),
 'NO': (13, 3),
 'SF': (8, 8),
 'SEA': (5, 11),
 'STL': (1, 15),
 'ARI': (10, 6),
 'DAL': (11, 5),
 'NYG': (8, 8),
 'WAS': (4, 12),
 'PHI': (11, 5),
 'IND': (14, 2),
 'HOU': (9, 7),
 'JAC': (7, 9),
 'TEN': (8, 8),
 'BAL': (9, 7),
 'CLE': (5, 11),
 'PIT': (9, 7),
 'CIN': (10, 6),
 'OAK': (5, 11),
 'DEN': (8, 8),
 'KC': (4, 12),
 'SD': (13, 3),
 'NE': (10, 6),
 'NYJ': (9, 7),
 'MIA': (7, 9),
 'BUF': (6, 10)}

In [38]:
def get_opp_wl(team_name: str, season_year: int) -> tuple:
    """
    Get the total W-L record for all teams that team_name played that year.
    """
    result = get_sched(team_name, season_year)
    sched = [row[0] if row[1] == team_name else row[1] for row in conn.execute(result).fetchall()]
    record_totals = [i for i in zip(*[team_records_2009[opp] for opp in sched])]
    return record_totals[0]

In [37]:
get_opp_wl("NE", 2009)

[(9, 9, 8, 3, 8, 7, 9, 14, 7, 7, 13, 6, 9, 9, 6, 8), (7, 7, 8, 13, 8, 9, 7, 2, 9, 9, 3, 10, 7, 7, 10, 8)]


(9, 9, 8, 3, 8, 7, 9, 14, 7, 7, 13, 6, 9, 9, 6, 8)

### Teammate Rankings

Find some way to "score" the team around them. For example, a QB's performance might be influenced by the presence of a good receiver and vice versa. Would be interesting to see if this would be noise or if it would actually be helpful.

Current thinking is to rank by position group and use key stats for each position group. For example, receiving core might be evaluated by number of receiving yards and number of receptions. Offensive line might be number of sacks (and, if available, pressures and knock backs). Runningbacks by yards per carry and rushing attempts.

In [462]:
import json
import redis

depth_chart_db = redis.Redis("localhost", 6379)

In [463]:
depth_chart_db.get("patriots")

b'{"WR": ["N\'Keal Harry", "Matthew Slater", "Isaiah Zuber", "Andre Baccellia", "Julian Edelman", "Damiere Byrd", "Jakobi Meyers", "Gunner Olszewski", "Devin Ross", "Jeff Thomas"], "LT": ["Isaiah Wynn", "Justin Herron"], "LG": ["Joe Thuney", "Hjalte Froholdt", "Jermaine Eluemunor"], "C": ["David Andrews", "Tyler Gauthier"], "RG": ["Shaq Mason", "Mike Onwenu", "Ben Braden"], "RT": ["Korey Cunningham", "Yodny Cajuste"], "TE": ["Ryan Izzo", "Devin Asiasi", "Dalton Keene", "Rashod Berry", "Jake Burt", "Paul Butler", "Paul Quessenberry"], "QB": ["Cam Newton", "Jarrett Stidham", "Brian Hoyer", "Brian Lewerke"], "RB": ["James White", "Jakob Johnson"]}'