In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import Lasso

In [2]:
# Load every weekly stat sheet (seasons 1999-2019, weeks 1-17) and tag each row
# with the season and week it came from.
years = range(1999, 2020)
weekly_frames = []
for y in years:
    for w in range(1, 18):
        # Forward slashes resolve on Windows as well as on POSIX systems.
        new_data = pd.read_csv("data/weekly/" + str(y) + "/week" + str(w) + ".csv")
        weekly_frames.append(new_data.assign(Year = y, Week = w))
# Concatenate once: DataFrame.append was removed in pandas 2.0 and re-copied the
# whole accumulated frame on every iteration.
data = pd.concat(weekly_frames, ignore_index=True)

stats = ['PassingYds', 'PassingTD', 'Int', 'PassingAtt', 'Cmp', 'RushingAtt', 'RushingYds', 'RushingTD', 'Rec', 'Tgt', 'ReceivingYds', 'ReceivingTD', 'FL']
# Lag features: column "<stat>-<i>" holds the value from the same player's i-th
# preceding row. Rows are in season/week order, so this is the i-th previous
# sheet the player appears in, which is not necessarily i calendar weeks back.
player_stats = data.groupby('Player')[stats]
lagged = [player_stats.shift(i).add_suffix('-' + str(i)) for i in range(1, 18)]
# Attach all lag blocks in a single concat instead of 221 separate column inserts.
data = pd.concat([data] + lagged, axis=1)
# Keep only rows with a complete 17-row history (and no other missing values).
data = data.dropna()
data = data.reset_index(drop=True)

cols = data.columns.tolist()
cols.remove("Year")
cols.remove("Week")
cols.remove("PPRFantasyPoints")
cols.remove("StandardFantasyPoints")
cols.remove("HalfPPRFantasyPoints")
# NOTE(review): the [:-3] trim removes the last three lag columns
# (ReceivingYds-17, ReceivingTD-17, FL-17), which is why the printed column list
# ends at Tgt-17. It is kept because the model cells rely on this exact layout
# (features start at position 18, same feature count) -- confirm it is intended.
cols = ["Year", "Week"] + cols[:-3]
data = data[cols]

In [3]:
# Inspect the column layout: Year/Week, identifier columns, the current-week
# stats, then the lagged "<stat>-<i>" columns.
print(data.columns)

Index(['Year', 'Week', 'Player', 'Pos', 'Tm', 'PassingYds', 'PassingTD', 'Int',
       'PassingAtt', 'Cmp',
       ...
       'PassingYds-17', 'PassingTD-17', 'Int-17', 'PassingAtt-17', 'Cmp-17',
       'RushingAtt-17', 'RushingYds-17', 'RushingTD-17', 'Rec-17', 'Tgt-17'],
      dtype='object', length=236)


In [4]:
# Row count after dropna(), followed by a preview of the first rows.
print(len(data))
data.head()

63184


Unnamed: 0,Year,Week,Player,Pos,Tm,PassingYds,PassingTD,Int,PassingAtt,Cmp,...,PassingYds-17,PassingTD-17,Int-17,PassingAtt-17,Cmp-17,RushingAtt-17,RushingYds-17,RushingTD-17,Rec-17,Tgt-17
0,2000,2,Frank Wycheck,TE,TEN,30.0,1.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0
1,2000,2,Steve Beuerlein,QB,CAR,364.0,3.0,1.0,32.0,24.0,...,207.0,1.0,1.0,32.0,16.0,1.0,8.0,0.0,0.0,0.0
2,2000,2,Brett Favre,QB,GNB,269.0,2.0,0.0,35.0,25.0,...,333.0,4.0,3.0,47.0,28.0,2.0,13.0,0.0,0.0,0.0
3,2000,2,Rich Gannon,QB,OAK,207.0,0.0,0.0,22.0,15.0,...,227.0,0.0,1.0,31.0,16.0,1.0,6.0,0.0,0.0,0.0
4,2000,2,Elvis Grbac,QB,KAN,144.0,1.0,0.0,21.0,13.0,...,283.0,1.0,1.0,42.0,20.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# (rows, columns) of the prepared frame.
print(data.shape)

(63184, 236)


In [6]:
# Preview the first rows of the prepared frame.
data.head()

Unnamed: 0,Year,Week,Player,Pos,Tm,PassingYds,PassingTD,Int,PassingAtt,Cmp,...,PassingYds-17,PassingTD-17,Int-17,PassingAtt-17,Cmp-17,RushingAtt-17,RushingYds-17,RushingTD-17,Rec-17,Tgt-17
0,2000,2,Frank Wycheck,TE,TEN,30.0,1.0,0.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,4.0
1,2000,2,Steve Beuerlein,QB,CAR,364.0,3.0,1.0,32.0,24.0,...,207.0,1.0,1.0,32.0,16.0,1.0,8.0,0.0,0.0,0.0
2,2000,2,Brett Favre,QB,GNB,269.0,2.0,0.0,35.0,25.0,...,333.0,4.0,3.0,47.0,28.0,2.0,13.0,0.0,0.0,0.0
3,2000,2,Rich Gannon,QB,OAK,207.0,0.0,0.0,22.0,15.0,...,227.0,0.0,1.0,31.0,16.0,1.0,6.0,0.0,0.0,0.0
4,2000,2,Elvis Grbac,QB,KAN,144.0,1.0,0.0,21.0,13.0,...,283.0,1.0,1.0,42.0,20.0,0.0,0.0,0.0,0.0,0.0


In [7]:
# Position 18 is the first lag feature (PassingYds-1): positions 0-17 hold
# Year, Week, Player, Pos, Tm and the 13 current-week stats.
data.iloc[:,18]

0          0.0
1        183.0
2        152.0
3        176.0
4        212.0
         ...  
63179      0.0
63180      0.0
63181      0.0
63182      0.0
63183      0.0
Name: PassingYds-1, Length: 63184, dtype: float64

In [8]:
class FantasyModel():
    """Base class that loads weekly player stats and builds per-player lag features.

    Parameters
    ----------
    pos : str or None
        When given, only rows whose ``Pos`` column equals this value are kept.
    year_range : sequence of two ints
        ``(beginning, end)``. Seasons ``beginning`` .. ``end - 1`` are loaded;
        the end year is exclusive, exactly like ``range``.
    offset : int
        Number of lagged copies of every stat to add (``<stat>-1`` .. ``<stat>-offset``).
    drop_na : bool
        When true, rows containing any missing value are dropped after the lag
        columns are added (this removes rows without a full ``offset``-row history).

    Attributes
    ----------
    data : pandas.DataFrame
        Prepared frame: Year, Week, the remaining CSV columns (fantasy-point
        columns removed), then the lag columns.
    """
    def __init__(self, pos = None, year_range = (1999, 2019), offset = 17, drop_na=True):
        self.position = pos
        self.beginning = year_range[0]
        self.end = year_range[1]
        self.offset = offset
        # Stored on the instance so load_data() does not depend on a notebook global.
        self.drop_na = drop_na
        self.stats = ['PassingYds', 'PassingTD', 'Int', 'PassingAtt', 'Cmp', \
                      'RushingAtt', 'RushingYds', 'RushingTD', 'Rec', 'Tgt', 'ReceivingYds', 'ReceivingTD', 'FL']
        self.load_data()

    def load_data(self):
        """Read the weekly CSV files, add lag features and store the result in ``self.data``.

        Raises
        ------
        ValueError
            If the configured year range selects no seasons.
        """
        weekly_frames = []
        # NOTE(review): the end year is excluded here, so year_range=[1999, 2018]
        # loads 1999-2017. Kept as-is; confirm whether an inclusive end was intended.
        for y in range(self.beginning, self.end):
            for w in range(1, 18):
                # Forward slashes resolve on Windows as well as on POSIX systems.
                new_data = pd.read_csv("data/weekly/" + str(y) + "/week" + str(w) + ".csv")
                weekly_frames.append(new_data.assign(Year = y, Week = w))
        if not weekly_frames:
            raise ValueError("year_range selects no seasons: beginning must be smaller than end")
        # Concatenate once: DataFrame.append was removed in pandas 2.0 and re-copied
        # the whole accumulated frame on every iteration.
        data = pd.concat(weekly_frames, ignore_index=True)

        if self.position is not None:
            # Boolean mask on the Pos column (``x in series`` tests the index, not the values).
            data = data[data['Pos'] == self.position]

        # Column "<stat>-<i>" holds the value from the same player's i-th preceding row.
        player_stats = data.groupby('Player')[self.stats]
        lagged = [player_stats.shift(i).add_suffix('-' + str(i)) for i in range(1, self.offset + 1)]
        data = pd.concat([data] + lagged, axis=1)

        if self.drop_na:
            data = data.dropna()
        data = data.reset_index(drop=True)

        cols = data.columns.tolist()
        for unwanted in ("PPRFantasyPoints", "StandardFantasyPoints", "HalfPPRFantasyPoints", "Year", "Week"):
            cols.remove(unwanted)
        # NOTE(review): the [:-3] trim removes the last three lag columns. It is kept
        # so the column layout matches the notebook-level frame that is fed to
        # predict()/loss() -- confirm whether dropping them is intended.
        cols = ["Year", "Week"] + cols[:-3]
        self.data = data[cols]

In [9]:
class LinearModel(FantasyModel):
    """One Lasso regression per stat, fitted on the lag features built by FantasyModel.

    Parameters
    ----------
    pos, year_range, offset, drop_na
        Forwarded unchanged to ``FantasyModel.__init__``.
    alpha : float
        Regularisation strength passed to every ``Lasso`` instance.
    """

    # Position of the first lag feature: columns 0-17 are Year, Week, Player, Pos,
    # Tm and the 13 current-week stats.
    FEATURE_START = 18

    def __init__(self, pos = None, year_range = (1999, 2019), offset = 17, alpha = 1.0, drop_na=True):
        # drop_na is forwarded so the argument actually reaches the data loader.
        FantasyModel.__init__(self, pos = pos, year_range = year_range, offset = offset, drop_na = drop_na)
        self.alpha = alpha
        self.models = {s: Lasso(alpha = self.alpha) for s in self.stats}

    def train(self):
        """Fit each per-stat model on the lag features of ``self.data``."""
        features = self.data.iloc[:, self.FEATURE_START:]
        for s in self.stats:
            self.models[s].fit(features, self.data[s])

    def predict(self, X):
        """Return ``{stat: prediction}`` for the first row of ``X``.

        ``X`` is a 2-D array of lag features; only the first row's prediction is
        returned for each stat.
        """
        return {s: self.models[s].predict(X)[0] for s in self.stats}

    def loss(self, data):
        """Mean over rows of the weighted sum of squared per-stat errors.

        ``data`` must have the same column layout as ``self.data``: targets are
        read by name, features from position ``FEATURE_START`` onward.

        Raises
        ------
        ValueError
            If ``data`` has no rows.
        """
        # Per-stat weights applied to the squared errors.
        # NOTE(review): these look like fantasy scoring values -- confirm.
        multipliers = {'PassingYds': 0.04, 'PassingTD': 4, 'Int': 2, 'PassingAtt': 0, 'Cmp': 0, \
               'RushingAtt': 0, 'RushingYds': 0.1, 'RushingTD': 6, 'Rec': 1, 'Tgt': 0, \
               'ReceivingYds': 0.1, 'ReceivingTD': 6, 'FL': 2}
        n = len(data)
        if n == 0:
            raise ValueError("loss() needs at least one row")
        # Predict all rows at once per stat instead of one row at a time.
        features = data.iloc[:, self.FEATURE_START:].to_numpy(dtype=float)
        total = 0.0
        for s in self.stats:
            residual = data[s].to_numpy(dtype=float) - self.models[s].predict(features)
            total += multipliers[s] * float(np.sum(residual ** 2))
        return total / n

In [10]:
# The end of year_range is exclusive (load_data iterates range(beginning, end)),
# so this loads seasons 1999-2017 and fits the per-stat models on them.
lm = LinearModel(year_range = [1999, 2018])
lm.train()

In [11]:
# Spot check on a single row: predict from its lag features (position 18 onward),
# then print the row's actual values (positions 0-17) for comparison.
print(lm.predict(np.asarray(data.iloc[-1000, 18:]).reshape(1, -1)))
print(data.iloc[-1000, :18])

{'PassingYds': 194.15847184802575, 'PassingTD': 1.2998474880666004, 'Int': 0.6737236305041523, 'PassingAtt': 27.228109671999423, 'Cmp': 17.308637490045662, 'RushingAtt': 5.099605791877995, 'RushingYds': 21.059910421243934, 'RushingTD': 0.1350024734620691, 'Rec': 0.33159645518745196, 'Tgt': 0.28822765251433413, 'ReceivingYds': 1.0850507437171002, 'ReceivingTD': 0.03603671940838342, 'FL': 0.17242310855959303}
Year                      2019
Week                        13
Player          Ryan Tannehill
Pos                         QB
Tm                         TEN
PassingYds                 182
PassingTD                    2
Int                          0
PassingAtt                  22
Cmp                         17
RushingAtt                   4
RushingYds                   5
RushingTD                    0
Rec                          0
Tgt                          0
ReceivingYds                 0
ReceivingTD                  0
FL                           1
Name: 62184, dtype: object


In [12]:
# Weighted squared-error loss on the 2019 rows of the notebook-level frame.
print(lm.loss(data[data['Year'] == 2019]))

147.522618221084


In [14]:
# Compare Lasso regularisation strengths on the 2019 rows.
# Each iteration re-reads all CSV files, because LinearModel.__init__ calls load_data().
alpha = [0.1, 1, 10]
for a in alpha:
    lm = LinearModel(year_range = [1999, 2018], alpha = a)
    lm.train()
    print(a, ":", lm.loss(data[data['Year'] == 2019]))

0.1 : 147.3318737189492
1 : 147.522618221084
10 : 149.05233964373494
