In [101]:
import skforecast
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('seaborn-v0_8-darkgrid')
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error
from dateutil.relativedelta import relativedelta
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import backtesting_forecaster
from skforecast.model_selection import grid_search_forecaster
from dataclasses import dataclass

In [109]:
df = pd.read_csv('database/DIM_PLAYER_GAMES.csv')
test = list(df['PLAYER'].unique()[:100])
data = df[df['PLAYER'].isin(test)]
test

['Christian McCaffrey',
 'Tyreek Hill',
 'CeeDee Lamb',
 'Josh Allen',
 'Raheem Mostert',
 'Jalen Hurts',
 'Keenan Allen',
 'Mike Evans',
 'A.J. Brown',
 'Travis Etienne',
 "Ja'Marr Chase",
 'Stefon Diggs',
 'Travis Kelce',
 'Dak Prescott',
 'Amon-Ra St. Brown',
 'Sam LaPorta',
 'T.J. Hockenson',
 'George Kittle',
 'Nico Collins',
 'D.J. Moore',
 'Derrick Henry',
 'Puka Nacua',
 'Brandon Aiyuk',
 'Kyren Williams',
 'Brian Robinson Jr.',
 'Taysom Hill',
 'C.J. Stroud',
 'Sam Howell',
 'Mark Andrews',
 'Isiah Pacheco',
 'DeVonta Smith',
 'Joe Mixon',
 'David Montgomery',
 'Josh Jacobs',
 'Tank Dell',
 'D.K. Metcalf',
 'Justin Herbert',
 'Deebo Samuel',
 'Rachaad White',
 'Brock Purdy',
 'Michael Pittman Jr.',
 'Bijan Robinson',
 'Gus Edwards',
 'Jake Ferguson',
 'Courtland Sutton',
 'Jordan Addison',
 'Cole Kmet',
 'Patrick Mahomes',
 'Lamar Jackson',
 'DeAndre Hopkins',
 'Jordan Love',
 'Tony Pollard',
 'Alvin Kamara',
 'Chris Olave',
 "D'Andre Swift",
 'Dalton Schultz',
 'Davante Adams

In [111]:
@dataclass
class Predict:
    df: pd.DataFrame
    players: list
    steps: int
    lags: int
   
    
    def __post_init__(self):
        self.results = []

    def predict(self, player: str):
        data = self.df.loc[self.df["PLAYER"] == player]
        data["FORECAST_DATE"] = pd.date_range(
            "2018-01-01", periods=len(data), freq="MS"
        )
        data["FORECAST_DATE"] = pd.to_datetime(data["FORECAST_DATE"], format="%Y-%m-%d")
        data = data.set_index("FORECAST_DATE")
        data = data.asfreq("MS")
        data = data["PPR"]
        end_train = max(data.index) - relativedelta(months=self.steps)
        forecaster = ForecasterAutoreg(
            regressor=RandomForestRegressor(random_state=123), lags=self.lags
        )

        forecaster.fit(y=data.loc[: max(data.index) - relativedelta(months=self.steps)])
        predictions = forecaster.predict(steps=self.steps+1)
        error_mape = mean_absolute_percentage_error(
            y_true=data.loc[end_train:], y_pred=predictions
        )
        self.results.append(
            [
                player,
                np.sum(data.loc[end_train:]),
                np.sum(predictions),
                abs(np.sum(data.loc[end_train:]) - np.sum(predictions))
                / np.sum(data.loc[end_train:]),
                error_mape,
                np.size(data.loc[end_train:]),
                forecaster.regressor,
            ]
        )
    def predict_all(self):
        for player in self.players:
            try:
                self.predict(player=player)
            
            except ValueError:
                print(player)

        return self.results



ValueError: mutable default <class 'list'> for field results is not allowed: use default_factory

In [110]:
df = pd.read_csv("database/DIM_PLAYER_GAMES.csv")
results = []
for player in test:
    data = df.loc[df["PLAYER"] == player]
    data["FORECAST_DATE"] = pd.date_range("2018-01-01", periods=len(data), freq="MS")
    data["FORECAST_DATE"] = pd.to_datetime(data["FORECAST_DATE"], format="%Y-%m-%d")
    data = data.set_index("FORECAST_DATE")
    data = data.asfreq("MS")
    data = data["PPR"]
    end_train = max(data.index) - relativedelta(months=16)
    forecaster = ForecasterAutoreg(regressor=RandomForestRegressor(random_state=123), lags=7)

    forecaster.fit(y=data.loc[: max(data.index) - relativedelta(months=16)])
    predictions = forecaster.predict(steps=17)
    error_mape = mean_absolute_percentage_error(
        y_true=data.loc[end_train:], y_pred=predictions
    )
    results.append(
        [
            player,
            np.sum(data.loc[end_train:]),
            np.sum(predictions),
            abs(np.sum(data.loc[end_train:]) - np.sum(predictions))
            / np.sum(data.loc[end_train:]),
            error_mape,
            np.size(data.loc[end_train:]),
            forecaster.regressor,
        ]
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["FORECAST_DATE"] = pd.date_range("2018-01-01", periods=len(data), freq="MS")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["FORECAST_DATE"] = pd.to_datetime(data["FORECAST_DATE"], format="%Y-%m-%d")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["FORECAST_DATE"] = pd.date_range("201

ValueError: The maximum lag (7) must be less than the length of the series (0).

In [108]:
df_results = pd.DataFrame(results, columns=['PLAYER', 'ACTUAL SEASON TOTAL', 'PREDICTED', 'SEASON MAPE',
                                             'GAME MAPE', 'GAMES PREDICTED', 'REGRESSOR'])
df_results

Unnamed: 0,PLAYER,ACTUAL SEASON TOTAL,PREDICTED,SEASON MAPE,GAME MAPE,GAMES PREDICTED,REGRESSOR
0,Christian McCaffrey,416.2,434.6,0.04421,0.300495,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
1,Tyreek Hill,388.9,316.803,0.185387,0.504053,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
2,CeeDee Lamb,363.8,238.922,0.34326,0.368047,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
3,Josh Allen,436.86,406.5198,0.069451,0.234079,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
4,Raheem Mostert,276.5,168.131,0.391931,0.395593,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
5,Jalen Hurts,452.56,375.1932,0.170954,0.284458,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
6,Keenan Allen,369.26,284.34,0.229973,0.357634,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
7,Mike Evans,308.4,246.081,0.202072,0.569348,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
8,A.J. Brown,333.0,285.373,0.143024,0.788248,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
9,Travis Etienne,266.2,54.4,0.795642,0.698151,17,"(DecisionTreeRegressor(max_features=1.0, rando..."
