In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import statistics as math
import random as rd

import seaborn as sea
import matplotlib.pyplot as plt
%matplotlib inline
rd.seed(23232301)

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

In [2]:
ff24 = pd.read_csv("fantasyFootball2024.csv")
ff23 = pd.read_csv("fantasyFootball2023.csv")
ff22 = pd.read_csv("fantasyFootball2022.csv")
ff21 = pd.read_csv("fantasyFootball2021.csv")

In [3]:
ff24

Unnamed: 0,Rk,Player,Tm,FantPos,Age,G,GS,Cmp,Att,Yds,...,2PM,2PP,FantPt,PPR,DKPt,FDPt,VBD,PosRank,OvRank,-9999
0,1,Saquon Barkley*+,PHI,RB,27,16,16,0.0,0.0,0.0,...,3.0,,322.0,355.3,362.3,338.8,163.0,1,1.0,BarkSa00
1,2,Derrick Henry*,BAL,RB,30,17,17,0.0,0.0,0.0,...,,,317.0,336.4,343.4,326.9,159.0,2,2.0,HenrDe00
2,3,Jahmyr Gibbs*,DET,RB,22,17,4,0.0,0.0,0.0,...,,,311.0,362.9,369.9,336.9,153.0,3,3.0,GibbJa01
3,4,Lamar Jackson*+,BAL,QB,27,17,17,316.0,474.0,4172.0,...,1.0,,430.0,430.4,445.4,434.4,140.0,1,4.0,JackLa00
4,5,Ja'Marr Chase*+,CIN,WR,24,17,16,0.0,0.0,0.0,...,,,276.0,403.0,406.0,339.5,138.0,1,5.0,ChasJa00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
625,626,Dylan Laube,LVR,RB,25,10,0,0.0,0.0,0.0,...,,,-2.0,-2.0,-1.0,-2.0,,156,,LaubDy00
626,627,Steven Sims,2TM,WR,27,9,0,0.0,0.0,0.0,...,,,-2.0,-2.0,-1.0,-2.0,,248,,SimsSt00
627,628,Kadarius Toney,CLE,WR,25,3,0,0.0,0.0,0.0,...,,,-2.0,-2.4,-1.4,-2.4,,249,,ToneKa00
628,629,Clayton Tune,ARI,QB,25,6,0,2.0,2.0,8.0,...,,,-2.0,-2.1,-1.1,-2.1,,79,,TuneCl00


In [4]:
def cleanStats(partial_stats): 
    
    if isinstance(partial_stats, list): 
        stats = pd.concat(partial_stats, ignore_index = True)
    else: 
        stats = partial_stats
    
    stats = stats.rename(columns = {
        'Rk': 'rank', 
        'Player': 'player', 
        'Tm': 'team', 
        'Age': 'age',
        'FantPos': 'fantasy_position',
        'G': 'games_played', 
        'GS': 'games_started', 
        'Cmp': 'passes_completed', 
        'Att': 'passes_attempted', 
        'Yds': 'passing_yds',
        'TD': 'passing_tds',
        'Int': 'interceptions',
        'Att.1': 'rushing_attempts', 
        'Yds.1': 'rush_yds', 
        'Y/A': 'rush_yds_per_attempt',
        'TD.1': 'rush_tds',
        'Tgt': 'targets', 
        'Rec': 'receptions', 
        'Yds.2': 'receiving_yds', 
        'Y/R': 'receiving_yds_per_reception',
        'TD.2': 'receiving_tds', 
        'Fmb': 'fumbles', 
        'FL': 'fumbles_lost', 
        'TD.3': 'total_tds', 
        '2PM': '2pt_conversions_made', 
        '2PP': '2pt_conversion_passes',
        'PosRank': 'position_rank', 
        'OvRank': 'overall_rank'
    })
    
    yds = 0.1*(stats['rush_yds'] + stats['receiving_yds']) + 0.04*stats['passing_yds']
    tds = 6*(stats['rush_tds'] + stats['receiving_tds']) + 4*stats['passing_tds']
    recs = 0.5*stats['receptions']
    two = 2*stats['2pt_conversions_made'] + 2*stats['2pt_conversion_passes']
    lost = -2*stats['fumbles'] - 1.5*stats['interceptions'] 
    stats['fantasy_points'] = yds + tds + recs + lost
    
    stats = stats.drop(columns = ['team',
                                  'rank', 
                                  'FantPt', 
                                  'PPR', 
                                  'DKPt', 
                                  'FDPt', 
                                  'VBD',  
                                  '-9999'], axis = 1)
    
    return stats

In [42]:
stats.columns

Index(['player', 'fantasy_position', 'age', 'games_played', 'games_started',
       'passes_completed', 'passes_attempted', 'passing_yds', 'passing_tds',
       'interceptions', 'rushing_attempts', 'rush_yds', 'rush_yds_per_attempt',
       'rush_tds', 'targets', 'receptions', 'receiving_yds',
       'receiving_yds_per_reception', 'receiving_tds', 'fumbles',
       'fumbles_lost', 'total_tds', '2pt_conversions_made',
       '2pt_conversion_passes', 'position_rank', 'overall_rank',
       'fantasy_points'],
      dtype='object')

In [43]:
def quarterbacks(stats): 
    
    qbs = stats[stats['fantasy_position'] == "QB"]
    
    qbs = qbs[['player', 
               'age', 
               'games_played', 
               'passes_completed', 
               'passing_yds', 
               'passing_tds', 
               'interceptions', 
               'rushing_attempts', 
               'rush_yds_per_attempt', 
               'total_tds', 
               '2pt_conversion_passes', 
               'fantasy_points']]
    
    qbs = qbs.dropna()
    
    return qbs

In [6]:
def runningBacks(stats):
    
    rbs = stats[stats['fantasy_position'] == "RB"]
    rbs = rbs[[
        'player', 
        'age', 
        'games_started', 
        'rush_yds', 
        'rush_yds_per_attempt', 
        'targets', 
        'receiving_yds', 
        'fumbles_lost', 
        'total_tds', 
        'fantasy_points'
    ]]
    
    rbs = rbs.dropna()
    
    return rbs
    

In [61]:
wrs.columns

Index(['player', 'fantasy_position', 'age', 'games_played', 'games_started',
       'rushing_attempts', 'rush_yds', 'rush_yds_per_attempt', 'rush_tds',
       'targets', 'receptions', 'receiving_yds', 'receiving_yds_per_reception',
       'receiving_tds', 'fumbles', 'fumbles_lost', 'total_tds',
       '2pt_conversions_made', 'position_rank', 'overall_rank',
       'fantasy_points'],
      dtype='object')

In [67]:
def wideReceivers(stats):
    
    wrs = stats[stats['fantasy_position'] == "WR"]
    
    wrs = wrs[[
        'player', 
        'age',
        'games_started', 
        'rush_yds', 
        'targets', 
        'receptions', 
        'receiving_yds_per_reception', 
        'fumbles_lost', 
        'total_tds', 
        'fantasy_points'
    ]]
    
    wrs = wrs.dropna()
    
    return wrs

In [86]:
def tightEnds(stats):
    
    tes = stats[stats['fantasy_position'] == "TE"]
    
    tes = tes[[
        'player', 
        'age', 
        'games_started', 
        'rushing_attempts', 
        'rush_yds_per_attempt', 
        'targets', 
        'receiving_yds_per_reception', 
        'fumbles_lost', 
        'total_tds',
        'fantasy_points'
    ]]
    
    tes = tes.dropna()
    
    return tes

In [9]:
stats = cleanStats([ff21, ff22, ff23, ff24])

In [10]:
def testPrediction(stats, estimators = 100): 
    
    x = stats.drop(columns = ['fantasy_points', 'player'])
    y = stats[['fantasy_points']]
    
    # 2. Split Data into Training and Testing Sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

    # 3. Initialize and Train the Random Forest Regressor
    # n_estimators: Number of trees in the forest
    # random_state: Controls the randomness for reproducibility
    model = RandomForestRegressor(n_estimators=estimators, random_state=42)
    model.fit(x_train, y_train)

    # 4. Make Predictions on the Test Set
    y_pred = model.predict(x_test)

    # 5. Evaluate the Model
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
    

In [44]:
qbs = quarterbacks(stats)
qbs.head()

Unnamed: 0,player,age,games_played,passes_completed,passing_yds,passing_tds,interceptions,rushing_attempts,rush_yds_per_attempt,total_tds,2pt_conversion_passes,fantasy_points
3,Josh Allen,25,17,409.0,4407.0,36.0,15.0,122.0,6.25,6,1.0,394.08
5,Justin Herbert*,23,17,443.0,5014.0,38.0,15.0,63.0,4.79,3,5.0,376.26
12,Patrick Mahomes*,26,17,436.0,4839.0,37.0,13.0,66.0,5.77,2,1.0,354.16
19,Matthew Stafford,33,17,404.0,4886.0,41.0,17.0,32.0,1.34,0,2.0,328.24
22,Dak Prescott,28,16,410.0,4449.0,37.0,10.0,48.0,3.04,1,2.0,303.56


In [56]:
testPrediction(qbs, 155)

  return fit_method(estimator, *args, **kwargs)


Root Mean Squared Error (RMSE): 41.10


In [13]:
def getPredictedFantasyPoints(stats, player, estimators): 
    
    x = stats.drop(columns = ['fantasy_points'])
    y = stats[['fantasy_points', 'player']]
    
    x_test = x[x["player"] == player].drop(columns = ['player'])
    x_train = x[x["player"] != player].drop(columns = ['player'])
    
    y_test = y[y["player"] == player].drop(columns = ['player'])
    y_train = y[y["player"] != player].drop(columns = ['player'])
    
    
    model = RandomForestRegressor(n_estimators=estimators, random_state=42)
    model.fit(x_train, y_train)

    # 4. Make Predictions on the Test Set
    y_pred = model.predict(x_test)

    # 5. Evaluate the Model
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
    print(f"Prediction range: between {round(y_pred[0], 2)} and {round(y_pred[1], 2)} total points")
    print(f"Prediction average per week: between {round(y_pred[0]/14, 2)} and {round(y_pred[1]/14, 2)} points per week")
    
    
    return y_pred
    

In [14]:
pred = getPredictedFantasyPoints(qbs, "Josh Allen", 179)

  return fit_method(estimator, *args, **kwargs)


Root Mean Squared Error (RMSE): 40.63
Prediction range: between 364.03 and 340.66 total points
Prediction average per week: between 26.0 and 24.33 points per week


In [15]:
import re
split = re.sub('([A-Z][a-z]+)', r' \1', re.sub('([A-Z]+)', r' \1', "JoshAllen")).split()

In [16]:
split

['Josh', 'Allen']

In [17]:
qbs

Unnamed: 0,player,age,games_played,passes_completed,passing_yds,passing_tds,interceptions,rushing_attempts,total_tds,2pt_conversion_passes,fantasy_points
3,Josh Allen,25,17,409.0,4407.0,36.0,15.0,122.0,6,1.0,394.08
5,Justin Herbert*,23,17,443.0,5014.0,38.0,15.0,63.0,3,5.0,376.26
12,Patrick Mahomes*,26,17,436.0,4839.0,37.0,13.0,66.0,2,1.0,354.16
19,Matthew Stafford,33,17,404.0,4886.0,41.0,17.0,32.0,0,2.0,328.24
22,Dak Prescott,28,16,410.0,4449.0,37.0,10.0,48.0,1,2.0,303.56
...,...,...,...,...,...,...,...,...,...,...,...
2104,Mason Rudolph,29,8,146.0,1530.0,9.0,9.0,25.0,1,1.0,90.30
2134,Drew Lock,28,8,107.0,1071.0,6.0,5.0,18.0,2,1.0,74.64
2144,Deshaun Watson,29,7,137.0,1148.0,5.0,3.0,31.0,1,1.0,72.22
2207,Jacoby Brissett,32,8,95.0,826.0,2.0,1.0,15.0,0,1.0,37.74


In [18]:
rbs = runningBacks(stats)

In [19]:
rbs

Unnamed: 0,player,age,games_started,rush_yds,rush_yds_per_attempt,targets,receiving_yds,fumbles_lost,total_tds,fantasy_points
0,Jonathan Taylor*+,22,17,1811.0,5.45,51.0,360.0,2.0,20,349.1
4,Austin Ekeler,26,16,911.0,4.42,94.0,647.0,3.0,20,302.8
10,Joe Mixon*,25,16,1205.0,4.13,48.0,314.0,1.0,16,264.9
13,Najee Harris*,23,17,1200.0,3.91,94.0,467.0,0.0,10,263.7
15,James Conner*,26,6,752.0,3.72,39.0,375.0,0.0,18,235.2
...,...,...,...,...,...,...,...,...,...,...
2515,British Brooks,25,0,2.0,2.00,0.0,0.0,0.0,0,0.2
2525,Darrynton Evans,26,0,3.0,1.00,0.0,0.0,0.0,0,0.3
2544,Joshua Kelley,27,0,2.0,2.00,0.0,0.0,0.0,0,0.2
2582,Ke'Shawn Vaughn,27,0,4.0,2.00,0.0,0.0,0.0,0,0.4


In [20]:
player_stats = stats[stats["player"] == "Jonathan Taylor"]

In [21]:
player_stats.iloc[0]['fantasy_position']

'RB'

In [22]:
rbs = runningBacks(stats)

In [41]:
testPrediction(rbs, 290)

  return fit_method(estimator, *args, **kwargs)


Root Mean Squared Error (RMSE): 5.83


In [68]:
wrs = wideReceivers(stats)

In [82]:
testPrediction(wrs, 120)

  return fit_method(estimator, *args, **kwargs)


Root Mean Squared Error (RMSE): 6.16


In [100]:
tes = tightEnds(stats)
tes.head()

Unnamed: 0,player,age,games_started,rushing_attempts,rush_yds_per_attempt,targets,receiving_yds_per_reception,fumbles_lost,total_tds,fantasy_points
8,Mark Andrews*+,26,9,1.0,0.0,153.0,12.72,0.0,9,241.6
14,Travis Kelce*,32,16,2.0,1.5,134.0,12.23,1.0,10,216.8
36,George Kittle*,28,14,3.0,6.67,94.0,12.82,1.0,6,160.5
47,Taysom Hill,31,9,70.0,5.34,6.0,13.0,0.0,5,118.22
63,Zach Ertz,31,14,1.0,4.0,112.0,10.31,0.0,5,143.7


In [99]:
testPrediction(tes, 170)

  return fit_method(estimator, *args, **kwargs)


Root Mean Squared Error (RMSE): 28.69
