In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import statistics as math
import random as rd

import seaborn as sea
import matplotlib.pyplot as plt
%matplotlib inline
rd.seed(23232301)

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

from predictMethods import *

In [2]:
# Folder that holds the per-season fantasy football CSV exports.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact directory exists.
base_path = "C:/Users/cmqua/Documents/Fantasy-Points-Prediction-Project/data"

# Read the 2024 through 2021 season files, newest first, one frame per season.
ff24, ff23, ff22, ff21 = map(
    pd.read_csv,
    (
        f"{base_path}/fantasyFootball2024.csv",
        f"{base_path}/fantasyFootball2023.csv",
        f"{base_path}/fantasyFootball2022.csv",
        f"{base_path}/fantasyFootball2021.csv",
    ),
)

In [3]:
# Hand the four season frames (2024 first, 2021 last) to cleanStats from predictMethods.
# NOTE(review): presumably this cleans and combines them into one frame — confirm in predictMethods.
stats = cleanStats([ff24, ff23, ff22, ff21])

In [4]:
# Frame used for all QB modelling below; must expose 'player' and 'fantasy_points' columns.
# NOTE(review): presumably quarterbacks() filters `stats` by position — confirm in predictMethods.
qbs = quarterbacks(stats)

In [5]:
# Frame used for all RB modelling below; must expose 'player' and 'fantasy_points' columns.
# NOTE(review): presumably runningBacks() filters `stats` by position — confirm in predictMethods.
rbs = runningBacks(stats)

In [6]:
# Frame used for all WR modelling below; must expose 'player' and 'fantasy_points' columns.
# NOTE(review): presumably wideReceivers() filters `stats` by position — confirm in predictMethods.
wrs = wideReceivers(stats)

In [7]:
# Frame used for all TE modelling below; must expose 'player' and 'fantasy_points' columns.
# NOTE(review): presumably tightEnds() filters `stats` by position — confirm in predictMethods.
tes = tightEnds(stats)

In [15]:
def trainModel(stats, estimators, exclude_player=None):
    """Fit a random-forest regressor that predicts ``fantasy_points``.

    Every column of ``stats`` other than ``'fantasy_points'`` and ``'player'``
    is used as a feature; ``'fantasy_points'`` is the regression target.

    Parameters
    ----------
    stats : pandas.DataFrame
        Must contain a ``'player'`` column and a ``'fantasy_points'`` column.
    estimators : int
        Number of trees, passed to ``RandomForestRegressor(n_estimators=...)``.
    exclude_player : optional
        When given, rows whose ``'player'`` equals this value are left out of
        the training data. When ``None`` (the default), every row is used.

    Returns
    -------
    RandomForestRegressor
        The fitted model (``random_state`` is fixed at 42).

    Notes
    -----
    The hold-out is an explicit argument so the fitted model never depends on
    a ``player`` variable that happens to be left over in the notebook
    namespace from an earlier loop.
    """
    if exclude_player is None:
        train_rows = stats
    else:
        train_rows = stats[stats["player"] != exclude_player]

    # 'player' is an identifier, not a feature, so it is dropped with the target.
    x_train = train_rows.drop(columns=['fantasy_points', 'player'])
    y_train = train_rows['fantasy_points'].values

    model = RandomForestRegressor(n_estimators=estimators, random_state=42)
    model.fit(x_train, y_train)

    return model

In [33]:
# Primary QB rankings: call getPredictedFantasyPoints once per row of `qbs`
# and average whatever sequence of predictions it returns for that player.
# NOTE(review): 170 is forwarded as-is; presumably an estimator count — confirm in predictMethods.
qb_rows = []
for player in qbs['player']:
    predicted = getPredictedFantasyPoints(qbs, player, 170)
    qb_rows.append((player, round(sum(predicted) / len(predicted), 2)))

# A name is appended once per row it occupies in `qbs`; keep the first entry
# for each player, rank by points (highest first), then write the table out.
qb_rankings = (
    pd.DataFrame(qb_rows, columns=['QB', 'Points'])
    .drop_duplicates(subset=['QB'])
    .sort_values(by='Points', ascending=False)
)
qb_rankings.to_csv(f"{base_path}/qb_rankings.csv")

In [34]:
# Display the first five rows; the frame was sorted by 'Points' descending when it was built.
qb_rankings.head()

Unnamed: 0,QB,Points
49,Patrick Mahomes,349.18
1,Baker Mayfield,342.48
51,Joe Burrow,340.13
23,Dak Prescott,323.12
3,Jared Goff,304.96


In [18]:
# Secondary QB rankings: fit one random forest on the QB frame, then average
# its per-row predictions for each player.
# NOTE(review): the model is fitted once before the loop, so most (possibly
# all) of the rows scored below were also training rows — treat these as
# largely in-sample numbers.
qb_rankings = []
qb_model = trainModel(qbs, 155)

# The feature frame is the same for every player, so build it once instead of
# re-dropping the target column on every iteration.
qb_features = qbs.drop(columns=['fantasy_points'])
for player in qbs['player']:
    x_test = qb_features[qb_features["player"] == player].drop(columns=['player'])
    ypred = qb_model.predict(x_test)
    pts = round(sum(ypred)/len(ypred), 2)
    qb_rankings.append((player, pts))

# A name is appended once per row it occupies in `qbs`; keep the first entry
# for each player and rank from highest to lowest predicted points.
qb_rankings = pd.DataFrame(qb_rankings, columns = ['QB', 'Points'])
qb_rankings = qb_rankings.drop_duplicates(subset=['QB']).sort_values(by='Points', ascending=False)
qb_rankings.head()

Unnamed: 0,QB,Points
49,Patrick Mahomes,373.44
0,Josh Allen,369.69
51,Joe Burrow,332.53
2,Jayden Daniels,323.05
23,Dak Prescott,322.26


In [19]:
# Write the current `qb_rankings` frame into the data folder; with to_csv's
# defaults the frame's index is written as the first (unnamed) CSV column.
qb_rankings.to_csv(f"{base_path}/secondary_qb_rankings.csv")

In [37]:
# Primary RB rankings: call getPredictedFantasyPoints once per row of `rbs`
# and average whatever sequence of predictions it returns for that player.
# NOTE(review): 290 is forwarded as-is; presumably an estimator count — confirm in predictMethods.
rb_rows = []
for player in rbs['player']:
    predicted = getPredictedFantasyPoints(rbs, player, 290)
    rb_rows.append((player, round(sum(predicted) / len(predicted), 2)))

# A name is appended once per row it occupies in `rbs`; keep the first entry
# for each player, rank by points (highest first), then write the table out.
rb_rankings = (
    pd.DataFrame(rb_rows, columns=['RB', 'Points'])
    .drop_duplicates(subset=['RB'])
    .sort_values(by='Points', ascending=False)
)
rb_rankings.to_csv(f"{base_path}/rb_rankings.csv")

In [23]:
# Secondary RB rankings: fit one random forest on the RB frame, then average
# its per-row predictions for each player.
# NOTE(review): the model is fitted once before the loop, so most (possibly
# all) of the rows scored below were also training rows — treat these as
# largely in-sample numbers.
rb_rankings = []
rb_model = trainModel(rbs, 290)

# The feature frame is the same for every player, so build it once instead of
# re-dropping the target column on every iteration.
rb_features = rbs.drop(columns=['fantasy_points'])
for player in rbs['player']:
    x_test = rb_features[rb_features["player"] == player].drop(columns=['player'])
    ypred = rb_model.predict(x_test)
    pts = round(sum(ypred)/len(ypred), 2)
    rb_rankings.append((player, pts))

# A name is appended once per row it occupies in `rbs`; keep the first entry
# for each player and rank from highest to lowest predicted points.
rb_rankings = pd.DataFrame(rb_rankings, columns = ['RB', 'Points'])
rb_rankings = rb_rankings.drop_duplicates(subset=['RB']).sort_values(by='Points', ascending=False)
rb_rankings.head()

Unnamed: 0,RB,Points
2,Jahmyr Gibbs,262.88
3,Bijan Robinson,258.52
1,Derrick Henry,252.26
10,Joe Mixon,230.35
4,Josh Jacobs,226.89


In [24]:
# Write the current `rb_rankings` frame into the data folder; with to_csv's
# defaults the frame's index is written as the first (unnamed) CSV column.
rb_rankings.to_csv(f"{base_path}/secondary_rb_rankings.csv")

In [38]:
# Primary WR rankings: call getPredictedFantasyPoints once per row of `wrs`
# and average whatever sequence of predictions it returns for that player.
# NOTE(review): 120 is forwarded as-is; presumably an estimator count — confirm in predictMethods.
wr_rows = []
for player in wrs['player']:
    predicted = getPredictedFantasyPoints(wrs, player, 120)
    wr_rows.append((player, round(sum(predicted) / len(predicted), 2)))

# A name is appended once per row it occupies in `wrs`; keep the first entry
# for each player, rank by points (highest first), then write the table out.
wr_rankings = (
    pd.DataFrame(wr_rows, columns=['WR', 'Points'])
    .drop_duplicates(subset=['WR'])
    .sort_values(by='Points', ascending=False)
)
wr_rankings.to_csv(f"{base_path}/wr_rankings.csv")

In [27]:
# Secondary WR rankings: fit one random forest on the WR frame, then average
# its per-row predictions for each player.
# NOTE(review): the model is fitted once before the loop, so most (possibly
# all) of the rows scored below were also training rows — treat these as
# largely in-sample numbers.
wr_rankings = []
wr_model = trainModel(wrs, 120)

# The feature frame is the same for every player, so build it once instead of
# re-dropping the target column on every iteration.
wr_features = wrs.drop(columns=['fantasy_points'])
for player in wrs['player']:
    x_test = wr_features[wr_features["player"] == player].drop(columns=['player'])
    ypred = wr_model.predict(x_test)
    pts = round(sum(ypred)/len(ypred), 2)
    wr_rankings.append((player, pts))

# A name is appended once per row it occupies in `wrs`; keep the first entry
# for each player and rank from highest to lowest predicted points.
wr_rankings = pd.DataFrame(wr_rankings, columns = ['WR', 'Points'])
wr_rankings = wr_rankings.drop_duplicates(subset=['WR']).sort_values(by='Points', ascending=False)
wr_rankings.head()

Unnamed: 0,WR,Points
22,Tyreek Hill,251.34
0,Ja'Marr Chase,250.22
1,Justin Jefferson,249.51
11,Davante Adams,244.34
8,CeeDee Lamb,242.4


In [28]:
# Write the current `wr_rankings` frame into the data folder; with to_csv's
# defaults the frame's index is written as the first (unnamed) CSV column.
wr_rankings.to_csv(f"{base_path}/secondary_wr_rankings.csv")

In [32]:
# Primary TE rankings: call getPredictedFantasyPoints once per row of `tes`
# and average whatever sequence of predictions it returns for that player.
# NOTE(review): 170 is forwarded as-is; presumably an estimator count — confirm in predictMethods.
te_rows = []
for player in tes['player']:
    predicted = getPredictedFantasyPoints(tes, player, 170)
    te_rows.append((player, round(sum(predicted) / len(predicted), 2)))

# A name is appended once per row it occupies in `tes`; keep the first entry
# for each player, rank by points (highest first), then write the table out.
te_rankings = (
    pd.DataFrame(te_rows, columns=['TE', 'Points'])
    .drop_duplicates(subset=['TE'])
    .sort_values(by='Points', ascending=False)
)
te_rankings.to_csv(f"{base_path}/te_rankings.csv")

In [35]:
# Display the first five rows; the frame was sorted by 'Points' descending when it was built.
te_rankings.head()

Unnamed: 0,TE,Points
0,Brock Bowers,194.44
1,Trey McBride,188.8
5,Travis Kelce,164.24
54,Zach Ertz,158.18
14,Sam LaPorta,153.85


In [29]:
# Secondary TE rankings: fit one random forest on the TE frame, then average
# its per-row predictions for each player.
# NOTE(review): the model is fitted once before the loop, so most (possibly
# all) of the rows scored below were also training rows — treat these as
# largely in-sample numbers.
te_rankings = []
te_model = trainModel(tes, 155)

# The feature frame is the same for every player, so build it once instead of
# re-dropping the target column on every iteration.
te_features = tes.drop(columns=['fantasy_points'])
for player in tes['player']:
    x_test = te_features[te_features["player"] == player].drop(columns=['player'])
    ypred = te_model.predict(x_test)
    pts = round(sum(ypred)/len(ypred), 2)
    te_rankings.append((player, pts))

# A name is appended once per row it occupies in `tes`; keep the first entry
# for each player and rank from highest to lowest predicted points.
te_rankings = pd.DataFrame(te_rankings, columns = ['TE', 'Points'])
te_rankings = te_rankings.drop_duplicates(subset=['TE']).sort_values(by='Points', ascending=False)
te_rankings.head()

Unnamed: 0,TE,Points
0,Brock Bowers,199.65
5,Travis Kelce,194.99
1,Trey McBride,184.36
14,Sam LaPorta,178.21
3,Mark Andrews,159.55


In [30]:
# Write the current `te_rankings` frame into the data folder; with to_csv's
# defaults the frame's index is written as the first (unnamed) CSV column.
te_rankings.to_csv(f"{base_path}/secondary_te_rankings.csv")