In [1]:
# Import dependencies
import getpass
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
from sklearn import linear_model
from sqlalchemy import create_engine

In [2]:
# Define helper functions for fitting, evaluating and applying the models
def model(df, position, var):
    """Fit a linear regression that predicts next-season PPG for one position.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'pos' column, a 'next_yr_ppg' column and every column
        named in ``var``.
    position : str
        One of 'QB', 'RB', 'WR' or 'TE'.
    var : list of str
        Names of the columns used as independent variables.

    Returns
    -------
    sklearn.linear_model.LinearRegression or None
        The fitted regression, or None (after printing a message) when
        ``position`` is not one of the supported positions.
    """
    # The original chain tested `position == 'WR' or 'TE'`, which is always
    # truthy, so the "invalid" branch could never run. Check membership instead.
    if position not in ('QB', 'RB', 'WR', 'TE'):
        print('Invalid position entered')
        return

    # NOTE(review): dropna() discards a row when ANY column is missing, not
    # only the columns in `var` -- confirm that is the intended training set.
    df = df.dropna()
    df = df[df['pos'] == position]

    X = df[var]
    y = df['next_yr_ppg']
    reg = linear_model.LinearRegression()
    reg.fit(X, y)
    return reg

def testModelAccuracy(model, df, position, var):
    # creates a df from the csvFile, drops na values and rows where FantPos
    # does not equal the position parameter
    df = df.dropna()
    df = df[df['pos'] == position]

    # the model will use different independent variables depending on position
    if position == 'QB':
        XTest = df[var]
    elif position == 'RB':
        XTest = df[var]
    elif position == 'WR' or 'TE':
        XTest = df[var]
    else:
        print('Invalid position entered')
        return

    yTest = df['next_yr_ppg']
    results = model.score(XTest, yTest)
    return results

def testModelDifference(model, df, position, var):
    # creates a df from the csvFile, drops na values and rows where FantPos
    # does not equal the position parameter
    df = df.dropna()
    df = df[df['pos'] == position]

    # the model will use different independent variables depending on position
    if position == 'QB':
        XTest = df[var]
    elif position == 'RB':
        XTest = df[var]
    elif position == 'WR' or 'TE':
        XTest = df[var]
    else:
        print('Invalid position entered')
        return


    yPred = model.predict(XTest)
    predAndActual = {'Name': df['player'], 'Predicted PPG': yPred,
                     'Actual PPG': df['next_yr_ppg']}

    # creates df from dictionary above
    database = pd.DataFrame(predAndActual)

    # creates a difference column which depicts the difference between the
    # predicted PPG and actual PPG
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    database['Difference'] = database['Predicted PPG'] - database['Actual PPG']
    database['Difference'] = database['Difference'].round(decimals=3)
    database['AbsDifference'] = database['Difference'].abs()
    meanDiff = round(database['Difference'].mean(), 3)
    medianDiff = round(database['Difference'].median(), 3)
    meanAbsDiff = round(database['AbsDifference'].mean(), 3)
    medianAbsDiff = round(database['AbsDifference'].median(), 3)

    return database, meanDiff, medianDiff, meanAbsDiff, medianAbsDiff

def testModel(model, test, train, position, var):
    """Print a short evaluation report for one position model.

    ``testModelAccuracy`` is evaluated on ``train`` and ``testModelDifference``
    on ``test``; the results are printed and nothing is returned.

    NOTE(review): the score is computed on the training frame, so it is an
    in-sample figure -- confirm that this is intended.
    """
    score = testModelAccuracy(model, train, position, var)
    report = testModelDifference(model, test, position, var)
    # report[0] is the per-player table; only the four summary numbers are printed.
    mean_err, median_err, mean_abs_err, median_abs_err = report[1:5]

    lines = (
        'The accuracy of the {0} model is {1}'.format(position, score),
        'The {0} model has an average error of {1} PPG and an average absolute error of {2} PPG'.format(position, mean_err, mean_abs_err),
        'The {0} model has a median error of {1} PPG and a median absolute error of {2} PPG'.format(position, median_err, median_abs_err),
        '\n',
    )
    for line in lines:
        print(line)
    return

def checkModel(model, df1, df2, position, var):
    """Predict PPG from ``df1`` and compare it with the actual PPG in ``df2``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    df1 : pandas.DataFrame
        Feature rows; must contain 'player', 'pos' and every column in ``var``.
    df2 : pandas.DataFrame
        Rows holding the actual results; must contain 'plid', 'player', 'pos'
        and 'ppr_g'.
    position : str
        One of 'QB', 'RB', 'WR' or 'TE'.
    var : list of str
        Feature columns passed to ``model.predict``.

    Returns
    -------
    pandas.DataFrame or None
        One row per player present in both frames (matched on player name),
        ordered by predicted PPG descending, with the columns 'Name', 'Pos',
        'Predicted PPG', 'ppr_g', 'Difference', 'Predicted PPR',
        'AbsDifference', 'ActPosRank' and 'PosRank'. Returns None (after
        printing a message) when ``position`` is unsupported.

    Side effects
    ------------
    Prints the mean/median signed and absolute errors.
    """
    # `position == 'WR' or 'TE'` was always truthy in the original, so the
    # invalid-position branch was unreachable; check membership instead.
    if position not in ('QB', 'RB', 'WR', 'TE'):
        print('Invalid position entered')
        return

    # The original called df1.dropna() / df2.dropna() without using the
    # result, so no rows were ever removed. That behavior is kept: rows are
    # filtered by position only.
    df1 = df1[df1['pos'] == position]
    df2 = df2[df2['pos'] == position]
    df2 = df2[['plid', 'player', 'ppr_g']]

    yPred = model.predict(df1[var])

    # Pair each prediction with the player's actual PPG, matching on name.
    database = pd.DataFrame({'Name': df1['player'], 'Pos': df1['pos'],
                             'Predicted PPG': yPred})
    database = database.merge(df2, how='inner', left_on='Name', right_on='player')
    database = database.drop(columns=['player', 'plid'])

    # The difference uses the unrounded prediction; rounding happens afterwards.
    database['Difference'] = database['Predicted PPG'] - database['ppr_g']
    database = database.sort_values(by=['Predicted PPG'], ascending=False)
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    # Season total = per-game prediction x 17 (presumably games per season).
    database['Predicted PPR'] = 17 * database['Predicted PPG']
    database['Predicted PPR'] = database['Predicted PPR'].round(decimals=3)
    database['AbsDifference'] = database['Difference'].abs()

    meanDiff = round(database['Difference'].mean(), 3)
    medianDiff = round(database['Difference'].median(), 3)
    meanAbsDiff = round(database['AbsDifference'].mean(), 3)
    medianAbsDiff = round(database['AbsDifference'].median(), 3)
    print('The {0} model has an average error of {1} PPG and an average absolute error of {2} PPG'.format(position, meanDiff, meanAbsDiff))
    print('The {0} model has a median error of {1} PPG and a median absolute error of {2} PPG'.format(position, medianDiff, medianAbsDiff))

    # Rank by actual PPG, then by predicted PPG; ranks are 1-based positions
    # in the respective sort order.
    database = database.sort_values(by=['ppr_g'], ascending=False)
    database['ActPosRank'] = range(1, len(database) + 1)
    database = database.sort_values(by=['Predicted PPG'], ascending=False)
    database['PosRank'] = range(1, len(database) + 1)

    database = database.reset_index(drop=True)
    return database

def finalPredict(model, df, position, var):
    """Predict PPG for every player of one position and rank the results.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``; ``var`` must name the same
        columns the model was fitted on.
    df : pandas.DataFrame
        Must contain 'player', 'pos' and every column named in ``var``.
    position : str
        One of 'QB', 'RB', 'WR' or 'TE'.
    var : list of str
        Feature columns passed to ``model.predict``.

    Returns
    -------
    pandas.DataFrame or None
        Columns 'Name', 'Pos', 'Predicted PPG', 'Predicted PPR' and 'PosRank',
        ordered by predicted PPG descending with a fresh 0-based index.
        Returns None (after printing a message) when ``position`` is
        unsupported.
    """
    # `position == 'WR' or 'TE'` was always truthy in the original, so the
    # invalid-position branch was unreachable; check membership instead.
    if position not in ('QB', 'RB', 'WR', 'TE'):
        print('Invalid position entered')
        return

    # The original called df.dropna() without using the result, so no rows
    # were ever removed. That behavior is kept: rows are filtered by position only.
    df = df[df['pos'] == position]

    yPred = model.predict(df[var])

    database = pd.DataFrame({'Name': df['player'], 'Pos': df['pos'],
                             'Predicted PPG': yPred})
    database = database.sort_values(by=['Predicted PPG'], ascending=False)
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    # Season total = per-game prediction x 17 (presumably games per season).
    database['Predicted PPR'] = 17 * database['Predicted PPG']
    database['Predicted PPR'] = database['Predicted PPR'].round(decimals=3)

    # Rank = 1-based position in the sorted table.
    database['PosRank'] = range(1, len(database) + 1)

    database = database.reset_index(drop=True)
    return database

In [3]:
# Connect to Database
# Connection settings are read from environment variables so that credentials
# are not stored in the notebook. The password has no default: when
# POSTGRES_PASSWORD is unset it is requested interactively.
# NOTE: a password was previously committed in this cell; treat it as
# compromised and rotate it.
POSTGRES_ADDRESS = os.environ.get(
    'POSTGRES_ADDRESS',
    'fballfinalproject.c6sg90iemyn2.us-east-2.rds.amazonaws.com')
POSTGRES_PORT = os.environ.get('POSTGRES_PORT', '5432')
POSTGRES_USERNAME = os.environ.get('POSTGRES_USERNAME', 'postgres')
POSTGRES_PASSWORD = (os.environ.get('POSTGRES_PASSWORD')
                     or getpass.getpass('Postgres password: '))
POSTGRES_DBNAME = os.environ.get('POSTGRES_DBNAME', 'postgres')

# Build the SQLAlchemy URL. The password is percent-encoded so that special
# characters (e.g. '@', '/', ':') cannot corrupt the URL.
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}').format(
    username=POSTGRES_USERNAME,
    password=quote_plus(POSTGRES_PASSWORD),
    ipaddress=POSTGRES_ADDRESS,
    port=POSTGRES_PORT,
    dbname=POSTGRES_DBNAME)

# Create the connection
cnx = create_engine(postgres_str)

  """)


In [4]:
# Read in dataframes and merge
# An earlier version of this cell merged the tables in pandas; it is kept
# below for reference only. The joins are now performed in SQL.
# fantasy = pd.read_sql_query('''SELECT * FROM fantasy;''', cnx)
# teams = pd.read_sql_query('''SELECT * FROM teams;''', cnx)
# advrush = pd.read_sql_query('''SELECT * FROM advrush;''', cnx)
# df = pd.merge(fantasy,teams[['tmid','passrate']],on='tmid', how='inner')
# df = pd.merge(df,advrush,on='plid',how='inner')
# df = df.drop(columns=['player_y', 'tm_y', 'pos_y',
#                       'age_y','G_y', 'gs_y','tmid_y'])
# df = df.rename(columns={'player_x':'player','tm_x':'tm','pos_x':'pos','age_x':'age','G_x':'G','gs_x':'gs','tmid_x':'tmid'})
# df

# df: every fantasy row inner-joined to its team row on tmid, adding the
# team's pasatt_g (as tm_passatt), passrate and rushatt_g (as tm_rushatt).
df = pd.read_sql_query('''SELECT fantasy.*,
teams.pasatt_g AS tm_passatt,
teams.passrate,teams.rushatt_g AS tm_rushatt
FROM fantasy 
INNER JOIN teams 
ON fantasy.tmid = teams.tmid;''',cnx)

# df2: the same join, additionally inner-joined to advrec on plid, so it only
# contains rows that also have an advrec record.
df2 = pd.read_sql_query('''SELECT fantasy.*,
                        teams.pasatt_g  AS tm_passatt, 
                        teams.passrate, teams.rushatt_g AS tm_rushatt,
                        ar.avgdot, ar.ybc_g, ar.ydsbc_rec, ar.yac_g, ar.ydsac_rec, ar.brktkl_g, ar.drop_g
                        FROM fantasy
                        INNER JOIN teams 
                        ON fantasy.tmid = teams.tmid
                        INNER JOIN advrec AS ar
                        ON fantasy.plid = ar.plid;''',cnx)

In [5]:
# Preview the fantasy/teams frame (pandas truncates the display).
df

Unnamed: 0,player,tm,pos,age,G,gs,plid,tmid,cmp_g,passatt_g,...,posrk,ovrank,yr,ppr_g,starter,next_yr_ppg,next_yr_starter,tm_passatt,passrate,tm_rushatt
0,AJBrown,TEN,WR,24,13,13,AJBrown2021,TEN2021,0.000,0.154,...,32,85,2021,13.915,0,,,31.471,90.0,32.412
1,AJBrown,TEN,WR,23,14,12,AJBrown2020,TEN2020,0.000,0.000,...,9,38,2020,17.679,0,13.915,0.0,30.312,105.9,32.562
2,AJBrown,TEN,WR,22,16,11,AJBrown2019,TEN2019,0.000,0.000,...,9,62,2019,13.569,0,17.679,0.0,28.000,108.3,27.812
3,AJDerby,MIA,TE,27,4,0,AJDerby2018,MIA2018,0.000,0.000,...,76,400,2018,3.450,0,,,28.438,93.0,23.188
4,AJDillon,GNB,RB,23,17,2,AJDillon2021,GNB2021,0.000,0.000,...,21,80,2021,10.918,0,,,34.882,107.4,26.235
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5261,ZayJones,LVR,WR,25,16,2,ZayJones2020,LVR2020,0.062,0.062,...,130,321,2020,2.288,0,6.229,0.0,34.438,100.4,28.562
5262,ZayJones,BUF,WR,23,16,15,ZayJones2018,BUF2018,0.000,0.062,...,34,93,2018,10.325,0,3.260,0.0,31.188,62.6,29.250
5263,ZayJones,BUF,WR,22,15,10,ZayJones2017,BUF2017,0.000,0.000,...,90,218,2017,4.707,0,10.325,0.0,29.750,82.1,30.438
5264,ZurlonTipton,IND,RB,25,10,0,ZurlonTipton2015,IND2015,0.000,0.000,...,124,419,2015,1.270,0,,,38.688,77.5,24.750


In [6]:
# Preview the fantasy/teams/advrec frame (pandas truncates the display).
df2

Unnamed: 0,player,tm,pos,age,G,gs,plid,tmid,cmp_g,passatt_g,...,tm_passatt,passrate,tm_rushatt,avgdot,ybc_g,ydsbc_rec,yac_g,ydsac_rec,brktkl_g,drop_g
0,AJBrown,TEN,WR,24,13,13,AJBrown2021,TEN2021,0.000,0.154,...,31.471,90.0,32.412,11.6,47.769,9.9,19.077,3.9,0.154,0.615
1,AJBrown,TEN,WR,23,14,12,AJBrown2020,TEN2020,0.000,0.000,...,30.312,105.9,32.562,10.9,45.929,9.2,30.857,6.2,0.929,0.357
2,AJBrown,TEN,WR,22,16,11,AJBrown2019,TEN2019,0.000,0.000,...,28.000,108.3,27.812,13.2,36.625,11.3,29.062,8.9,0.500,0.125
3,AJDillon,GNB,RB,23,17,2,AJDillon2021,GNB2021,0.000,0.000,...,34.882,107.4,26.235,0.4,0.176,0.1,18.235,9.1,0.294,0.000
4,AJDillon,GNB,RB,22,11,0,AJDillon2020,GNB2020,0.000,0.000,...,32.875,121.5,27.688,2.5,0.455,2.5,1.455,8.0,0.091,0.000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1404,ZachPascal,IND,WR,25,16,13,ZachPascal2019,IND2019,0.000,0.062,...,32.062,85.0,29.438,10.8,23.250,9.1,14.688,5.7,0.188,0.188
1405,ZackMoss,BUF,RB,24,13,0,ZackMoss2021,BUF2021,0.000,0.000,...,38.529,91.3,27.118,0.8,-0.231,-0.1,15.385,8.7,0.154,0.231
1406,ZackMoss,BUF,RB,23,13,0,ZackMoss2020,BUF2020,0.000,0.000,...,37.250,107.6,25.688,1.8,0.615,0.6,6.692,6.2,0.231,0.000
1407,ZayJones,LVR,WR,26,17,9,ZayJones2021,LVR2021,0.000,0.000,...,36.941,93.8,24.353,14.0,22.118,8.0,10.000,3.6,0.176,0.118


In [7]:
# Add each player's share of the team's rushing attempts and pass attempts:
# per-game player volume divided by per-game team volume.
for frame in (df, df2):
    frame['rushshare'] = frame['rushatt_g'].div(frame['tm_rushatt'])
    frame['targetshare'] = frame['tgt_g'].div(frame['tm_passatt'])

In [8]:
# Order both frames by player (A-Z) and, within a player, newest season first.
# The NextTm cells below rely on this ordering.
df, df2 = (
    frame.sort_values(by=['player', 'yr'], ascending=[True, False])
    for frame in (df, df2)
)

In [9]:
# For every row, look at the row directly above it. If that row belongs to the
# same player (i.e. a later season, given the player/yr-descending sort), its
# team becomes NextTm and "<that team><yr + 1>" becomes NextTmID. Rows with no
# such predecessor get the string 'NaN' in both columns.
# This replaces a Python-level iterrows() loop with the equivalent shifts.
# NOTE(review): the row above is assumed to be exactly the following season.
# If a player has a gap in seasons, NextTmID pairs a later season's team with
# yr + 1 -- confirm whether such rows should be 'NaN' instead.
same_player = df['player'].eq(df['player'].shift())
prev_row_tm = df['tm'].shift()

df['NextTm'] = prev_row_tm.where(same_player, 'NaN')
df['NextTmID'] = (prev_row_tm + (df['yr'] + 1).astype(str)).where(same_player, 'NaN')

In [10]:
# Same derivation as for df, applied to df2: a row whose predecessor belongs
# to the same player takes that predecessor's team as NextTm and
# "<that team><yr + 1>" as NextTmID; all other rows get the string 'NaN'.
# This replaces a Python-level iterrows() loop with the equivalent shifts.
# NOTE(review): the row above is assumed to be exactly the following season.
# If a player has a gap in seasons, NextTmID pairs a later season's team with
# yr + 1 -- confirm whether such rows should be 'NaN' instead.
same_player2 = df2['player'].eq(df2['player'].shift())
prev_row_tm2 = df2['tm'].shift()

df2['NextTm'] = prev_row_tm2.where(same_player2, 'NaN')
df2['NextTmID'] = (prev_row_tm2 + (df2['yr'] + 1).astype(str)).where(same_player2, 'NaN')

In [11]:
# Load the starters table, renaming starter -> qbstarter and plid -> stid so
# the columns do not clash with df/df2 when merged.
starters = pd.read_sql_query('''SELECT starter AS qbstarter,
tm,
"Year",
plid AS stid,
tmid
FROM starters;''',cnx)

In [12]:
# Attach the starter (qbstarter, stid) of each row's next-season team by
# matching NextTmID to starters.tmid.
# A left join keeps exactly the player rows; the original outer join followed
# by a `player` not-null filter produced the same rows, but upcast the integer
# columns to float along the way.
df = (
    pd.merge(df, starters[['tmid', 'qbstarter', 'stid']],
             left_on='NextTmID', right_on='tmid', how='left')
    .drop(columns='tmid_y')               # starters' tmid duplicates NextTmID
    .rename(columns={'tmid_x': 'tmid'})   # restore the player's own tmid name
    .astype({'age': 'int64', 'G': 'int64', 'gs': 'int64', 'posrk': 'int64',
             'ovrank': 'int64', 'yr': 'int64', 'starter': 'int64'})
)


In [13]:
# Attach the starter (qbstarter, stid) of each row's next-season team by
# matching NextTmID to starters.tmid.
# A left join keeps exactly the player rows; the original outer join followed
# by a `player` not-null filter produced the same rows, but upcast the integer
# columns to float along the way.
df2 = (
    pd.merge(df2, starters[['tmid', 'qbstarter', 'stid']],
             left_on='NextTmID', right_on='tmid', how='left')
    .drop(columns='tmid_y')               # starters' tmid duplicates NextTmID
    .rename(columns={'tmid_x': 'tmid'})   # restore the player's own tmid name
    .astype({'age': 'int64', 'G': 'int64', 'gs': 'int64', 'posrk': 'int64',
             'ovrank': 'int64', 'yr': 'int64', 'starter': 'int64'})
)

In [14]:
# Restore the player / newest-season-first ordering after the merge and
# renumber the index from 0.
_order_after_starters = dict(by=['player', 'yr'], ascending=[True, False], ignore_index=True)
df = df.sort_values(**_order_after_starters)
df2 = df2.sort_values(**_order_after_starters)

In [15]:
# Build the key used to look up passer ratings: "<qbstarter><yr>".
# The original compared qbstarter with the string 'NaN', which never matches a
# real missing value, so rows without a starter received keys like 'nan2020'.
# Missing starters (real NaN or the literal 'NaN') now get the sentinel 'NaN'.
no_starter = df['qbstarter'].isna() | df['qbstarter'].eq('NaN')
df['connect'] = (df['qbstarter'].astype(str) + df['yr'].astype(str)).where(~no_starter, 'NaN')

In [16]:
# Build the key used to look up passer ratings: "<qbstarter><yr>".
# The original compared qbstarter with the string 'NaN', which never matches a
# real missing value, so rows without a starter received keys like 'nan2020'.
# Missing starters (real NaN or the literal 'NaN') now get the sentinel 'NaN'.
no_starter2 = df2['qbstarter'].isna() | df2['qbstarter'].eq('NaN')
df2['connect'] = (df2['qbstarter'].astype(str) + df2['yr'].astype(str)).where(~no_starter2, 'NaN')

In [17]:
# Load the passerratings table, renaming avgrate -> averagepassrate,
# passrate -> overallpassrate and plid -> startid. startid is the key that the
# `connect` column of df/df2 is matched against.
startpass = pd.read_sql_query('''SELECT player,
team,
cmp,
att,
yds,
td,
"Int",
avgrate AS averagepassrate,
passrate AS overallpassrate,
"Year",
plid AS startid
FROM passerratings;''',cnx)

In [18]:
# Attach the passer ratings of the starter identified by `connect`.
# A left join keeps exactly the player rows; the original outer join followed
# by a `player` not-null filter produced the same rows, but upcast the integer
# columns to float along the way.
df = (
    pd.merge(df, startpass[['startid', 'averagepassrate', 'overallpassrate']],
             left_on='connect', right_on='startid', how='left')
    .drop(columns='startid')   # duplicates `connect` wherever a match exists
    .astype({'age': 'int64', 'G': 'int64', 'gs': 'int64', 'posrk': 'int64',
             'ovrank': 'int64', 'yr': 'int64', 'starter': 'int64'})
)

In [19]:
# Attach the passer ratings of the starter identified by `connect`.
# A left join keeps exactly the player rows; the original outer join followed
# by a `player` not-null filter produced the same rows, but upcast the integer
# columns to float along the way.
df2 = (
    pd.merge(df2, startpass[['startid', 'averagepassrate', 'overallpassrate']],
             left_on='connect', right_on='startid', how='left')
    .drop(columns='startid')   # duplicates `connect` wherever a match exists
    .astype({'age': 'int64', 'G': 'int64', 'gs': 'int64', 'posrk': 'int64',
             'ovrank': 'int64', 'yr': 'int64', 'starter': 'int64'})
)

In [20]:
# Re-sort by player and newest season first after the passer-rating merge,
# renumbering the index from 0.
df, df2 = (
    frame.sort_values(by=['player', 'yr'], ascending=[True, False], ignore_index=True)
    for frame in (df, df2)
)

In [21]:
# Starter rows for the 2022 season only.
is_2022 = starters['Year'] == 2022
start22 = starters[is_2022]

In [22]:
# Collect the names of the 2022 starters and print one per line.
starters2022 = start22['qbstarter'].tolist()
for starter_name in starters2022:
    print(starter_name)

KylerMurray
MarcusMariota
LamarJackson
JoshAllen
BakerMayfield
JustinFields
JoeBurrow
DeshaunWatson
DakPrescott
RussellWilson
JaredGoff
AaronRodgers
DavisMills
MattRyan
TrevorLawrence
PatrickMahomes
JustinHerbert
MatthewStafford
DerekCarr
TuaTagovailoa
KirkCousins
JameisWinston
MacJones
DanielJones
ZachWilson
JalenHurts
MitchellTrubisky
GenoSmith
TreyLance
TomBrady
RyanTannehill
CarsonWentz


In [23]:
# For QB rows of the 2021 season, set next_yr_starter to 1 when the player is
# in the 2022 starter list and to 0 otherwise. All other rows are left as is.
# A boolean mask replaces the row-by-row iterrows()/.at loop.
is_2021_qb = (df['yr'] == 2021) & (df['pos'] == 'QB')
df.loc[is_2021_qb, 'next_yr_starter'] = (
    df.loc[is_2021_qb, 'player'].isin(starters2022).astype(int)
)

In [24]:
# For QB rows of the 2021 season, set next_yr_starter to 1 when the player is
# in the 2022 starter list and to 0 otherwise. All other rows are left as is.
# A boolean mask replaces the row-by-row iterrows()/.at loop.
is_2021_qb2 = (df2['yr'] == 2021) & (df2['pos'] == 'QB')
df2.loc[is_2021_qb2, 'next_yr_starter'] = (
    df2.loc[is_2021_qb2, 'player'].isin(starters2022).astype(int)
)

In [25]:
# List out columns of df (base stats plus the derived share, next-team,
# starter and passer-rating columns added above)
df.columns

Index(['player', 'tm', 'pos', 'age', 'G', 'gs', 'plid', 'tmid', 'cmp_g',
       'passatt_g', 'passyds_g', 'passtd_g', 'int_g', 'rushatt_g', 'rushyds_g',
       'rushyds_att', 'rushtd_g', 'tgt_g', 'rec_g', 'recyds_g', 'yds_rec',
       'rectd_g', 'fmb_g', 'fl_g', 'tottd_g', '2PM_G', '2PP_G', 'fpts',
       'posrk', 'ovrank', 'yr', 'ppr_g', 'starter', 'next_yr_ppg',
       'next_yr_starter', 'tm_passatt', 'passrate', 'tm_rushatt', 'rushshare',
       'targetshare', 'NextTm', 'NextTmID', 'qbstarter', 'stid', 'connect',
       'averagepassrate', 'overallpassrate'],
      dtype='object')

In [26]:
# List the columns of df2 (same as df plus the advrec columns).
df2.columns

Index(['player', 'tm', 'pos', 'age', 'G', 'gs', 'plid', 'tmid', 'cmp_g',
       'passatt_g', 'passyds_g', 'passtd_g', 'int_g', 'rushatt_g', 'rushyds_g',
       'rushyds_att', 'rushtd_g', 'tgt_g', 'rec_g', 'recyds_g', 'yds_rec',
       'rectd_g', 'fmb_g', 'fl_g', 'tottd_g', '2PM_G', '2PP_G', 'fpts',
       'posrk', 'ovrank', 'yr', 'ppr_g', 'starter', 'next_yr_ppg',
       'next_yr_starter', 'tm_passatt', 'passrate', 'tm_rushatt', 'avgdot',
       'ybc_g', 'ydsbc_rec', 'yac_g', 'ydsac_rec', 'brktkl_g', 'drop_g',
       'rushshare', 'targetshare', 'NextTm', 'NextTmID', 'qbstarter', 'stid',
       'connect', 'averagepassrate', 'overallpassrate'],
      dtype='object')

In [51]:
# Select columns for each position (WR + TE joined in recvar)
# These lists are the feature sets the models in this notebook are fitted on.
qbvar = ['age','passatt_g','passyds_g','passtd_g','rushatt_g','rushyds_g',
         'rushyds_att','ppr_g','starter','next_yr_starter']
rbvar = ['age','rushatt_g','rushyds_g','rushyds_att','tgt_g','rec_g',
         'recyds_g','yds_rec','ppr_g','rushshare','targetshare']
# recvar uses advrec columns (avgdot, ybc_g, ...), which exist only in df2.
recvar = ['age','tgt_g','rec_g','recyds_g','yds_rec','avgdot','ybc_g', 
          'ydsbc_rec', 'yac_g', 'ydsac_rec','passrate','targetshare']

In [52]:
# Alternative feature lists. qbvar1 and recvar1 contain the same columns as
# qbvar and recvar; rbvar1 adds 'brktkl_g' to rbvar.
# NOTE(review): a model fitted on rbvar cannot predict from rbvar1 (different
# number of columns) -- use the list the model was trained with.
qbvar1 = ['age','passatt_g','passyds_g','passtd_g','rushatt_g','rushyds_g',
         'rushyds_att','ppr_g','starter','next_yr_starter']
rbvar1 = ['age','rushatt_g','rushyds_g','rushyds_att','tgt_g','rec_g',
         'recyds_g','yds_rec','ppr_g','brktkl_g','rushshare','targetshare']
recvar1 = ['age','tgt_g','rec_g','recyds_g','yds_rec','avgdot','ybc_g', 
          'ydsbc_rec', 'yac_g', 'ydsac_rec','passrate','targetshare']

In [53]:
# Separate into training, testing, using, and actual data by Yr (Fantasy) or Year (other dataframes)
# NOTE(review): `train` (yr <= 2020) and `test` (2020 <= yr <= 2021) both
# contain the 2020 season, so errors measured on `test` are not out-of-sample.
# The 2021 rows shown earlier have no next_yr_ppg, so after dropna() `test`
# appears to reduce to 2020 rows that are also in `train` -- confirm and
# consider training on yr <= 2019 when evaluating.
train = df[df['yr'] <= 2020]
test = df[(df['yr'] >= 2020) & (df['yr'] <= 2021)]
# `use`: 2020 rows with a known next_yr_starter and overallpassrate; these are
# the inputs checkModel predicts from.
use = df[df['yr'] == 2020]
use = use[use['next_yr_starter'].notna()]
use = use.astype({"next_yr_starter": int})
use = use[use['overallpassrate'].notna()]
# Casting to int truncates the fractional part of overallpassrate.
use = use.astype({"overallpassrate": int})
# `actuals`: 2021 rows, used as the observed results and as finalPredict input.
actuals = df[df['yr'] == 2021]

In [54]:
# Separate into training, testing, using, and actual data by Yr (Fantasy) or Year (other dataframes)
# Same split as above, applied to df2 (the frame with advrec columns).
# NOTE(review): `train2` (yr <= 2020) and `test2` (2020 <= yr <= 2021) both
# contain the 2020 season, so errors measured on `test2` are not
# out-of-sample -- confirm and consider training on yr <= 2019 when evaluating.
train2 = df2[df2['yr'] <= 2020]
test2 = df2[(df2['yr'] >= 2020) & (df2['yr'] <= 2021)]
# `use2`: 2020 rows with a known next_yr_starter and overallpassrate.
use2 = df2[df2['yr'] == 2020]
use2 = use2[use2['next_yr_starter'].notna()]
use2 = use2.astype({"next_yr_starter": int})
use2 = use2[use2['overallpassrate'].notna()]
# Casting to int truncates the fractional part of overallpassrate.
use2 = use2.astype({"overallpassrate": int})
# `actuals2`: 2021 rows of df2.
actuals2 = df2[df2['yr'] == 2021]

In [55]:
# Fit one regression per position: QB from df's split, RB/WR/TE from df2's
# split (the frame that carries the advrec columns).
QBmodel = model(train, "QB", qbvar)
RBmodel = model(train2, "RB", rbvar)
WRmodel = model(train2, "WR", recvar)
TEmodel = model(train2, "TE", recvar)

# Print the evaluation report of every model, in QB, RB, WR, TE order.
for fitted, test_frame, train_frame, pos, columns in (
    (QBmodel, test, train, "QB", qbvar),
    (RBmodel, test2, train2, "RB", rbvar),
    (WRmodel, test2, train2, "WR", recvar),
    (TEmodel, test2, train2, "TE", recvar),
):
    testModel(fitted, test_frame, train_frame, pos, columns)

The accuracy of the QB model is 0.6968358574620971
The QB model has an average error of 0.152 PPG and an average absolute error of 2.912 PPG
The QB model has a median error of 0.418 PPG and a median absolute error of 2.471 PPG


The accuracy of the RB model is 0.639051375244476
The RB model has an average error of -0.062 PPG and an average absolute error of 2.558 PPG
The RB model has a median error of 0.272 PPG and a median absolute error of 1.914 PPG


The accuracy of the WR model is 0.6376890914549966
The WR model has an average error of 0.309 PPG and an average absolute error of 2.569 PPG
The WR model has a median error of 0.767 PPG and a median absolute error of 2.114 PPG


The accuracy of the TE model is 0.7124162838122609
The TE model has an average error of 0.058 PPG and an average absolute error of 1.855 PPG
The TE model has a median error of 0.544 PPG and a median absolute error of 1.766 PPG




In [56]:
# Predict 2021 PPG from the 2020 rows (`use`/`use2`) and compare with what the
# players actually scored in 2021 (`actuals`/`actuals2`).
QBStats, RBStats, WRStats, TEStats = (
    checkModel(fitted, inputs, observed, pos, columns)
    for fitted, inputs, observed, pos, columns in (
        (QBmodel, use, actuals, 'QB', qbvar),
        (RBmodel, use2, actuals2, 'RB', rbvar),
        (WRmodel, use2, actuals2, 'WR', recvar),
        (TEmodel, use2, actuals2, 'TE', recvar),
    )
)

The QB model has an average error of 0.152 PPG and an average absolute error of 2.912 PPG
The QB model has a median error of 0.418 PPG and a median absolute error of 2.471 PPG
The RB model has an average error of -0.062 PPG and an average absolute error of 2.558 PPG
The RB model has a median error of 0.272 PPG and a median absolute error of 1.914 PPG
The WR model has an average error of 0.4 PPG and an average absolute error of 2.511 PPG
The WR model has a median error of 0.774 PPG and a median absolute error of 2.114 PPG
The TE model has an average error of 0.058 PPG and an average absolute error of 1.855 PPG
The TE model has a median error of 0.544 PPG and a median absolute error of 1.766 PPG


In [33]:
# Show the QB predicted-vs-actual table. Uncomment the next line to display
# every row instead of pandas' truncated view.
#pd.set_option('display.max_rows', None)
QBStats

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank
0,JoshAllen,QB,20.466,23.682,-3.215617,347.922,3.215617,1,1
1,DakPrescott,QB,20.17,20.038,0.132371,342.89,0.132371,8,2
2,PatrickMahomes,QB,18.991,21.276,-2.285492,322.847,2.285492,5,3
3,KylerMurray,QB,18.987,21.464,-2.477068,322.779,2.477068,4,4
4,JustinHerbert,QB,18.618,22.4,-3.782013,316.506,3.782013,2,5
5,LamarJackson,QB,18.576,20.0,-1.423992,315.792,1.423992,9,6
6,AaronRodgers,QB,18.505,20.831,-2.326339,314.585,2.326339,6,7
7,TomBrady,QB,18.132,22.041,-3.9092,308.244,3.9092,3,8
8,RyanTannehill,QB,17.994,15.788,2.206099,305.898,2.206099,15,9
9,JaredGoff,QB,17.821,13.893,3.927837,302.957,3.927837,22,10


In [34]:
# QBs ordered from the smallest to the largest absolute prediction error.
QBStats.sort_values('AbsDifference')

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank
23,DanielJones,QB,15.191,15.227,-0.036338,258.247,0.036338,16,24
11,RussellWilson,QB,17.472,17.343,0.128886,297.024,0.128886,13,12
1,DakPrescott,QB,20.17,20.038,0.132371,342.89,0.132371,8,2
22,JimmyGaroppolo,QB,15.47,15.167,0.303054,262.99,0.303054,18,23
40,ColtMcCoy,QB,4.858,5.162,-0.304122,82.586,0.304122,37,41
27,CamNewton,QB,11.334,10.8,0.533773,192.678,0.533773,30,28
43,TimBoyle,QB,4.317,4.86,-0.542608,73.389,0.542608,39,44
37,MikeGlennon,QB,5.701,5.15,0.551017,96.917,0.551017,38,38
28,GardnerMinshewII,QB,7.635,8.425,-0.78961,129.795,0.78961,35,29
45,PJWalker,QB,3.614,2.76,0.854464,61.438,0.854464,41,46


In [35]:
# Show the RB predicted-vs-actual table.
RBStats

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank
0,ChristianMcCaffrey,RB,22.774,18.214,4.560025,387.158,4.560025,5,1
1,DalvinCook,RB,19.102,15.869,3.232624,324.734,3.232624,9,2
2,DerrickHenry,RB,17.379,24.162,-6.783119,295.443,6.783119,1,3
3,DavidMontgomery,RB,17.000,15.000,2.000041,289.000,2.000041,13,4
4,AustinEkeler,RB,16.929,21.488,-4.559423,287.793,4.559423,3,5
...,...,...,...,...,...,...,...,...,...
84,MichaelBurton,RB,1.042,0.919,0.122546,17.714,0.122546,79,85
85,ReggieGilliam,RB,1.042,0.350,0.692199,17.714,0.692199,86,86
86,JonathanWard,RB,1.013,0.746,0.266716,17.221,0.266716,84,87
87,KhariBlasingame,RB,0.814,0.091,0.723143,13.838,0.723143,88,88


In [36]:
# RBs ordered from the smallest to the largest absolute prediction error.
RBStats.sort_values('AbsDifference')

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank
57,TravisHomer,RB,4.440,4.414,0.025620,75.480,0.025620,56,58
46,BostonScott,RB,6.092,6.162,-0.070115,103.564,0.070115,44,47
37,ZackMoss,RB,8.018,8.092,-0.073727,136.306,0.073727,36,38
35,SonyMichel,RB,8.492,8.606,-0.113919,144.364,0.113919,33,36
84,MichaelBurton,RB,1.042,0.919,0.122546,17.714,0.122546,79,85
...,...,...,...,...,...,...,...,...,...
2,DerrickHenry,RB,17.379,24.162,-6.783119,295.443,6.783119,1,3
23,JamesConner,RB,10.227,17.180,-6.952540,173.859,6.952540,7,24
60,DarrelWilliams,RB,4.326,11.529,-7.202719,73.542,7.202719,23,61
11,JonathanTaylor,RB,13.590,21.947,-8.356550,231.030,8.356550,2,12


In [37]:
# Show the WR predicted-vs-actual table.
WRStats

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank
0,DavanteAdams,WR,21.147,21.519,-0.371935,359.499,0.371935,2,1
1,DeeboSamuel,WR,17.963,21.188,-3.224898,305.371,3.224898,3,2
2,StefonDiggs,WR,17.283,16.794,0.488518,293.811,0.488518,9,3
3,AJBrown,WR,16.728,13.915,2.813008,284.376,2.813008,26,4
4,DeAndreHopkins,WR,16.617,14.720,1.897037,282.489,1.897037,18,5
...,...,...,...,...,...,...,...,...,...
119,JamesProche,WR,1.603,2.586,-0.983003,27.251,0.983003,95,120
120,AshtonDulin,WR,1.369,3.029,-1.659721,23.273,1.659721,92,121
121,DiontaeSpencer,WR,1.275,-0.220,1.494799,21.675,1.494799,124,122
122,JakeKumerow,WR,1.123,0.320,0.802874,19.091,0.802874,120,123


In [38]:
# WRs ordered from the smallest to the largest absolute prediction error.
WRStats.sort_values('AbsDifference')

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank
81,DevinDuvernay,WR,5.211,5.200,0.010781,88.587,0.010781,74,82
13,BrandinCooks,WR,14.456,14.488,-0.031775,245.752,0.031775,20,14
90,TylerJohnson,WR,4.164,4.235,-0.071231,70.788,0.071231,79,91
123,DedeWestbrook,WR,0.993,1.120,-0.126820,16.881,0.126820,109,124
31,JarvisLandry,WR,11.213,11.083,0.130097,190.621,0.130097,35,32
...,...,...,...,...,...,...,...,...,...
14,RobbyAnderson,WR,14.304,8.147,6.157399,243.168,6.157399,52,15
96,VanJefferson,WR,3.678,9.894,-6.215866,62.526,6.215866,40,97
39,KekeCoutee,WR,10.426,0.750,9.676110,177.242,9.676110,116,40
11,WillFuller,WR,14.863,4.300,10.563320,252.671,10.563320,78,12


In [39]:
# Show the TE predicted-vs-actual table.
TEStats

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank
0,TravisKelce,TE,17.651,16.425,1.225619,300.067,1.225619,2,1
1,GeorgeKittle,TE,15.438,14.143,1.294699,262.446,1.294699,4,2
2,DarrenWaller,TE,14.026,12.136,1.890149,238.442,1.890149,6,3
3,MarkAndrews,TE,12.595,17.712,-5.117435,214.115,5.117435,1,4
4,MikeGesicki,TE,10.475,9.706,0.769376,178.075,0.769376,12,5
...,...,...,...,...,...,...,...,...,...
62,LukeStocker,TE,0.811,0.267,0.543595,13.787,0.543595,63,63
63,JohnnyMundt,TE,0.596,0.317,0.279338,10.132,0.279338,61,64
64,CharlieWoerner,TE,0.476,0.600,-0.123766,8.092,0.123766,57,65
65,JeremySprinkle,TE,0.412,0.359,0.053200,7.004,0.053200,60,66


In [40]:
# TEs ordered from the smallest to the largest absolute prediction error.
TEStats.sort_values('AbsDifference')

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank
60,EricSaubert,TE,1.071,1.100,-0.028535,18.207,0.028535,51,61
27,AnthonyFirkser,TE,5.311,5.273,0.038398,90.287,0.038398,24,28
65,JeremySprinkle,TE,0.412,0.359,0.053200,7.004,0.053200,60,66
59,BlakeBell,TE,1.240,1.169,0.071184,21.080,0.071184,49,60
19,RobertTonyan,TE,6.416,6.300,0.116168,109.072,0.116168,21,20
...,...,...,...,...,...,...,...,...,...
35,TylerConklin,TE,3.573,8.135,-4.562059,60.741,4.562059,15,36
3,MarkAndrews,TE,12.595,17.712,-5.117435,214.115,5.117435,1,4
12,OJHoward,TE,7.590,1.971,5.619188,129.030,5.619188,46,13
51,MaxxWilliams,TE,2.460,8.260,-5.800441,41.820,5.800441,14,52


In [41]:
def combineData(listOfDataFrames):
    """Stack per-position tables and add an overall ranking.

    Parameters
    ----------
    listOfDataFrames : list of pandas.DataFrame
        Tables that each contain a 'Predicted PPR' column.

    Returns
    -------
    pandas.DataFrame
        All rows concatenated, ordered by 'Predicted PPR' descending, with an
        'Ovr Rank' column (1 = highest predicted PPR) and a fresh 0-based index.
    """
    combined = pd.concat(listOfDataFrames)
    combined = combined.sort_values(by=['Predicted PPR'], ascending=False)

    # Rank = 1-based position in the sorted table (replaces an iterrows()
    # loop that only counted rows).
    combined['Ovr Rank'] = range(1, len(combined) + 1)

    return combined.reset_index(drop=True)

In [42]:
# Combine the four predicted-vs-actual tables into one overall ranking and
# save it. to_csv is called with its defaults, so the index is written too.
data = [QBStats, RBStats, WRStats, TEStats]
fullData = combineData(data)
fullData.to_csv('predictedAdvStats2021.csv')

In [43]:
# Show the combined 2021 predicted-vs-actual ranking.
fullData

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank,Ovr Rank
0,ChristianMcCaffrey,RB,22.774,18.214,4.560025,387.158,4.560025,5,1,1
1,DavanteAdams,WR,21.147,21.519,-0.371935,359.499,0.371935,2,1,2
2,JoshAllen,QB,20.466,23.682,-3.215617,347.922,3.215617,1,1,3
3,DakPrescott,QB,20.170,20.038,0.132371,342.890,0.132371,8,2,4
4,DalvinCook,RB,19.102,15.869,3.232624,324.734,3.232624,9,2,5
...,...,...,...,...,...,...,...,...,...,...
327,PatrickRicard,RB,0.804,1.592,-0.787916,13.668,0.787916,73,89,328
328,JohnnyMundt,TE,0.596,0.317,0.279338,10.132,0.279338,61,64,329
329,CharlieWoerner,TE,0.476,0.600,-0.123766,8.092,0.123766,57,65,330
330,JeremySprinkle,TE,0.412,0.359,0.053200,7.004,0.053200,60,66,331


In [44]:
# Predict next season's PPG from the 2021 rows (`actuals` / `actuals2`).
# Each model is given the same feature list it was fitted with. The original
# passed rbvar1 (which adds 'brktkl_g') to the RB model fitted on rbvar, and a
# fitted regression cannot predict from a different number of columns.
# recvar1 lists the same columns as recvar, so the WR/TE results are unchanged.
QBSfinal = finalPredict(QBmodel, actuals, 'QB', qbvar)
RBSfinal = finalPredict(RBmodel, actuals2, 'RB', rbvar)
WRSfinal = finalPredict(WRmodel, actuals2, 'WR', recvar)
TESfinal = finalPredict(TEmodel, actuals2, 'TE', recvar)

In [45]:
# Show the QB predictions made from the 2021 rows.
QBSfinal

Unnamed: 0,Name,Pos,Predicted PPG,Predicted PPR,PosRank
0,JalenHurts,QB,19.392,329.664,1
1,KylerMurray,QB,19.125,325.125,2
2,LamarJackson,QB,18.302,311.134,3
3,JustinHerbert,QB,18.243,310.131,4
4,JoshAllen,QB,18.166,308.822,5
...,...,...,...,...,...
76,KellenMond,QB,2.807,47.719,77
77,MikeGlennon,QB,2.730,46.410,78
78,JoshRosen,QB,2.344,39.848,79
79,JakeFromm,QB,2.122,36.074,80


In [46]:
# Show the RB predictions made from the 2021 rows.
RBSfinal

Unnamed: 0,Name,Pos,Predicted PPG,Predicted PPR,PosRank
0,DerrickHenry,RB,19.124,325.108,1
1,NajeeHarris,RB,18.428,313.276,2
2,ChristianMcCaffrey,RB,17.215,292.655,3
3,AustinEkeler,RB,17.213,292.621,4
4,JonathanTaylor,RB,16.182,275.094,5
...,...,...,...,...,...
138,JakobJohnson,RB,0.872,14.824,139
139,NickBawden,RB,0.749,12.733,140
140,AndyJanovich,RB,0.450,7.650,141
141,DerekWatt,RB,0.395,6.715,142


In [47]:
# Show the WR predictions made from the 2021 rows.
WRSfinal

Unnamed: 0,Name,Pos,Predicted PPG,Predicted PPR,PosRank
0,CooperKupp,WR,21.765,370.005,1
1,DeeboSamuel,WR,20.022,340.374,2
2,DavanteAdams,WR,18.461,313.837,3
3,JustinJefferson,WR,18.084,307.428,4
4,ChrisGodwin,WR,17.570,298.690,5
...,...,...,...,...,...
210,DiontaeSpencer,WR,1.151,19.567,211
211,JakeKumerow,WR,1.072,18.224,212
212,MilesBoykin,WR,0.965,16.405,213
213,DanielBrown,WR,0.881,14.977,214


In [48]:
# Show the TE predictions made from the 2021 rows.
TESfinal

Unnamed: 0,Name,Pos,Predicted PPG,Predicted PPR,PosRank
0,MarkAndrews,TE,15.201,258.417,1
1,GeorgeKittle,TE,14.614,248.438,2
2,KylePitts,TE,14.400,244.800,3
3,RobGronkowski,TE,14.179,241.043,4
4,DallasGoedert,TE,13.397,227.749,5
...,...,...,...,...,...
110,LukeStocker,TE,0.612,10.404,111
111,DerekCarrier,TE,0.574,9.758,112
112,EthanWolf,TE,0.362,6.154,113
113,TrevonWesco,TE,0.170,2.890,114


In [49]:
# Combine the four prediction tables into one overall ranking and save it.
# Note: this rebinds `data` and `fullData`, replacing the 2021 comparison
# objects created earlier.
data = [QBSfinal,RBSfinal, WRSfinal, TESfinal]
fullData = combineData(data)
fullData.to_csv('predictedStats2022.csv')

In [50]:
# Display the full ranking. Setting max_rows to None is a global change: every
# later DataFrame display in this kernel is shown untruncated as well.
pd.set_option('display.max_rows', None)
fullData

Unnamed: 0,Name,Pos,Predicted PPG,Predicted PPR,PosRank,Ovr Rank
0,CooperKupp,WR,21.765,370.005,1,1
1,DeeboSamuel,WR,20.022,340.374,2,2
2,JalenHurts,QB,19.392,329.664,1,3
3,KylerMurray,QB,19.125,325.125,2,4
4,DerrickHenry,RB,19.124,325.108,1,5
5,DavanteAdams,WR,18.461,313.837,3,6
6,NajeeHarris,RB,18.428,313.276,2,7
7,LamarJackson,QB,18.302,311.134,3,8
8,JustinHerbert,QB,18.243,310.131,4,9
9,JoshAllen,QB,18.166,308.822,5,10


In [None]:
# Scratch (never executed): inspect the 2021 QB rows of df.
df[(df['yr'] == 2021) & (df['pos'] == 'QB')]

In [None]:
# Scratch (never executed): inspect the starters table.
starters

In [None]:
# Scratch (never executed): repeats the 2022 starter filter from the main flow.
start22 = starters[starters['Year'] == 2022]

In [None]:
# Scratch (never executed): inspect the 2022 starter rows.
start22

In [None]:
# Scratch (never executed): show df with every column visible. The option is
# global and stays in effect for later displays.
pd.set_option('display.max_columns', None)
df

In [None]:
# Scratch (never executed): 2021 QB rows, used by the cells below.
tempdf = df[(df['pos']=='QB') & (df['yr']==2021)]
tempdf

In [None]:
# Scratch: rebuild the list of 2022 starter names and print one per line.
starters2022 = start22['qbstarter'].tolist()
for starter_name in starters2022:
    print(starter_name)

In [None]:
# Scratch: 1 for every 2021 QB who is in the 2022 starter list, else 0,
# in the row order of tempdf.
nextyrstart = [
    1 if qb_name in starters2022 else 0
    for qb_name in tempdf['player']
]

In [None]:
# Scratch (never executed): pair each 2021 QB with the flag computed above.
tempdfnew = pd.DataFrame({'Player':tempdf['player'],'NextYrStart':nextyrstart})
tempdfnew

In [None]:
# Scratch (never executed): rows up to 2020 that still lack next_yr_starter.
df[(df['next_yr_starter'].isnull())&(df['yr']<=2020)]

In [None]:
# Scratch: for 2021 QB rows, set next_yr_starter to 1 when the player is in
# the 2022 starter list and to 0 otherwise, then display those rows.
# A boolean mask replaces the row-by-row iterrows()/.at loop.
qb_2021_mask = (df['yr'] == 2021) & (df['pos'] == 'QB')
df.loc[qb_2021_mask, 'next_yr_starter'] = (
    df.loc[qb_2021_mask, 'player'].isin(starters2022).astype(int)
)

df[(df['pos']=='QB') & (df['yr']==2021)]

In [None]:
# Scratch (never executed): list the 2021 QBs flagged as next-year starters.
# max_rows=None is a global option and affects all later displays.
pd.set_option('display.max_rows', None)
dfdf = df[(df['pos'] == 'QB') & (df['yr'] == 2021) & (df['next_yr_starter'] == 1.0)][['player','yr','next_yr_starter']]
dfdf