In [1]:
# Import dependencies
import os
from urllib.parse import quote_plus

import pandas as pd
from sklearn import linear_model
from sqlalchemy import create_engine

In [2]:
#Import functions
def model(df, position, var):
    """Fit a linear regression that predicts 'next_yr_ppg' for one position.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'pos', 'next_yr_ppg' and every column listed in ``var``.
    position : str
        One of 'QB', 'RB', 'WR' or 'TE'.
    var : list of str
        Names of the independent-variable columns.

    Returns
    -------
    The fitted ``linear_model.LinearRegression``, or ``None`` (after printing
    a message) when ``position`` is not a supported position.
    """
    # The former test `position == 'WR' or 'TE'` was always truthy, so the
    # invalid-position branch could never run.  Validate explicitly instead.
    if position not in ('QB', 'RB', 'WR', 'TE'):
        print('Invalid position entered')
        return

    # dropna() with no arguments discards a row if ANY column is missing,
    # not only the feature/target columns.
    df = df.dropna()
    df = df[df['pos'] == position]

    # every position uses the caller-supplied feature list
    X = df[var]
    y = df['next_yr_ppg']
    reg = linear_model.LinearRegression()
    reg.fit(X, y)
    return reg

def testModelAccuracy(model, df, position, var):
    # creates a df from the csvFile, drops na values and rows where FantPos
    # does not equal the position parameter
    df = df.dropna()
    df = df[df['pos'] == position]

    # the model will use different independent variables depending on position
    if position == 'QB':
        XTest = df[var]
    elif position == 'RB':
        XTest = df[var]
    elif position == 'WR' or 'TE':
        XTest = df[var]
    else:
        print('Invalid position entered')
        return

    yTest = df['next_yr_ppg']
    results = model.score(XTest, yTest)
    return results

def testModelDifference(model, df, position, var):
    # creates a df from the csvFile, drops na values and rows where FantPos
    # does not equal the position parameter
    df = df.dropna()
    df = df[df['pos'] == position]

    # the model will use different independent variables depending on position
    if position == 'QB':
        XTest = df[var]
    elif position == 'RB':
        XTest = df[var]
    elif position == 'WR' or 'TE':
        XTest = df[var]
    else:
        print('Invalid position entered')
        return


    yPred = model.predict(XTest)
    predAndActual = {'Name': df['player'], 'Predicted PPG': yPred,
                     'Actual PPG': df['next_yr_ppg']}

    # creates df from dictionary above
    database = pd.DataFrame(predAndActual)

    # creates a difference column which depicts the difference between the
    # predicted PPG and actual PPG
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    database['Difference'] = database['Predicted PPG'] - database['Actual PPG']
    database['Difference'] = database['Difference'].round(decimals=3)
    database['AbsDifference'] = database['Difference'].abs()
    meanDiff = round(database['Difference'].mean(), 3)
    medianDiff = round(database['Difference'].median(), 3)
    meanAbsDiff = round(database['AbsDifference'].mean(), 3)
    medianAbsDiff = round(database['AbsDifference'].median(), 3)

    return database, meanDiff, medianDiff, meanAbsDiff, medianAbsDiff

def testModel(model, test, train, position, var):
    """Print a short evaluation report for one position's model.

    The score comes from ``testModelAccuracy`` evaluated on ``train``; the
    error statistics come from ``testModelDifference`` evaluated on ``test``.
    Nothing is returned.
    """
    score = testModelAccuracy(model, train, position, var)

    # element 0 is the per-player frame, which the report does not use
    _, avgErr, midErr, avgAbsErr, midAbsErr = testModelDifference(model, test, position, var)

    report = (
        'The accuracy of the {0} model is {1}'.format(position, score),
        'The {0} model has an average error of {1} PPG and an average absolute error of {2} PPG'.format(position, avgErr, avgAbsErr),
        'The {0} model has a median error of {1} PPG and a median absolute error of {2} PPG'.format(position, midErr, midAbsErr),
        '\n',
    )
    for line in report:
        print(line)
    return

def checkModel(model, df1, df2, position, var, games=17):
    """Predict PPG from ``df1`` and compare against the actual 'ppr_g' in ``df2``.

    Parameters
    ----------
    model : object exposing ``predict(X)``
    df1 : pandas.DataFrame
        Rows to predict from; needs 'pos', 'player' and every column in ``var``.
    df2 : pandas.DataFrame
        Rows holding the actual results; needs 'pos', 'plid', 'player', 'ppr_g'.
    position : str
        One of 'QB', 'RB', 'WR' or 'TE'.
    var : list of str
        Feature columns passed to ``model.predict``.
    games : int, optional
        Multiplier that turns predicted PPG into a season total (default 17).

    Returns
    -------
    pandas.DataFrame sorted by 'Predicted PPG' (descending) with the columns
    'Name', 'Pos', 'Predicted PPG', 'ppr_g', 'Difference', 'Predicted PPR',
    'AbsDifference', 'ActPosRank', 'PosRank'; or ``None`` (after printing a
    message) when ``position`` is not a supported position.

    Side effects: prints the mean/median (absolute) error.
    """
    # `position == 'WR' or 'TE'` used to be always truthy, which made the
    # invalid-position branch unreachable.
    if position not in ('QB', 'RB', 'WR', 'TE'):
        print('Invalid position entered')
        return

    # The original called df1.dropna()/df2.dropna() and threw the result away.
    # Only the feature columns have to be complete for predict(), so drop on
    # those; frames that already worked are unaffected.
    df1 = df1.dropna(subset=var)

    # keep only the rows for the requested position
    df1 = df1[df1['pos'] == position]
    df2 = df2[df2['pos'] == position]
    df2 = df2[['plid', 'player', 'ppr_g']]

    yPred = model.predict(df1[var])

    # Predictions are matched to actuals by player NAME.
    # NOTE(review): two players sharing a name and position would be
    # cross-matched here - confirm names are unique within a position.
    database = pd.DataFrame({'Name': df1['player'], 'Pos': df1['pos'],
                             'Predicted PPG': yPred})
    database = database.merge(df2, how='inner', left_on='Name', right_on='player')
    database = database.drop(columns=['player', 'plid'])

    # difference uses the unrounded prediction: predicted - actual
    database['Difference'] = database['Predicted PPG'] - database['ppr_g']
    database = database.sort_values(by=['Predicted PPG'], ascending=False)
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    database['Predicted PPR'] = (games * database['Predicted PPG']).round(decimals=3)
    database['AbsDifference'] = database['Difference'].abs()

    meanDiff = round(database['Difference'].mean(), 3)
    medianDiff = round(database['Difference'].median(), 3)
    meanAbsDiff = round(database['AbsDifference'].mean(), 3)
    medianAbsDiff = round(database['AbsDifference'].median(), 3)
    print('The {0} model has an average error of {1} PPG and an average absolute error of {2} PPG'.format(position, meanDiff, meanAbsDiff))
    print('The {0} model has a median error of {1} PPG and a median absolute error of {2} PPG'.format(position, medianDiff, medianAbsDiff))

    # rank by actual PPG, then by predicted PPG (1 = best)
    database = database.sort_values(by=['ppr_g'], ascending=False)
    database['ActPosRank'] = list(range(1, len(database) + 1))
    database = database.sort_values(by=['Predicted PPG'], ascending=False)
    database['PosRank'] = list(range(1, len(database) + 1))

    database = database.reset_index(drop=True)
    return database

def finalPredict(model, df, position, var, games=17):
    """Predict PPG for every player of one position and rank them.

    Parameters
    ----------
    model : object exposing ``predict(X)``
    df : pandas.DataFrame
        Needs 'pos', 'player' and every column in ``var``.
    position : str
        One of 'QB', 'RB', 'WR' or 'TE'.
    var : list of str
        Feature columns passed to ``model.predict``.
    games : int, optional
        Multiplier that turns predicted PPG into a season total (default 17).

    Returns
    -------
    pandas.DataFrame sorted by 'Predicted PPG' (descending) with the columns
    'Name', 'Pos', 'Predicted PPG', 'Predicted PPR', 'PosRank'; or ``None``
    (after printing a message) when ``position`` is not a supported position.
    """
    # `position == 'WR' or 'TE'` used to be always truthy, which made the
    # invalid-position branch unreachable.
    if position not in ('QB', 'RB', 'WR', 'TE'):
        print('Invalid position entered')
        return

    # The original called df.dropna() and threw the result away.  Only the
    # feature columns have to be complete for predict(), so drop on those.
    df = df.dropna(subset=var)
    df = df[df['pos'] == position]

    yPred = model.predict(df[var])

    database = pd.DataFrame({'Name': df['player'], 'Pos': df['pos'],
                             'Predicted PPG': yPred})
    database = database.sort_values(by=['Predicted PPG'], ascending=False)
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    database['Predicted PPR'] = (games * database['Predicted PPG']).round(decimals=3)

    # rows are already in descending prediction order, so rank = position
    database['PosRank'] = list(range(1, len(database) + 1))

    database = database.reset_index(drop=True)
    return database

def combineData(listOfDataFrames):
    """Stack the given frames and rank all rows by 'Predicted PPR'.

    The concatenated rows are sorted by 'Predicted PPR' (highest first),
    numbered 1..n in an 'Ovr Rank' column, and returned with a fresh
    0-based index.
    """
    combined = pd.concat(listOfDataFrames).sort_values(
        by=['Predicted PPR'], ascending=False
    )

    # rows are in descending order, so the overall rank is the row position
    combined['Ovr Rank'] = list(range(1, len(combined) + 1))

    return combined.reset_index(drop=True)

In [3]:
# Connect to Database
# Connection settings come from environment variables so that credentials are
# never stored in the notebook.  Non-secret settings fall back to the values
# this project has always used; the password has no fallback and must be
# provided (a KeyError is raised when POSTGRES_PASSWORD is not set).
# NOTE: a password was previously committed in this cell - rotate it.
POSTGRES_ADDRESS = os.environ.get(
    'POSTGRES_ADDRESS',
    'fballfinalproject.c6sg90iemyn2.us-east-2.rds.amazonaws.com')
POSTGRES_PORT = os.environ.get('POSTGRES_PORT', '5432')
POSTGRES_USERNAME = os.environ.get('POSTGRES_USERNAME', 'postgres')
POSTGRES_PASSWORD = os.environ['POSTGRES_PASSWORD']
POSTGRES_DBNAME = os.environ.get('POSTGRES_DBNAME', 'postgres')

# A long string that contains the necessary Postgres login information.
# User name and password are URL-escaped because characters such as '@',
# '/' or ':' would otherwise corrupt the connection URL.
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}').format(
    username=quote_plus(POSTGRES_USERNAME),
    password=quote_plus(POSTGRES_PASSWORD),
    ipaddress=POSTGRES_ADDRESS,
    port=POSTGRES_PORT,
    dbname=POSTGRES_DBNAME)

# Create the connection
cnx = create_engine(postgres_str)

  """)


In [4]:
# Read in dataframes from AWS database (all queries run over `cnx`)

# fantasy rows joined with their team's per-game pass/rush attempts
df = pd.read_sql_query(
    sql='''SELECT fantasy.*,
teams.pasatt_g AS tm_passatt,
teams.passrate,teams.rushatt_g AS tm_rushatt
FROM fantasy 
INNER JOIN teams 
ON fantasy.tmid = teams.tmid;''',
    con=cnx,
)

# same as above, additionally joined with the advanced rushing table
df2 = pd.read_sql_query(
    sql='''SELECT fantasy.*,
teams.pasatt_g  AS tm_passatt, 
teams.passrate, teams.rushatt_g AS tm_rushatt,
arus.ybc_g, arus.ybcont_a, arus.yac_g, arus.yacont_a, arus.brktkl_g, arus.att_brtk
FROM fantasy
INNER JOIN teams 
ON fantasy.tmid = teams.tmid
INNER JOIN advrush AS arus
ON fantasy.plid = arus.plid;''',
    con=cnx,
)

# same as above, additionally joined with the advanced receiving table
df3 = pd.read_sql_query(
    sql='''SELECT fantasy.*,
teams.pasatt_g  AS tm_passatt, 
teams.passrate, teams.rushatt_g AS tm_rushatt,
arec.avgdot, arec.ybc_g, arec.ydsbc_rec, arec.yac_g, arec.ydsac_rec, arec.brktkl_g, arec.drop_g, arec.passrat
FROM fantasy
INNER JOIN teams 
ON fantasy.tmid = teams.tmid
INNER JOIN advrec AS arec
ON fantasy.plid = arec.plid;''',
    con=cnx,
)

# starters table (columns renamed so they do not clash after merging)
starters = pd.read_sql_query(
    sql='''SELECT starter AS qbstarter,
tm,
"Year",
plid AS stid,
tmid
FROM starters;''',
    con=cnx,
)

# passer ratings table
startpass = pd.read_sql_query(
    sql='''SELECT player,
team,
cmp,
att,
yds,
td,
"Int",
avgrate AS averagepassrate,
passrate AS overallpassrate,
"Year",
plid AS startid
FROM passerratings;''',
    con=cnx,
)

In [5]:
# Add rush share and target share to each of the three frames:
# player per-game attempts/targets divided by the team's per-game figure.
for _frame in (df, df2, df3):
    _frame['rushshare'] = _frame['rushatt_g'].div(_frame['tm_rushatt'])
    _frame['targetshare'] = _frame['tgt_g'].div(_frame['tm_passatt'])

In [6]:
# Order every frame by player name (A-Z) and, within a player, newest season first.
df, df2, df3 = (
    _frame.sort_values(by=['player', 'yr'], ascending=[True, False])
    for _frame in (df, df2, df3)
)

In [7]:
# Add next year team to each player
# df is sorted by player (ascending) and yr (descending), so within a player
# the row directly above holds that player's next recorded season.  A grouped
# shift fetches that row's team without a row-by-row Python loop.
# NOTE(review): "next recorded season" is assumed to be yr + 1; a player who
# skipped a season gets the later team combined with yr + 1 - confirm intended.
_later_tm = df.groupby('player')['tm'].shift(1)
_has_later = _later_tm.notna()
_later_yr = (df['yr'] + 1).astype(str)

# Rows without a later season keep the literal string 'NaN' as a marker
# (downstream cells merge on these columns as plain strings).
df['NextTm'] = _later_tm.where(_has_later, 'NaN')
df['NextTmID'] = (_later_tm.fillna('') + _later_yr).where(_has_later, 'NaN')

In [8]:
# Add next year team to each player
# df2 is sorted by player (ascending) and yr (descending), so within a player
# the row directly above holds that player's next recorded season.  A grouped
# shift fetches that row's team without a row-by-row Python loop.
# NOTE(review): "next recorded season" is assumed to be yr + 1; a player who
# skipped a season gets the later team combined with yr + 1 - confirm intended.
_later_tm = df2.groupby('player')['tm'].shift(1)
_has_later = _later_tm.notna()
_later_yr = (df2['yr'] + 1).astype(str)

# Rows without a later season keep the literal string 'NaN' as a marker
# (downstream cells merge on these columns as plain strings).
df2['NextTm'] = _later_tm.where(_has_later, 'NaN')
df2['NextTmID'] = (_later_tm.fillna('') + _later_yr).where(_has_later, 'NaN')

In [9]:
# Add next year team to each player
# df3 is sorted by player (ascending) and yr (descending), so within a player
# the row directly above holds that player's next recorded season.  A grouped
# shift fetches that row's team without a row-by-row Python loop.
# NOTE(review): "next recorded season" is assumed to be yr + 1; a player who
# skipped a season gets the later team combined with yr + 1 - confirm intended.
_later_tm = df3.groupby('player')['tm'].shift(1)
_has_later = _later_tm.notna()
_later_yr = (df3['yr'] + 1).astype(str)

# Rows without a later season keep the literal string 'NaN' as a marker
# (downstream cells merge on these columns as plain strings).
df3['NextTm'] = _later_tm.where(_has_later, 'NaN')
df3['NextTmID'] = (_later_tm.fillna('') + _later_yr).where(_has_later, 'NaN')

In [10]:
# Merge with starter dataframe and cleanup
import numpy as np

# The outer merge also produces starter rows that match no player; the
# notna() filter on 'player' removes them again.  The duplicated team-id
# column from `starters` is dropped and the original one gets its name back.
df = (
    pd.merge(df, starters[['tmid', 'qbstarter', 'stid']],
             left_on='NextTmID', right_on='tmid', how='outer')
    .loc[lambda merged: merged['player'].notna()]
    .drop(columns='tmid_y')
    .rename(columns={"tmid_x": "tmid"})
)

# Cast the count/rank/year columns back to 64-bit integers (presumably the
# outer merge turned them into floats - TODO confirm).
for _col in ['age', 'G', 'gs', 'posrk', 'ovrank', 'yr', 'starter']:
    df[_col] = df[_col].apply(np.int64)

In [11]:
# Merge with starter dataframe and cleanup
# The outer merge also produces starter rows that match no player; the
# notna() filter on 'player' removes them again.  The duplicated team-id
# column from `starters` is dropped and the original one gets its name back.
df2 = (
    pd.merge(df2, starters[['tmid', 'qbstarter', 'stid']],
             left_on='NextTmID', right_on='tmid', how='outer')
    .loc[lambda merged: merged['player'].notna()]
    .drop(columns='tmid_y')
    .rename(columns={"tmid_x": "tmid"})
)

# Cast the count/rank/year columns back to 64-bit integers (presumably the
# outer merge turned them into floats - TODO confirm).
for _col in ['age', 'G', 'gs', 'posrk', 'ovrank', 'yr', 'starter']:
    df2[_col] = df2[_col].apply(np.int64)

In [12]:
# Merge with starter dataframe and cleanup
# The outer merge also produces starter rows that match no player; the
# notna() filter on 'player' removes them again.  The duplicated team-id
# column from `starters` is dropped and the original one gets its name back.
df3 = (
    pd.merge(df3, starters[['tmid', 'qbstarter', 'stid']],
             left_on='NextTmID', right_on='tmid', how='outer')
    .loc[lambda merged: merged['player'].notna()]
    .drop(columns='tmid_y')
    .rename(columns={"tmid_x": "tmid"})
)

# Cast the count/rank/year columns back to 64-bit integers (presumably the
# outer merge turned them into floats - TODO confirm).
for _col in ['age', 'G', 'gs', 'posrk', 'ovrank', 'yr', 'starter']:
    df3[_col] = df3[_col].apply(np.int64)

In [13]:
# Restore the player / newest-season-first order after the merge and
# renumber the rows from 0.
df, df2, df3 = (
    _frame.sort_values(by=['player', 'yr'], ascending=[True, False], ignore_index=True)
    for _frame in (df, df2, df3)
)

In [14]:
# Create connection for passratings df
# The key is "<qbstarter><yr>", matched against `startid` in a later cell.
# Rows without a starter get the literal string 'NaN'.  The old test
# `row['qbstarter'] == 'NaN'` never matched the real NaN values left by the
# merge, so those rows silently received keys such as 'nan2020'.
_starter = df['qbstarter']
_no_starter = _starter.isna() | (_starter == 'NaN')
df['connect'] = (_starter.astype(str) + df['yr'].astype(str)).where(~_no_starter, 'NaN')

In [15]:
# Create connection for passratings df
# The key is "<qbstarter><yr>", matched against `startid` in a later cell.
# Rows without a starter get the literal string 'NaN'.  The old test
# `row['qbstarter'] == 'NaN'` never matched the real NaN values left by the
# merge, so those rows silently received keys such as 'nan2020'.
_starter = df2['qbstarter']
_no_starter = _starter.isna() | (_starter == 'NaN')
df2['connect'] = (_starter.astype(str) + df2['yr'].astype(str)).where(~_no_starter, 'NaN')

In [16]:
# Create connection for passratings df
# The key is "<qbstarter><yr>", matched against `startid` in a later cell.
# Rows without a starter get the literal string 'NaN'.  The old test
# `row['qbstarter'] == 'NaN'` never matched the real NaN values left by the
# merge, so those rows silently received keys such as 'nan2020'.
_starter = df3['qbstarter']
_no_starter = _starter.isna() | (_starter == 'NaN')
df3['connect'] = (_starter.astype(str) + df3['yr'].astype(str)).where(~_no_starter, 'NaN')

In [17]:
# Merge df with passerratings of QBs
# Outer merge on the 'connect' key; passer-rating rows that match no player
# are removed by the notna() filter, and the helper key column is dropped.
df = (
    pd.merge(df, startpass[['startid', 'averagepassrate', 'overallpassrate']],
             left_on='connect', right_on='startid', how='outer')
    .loc[lambda merged: merged['player'].notna()]
    .drop(columns='startid')
)

# Cast the count/rank/year columns back to 64-bit integers (presumably the
# outer merge turned them into floats - TODO confirm).
for _col in ['age', 'G', 'gs', 'posrk', 'ovrank', 'yr', 'starter']:
    df[_col] = df[_col].apply(np.int64)

In [18]:
# Merge df2 with passerratings of QBs
# Outer merge on the 'connect' key; passer-rating rows that match no player
# are removed by the notna() filter, and the helper key column is dropped.
df2 = (
    pd.merge(df2, startpass[['startid', 'averagepassrate', 'overallpassrate']],
             left_on='connect', right_on='startid', how='outer')
    .loc[lambda merged: merged['player'].notna()]
    .drop(columns='startid')
)

# Cast the count/rank/year columns back to 64-bit integers (presumably the
# outer merge turned them into floats - TODO confirm).
for _col in ['age', 'G', 'gs', 'posrk', 'ovrank', 'yr', 'starter']:
    df2[_col] = df2[_col].apply(np.int64)

In [19]:
# Merge df3 with passerratings of QBs
# Outer merge on the 'connect' key; passer-rating rows that match no player
# are removed by the notna() filter, and the helper key column is dropped.
df3 = (
    pd.merge(df3, startpass[['startid', 'averagepassrate', 'overallpassrate']],
             left_on='connect', right_on='startid', how='outer')
    .loc[lambda merged: merged['player'].notna()]
    .drop(columns='startid')
)

# Cast the count/rank/year columns back to 64-bit integers (presumably the
# outer merge turned them into floats - TODO confirm).
for _col in ['age', 'G', 'gs', 'posrk', 'ovrank', 'yr', 'starter']:
    df3[_col] = df3[_col].apply(np.int64)

In [20]:
# Restore the player / newest-season-first order after the merge and
# renumber the rows from 0.
df, df2, df3 = (
    _frame.sort_values(by=['player', 'yr'], ascending=[True, False], ignore_index=True)
    for _frame in (df, df2, df3)
)

In [21]:
# starter rows for the 2022 season only
start22 = starters.loc[starters['Year'] == 2022]

In [22]:
# Get list of 2022 starters to add to rows
# (plain Python list of the 'qbstarter' values, in row order)
starters2022 = start22['qbstarter'].tolist()

In [23]:
# Designate if QB is starting in 2022
# Only 2021 quarterback rows are touched: 1 when the player is in the list
# of 2022 starters, 0 otherwise.  All other rows keep their existing value.
_qb_2021 = (df['yr'] == 2021) & (df['pos'] == 'QB')
df.loc[_qb_2021, 'next_yr_starter'] = (
    df.loc[_qb_2021, 'player'].isin(starters2022).astype(int).to_numpy()
)

In [24]:
# Designate if QB is starting in 2022
# Only 2021 quarterback rows are touched: 1 when the player is in the list
# of 2022 starters, 0 otherwise.  All other rows keep their existing value.
_qb_2021 = (df2['yr'] == 2021) & (df2['pos'] == 'QB')
df2.loc[_qb_2021, 'next_yr_starter'] = (
    df2.loc[_qb_2021, 'player'].isin(starters2022).astype(int).to_numpy()
)

In [25]:
# Designate if QB is starting in 2022
# Only 2021 quarterback rows are touched: 1 when the player is in the list
# of 2022 starters, 0 otherwise.  All other rows keep their existing value.
_qb_2021 = (df3['yr'] == 2021) & (df3['pos'] == 'QB')
df3.loc[_qb_2021, 'next_yr_starter'] = (
    df3.loc[_qb_2021, 'player'].isin(starters2022).astype(int).to_numpy()
)

In [26]:
# List out df columns (the base fantasy + team frame) to check the merged schema
df.columns

Index(['player', 'tm', 'pos', 'age', 'G', 'gs', 'plid', 'tmid', 'cmp_g',
       'passatt_g', 'passyds_g', 'passtd_g', 'int_g', 'rushatt_g', 'rushyds_g',
       'rushyds_att', 'rushtd_g', 'tgt_g', 'rec_g', 'recyds_g', 'yds_rec',
       'rectd_g', 'fmb_g', 'fl_g', 'tottd_g', '2PM_G', '2PP_G', 'fpts',
       'posrk', 'ovrank', 'yr', 'ppr_g', 'starter', 'next_yr_ppg',
       'next_yr_starter', 'tm_passatt', 'passrate', 'tm_rushatt', 'rushshare',
       'targetshare', 'NextTm', 'NextTmID', 'qbstarter', 'stid', 'connect',
       'averagepassrate', 'overallpassrate'],
      dtype='object')

In [27]:
# List out df2 columns (fantasy + team + advanced rushing) to check the merged schema
df2.columns

Index(['player', 'tm', 'pos', 'age', 'G', 'gs', 'plid', 'tmid', 'cmp_g',
       'passatt_g', 'passyds_g', 'passtd_g', 'int_g', 'rushatt_g', 'rushyds_g',
       'rushyds_att', 'rushtd_g', 'tgt_g', 'rec_g', 'recyds_g', 'yds_rec',
       'rectd_g', 'fmb_g', 'fl_g', 'tottd_g', '2PM_G', '2PP_G', 'fpts',
       'posrk', 'ovrank', 'yr', 'ppr_g', 'starter', 'next_yr_ppg',
       'next_yr_starter', 'tm_passatt', 'passrate', 'tm_rushatt', 'ybc_g',
       'ybcont_a', 'yac_g', 'yacont_a', 'brktkl_g', 'att_brtk', 'rushshare',
       'targetshare', 'NextTm', 'NextTmID', 'qbstarter', 'stid', 'connect',
       'averagepassrate', 'overallpassrate'],
      dtype='object')

In [28]:
# List out df3 columns (fantasy + team + advanced receiving) to check the merged schema
df3.columns

Index(['player', 'tm', 'pos', 'age', 'G', 'gs', 'plid', 'tmid', 'cmp_g',
       'passatt_g', 'passyds_g', 'passtd_g', 'int_g', 'rushatt_g', 'rushyds_g',
       'rushyds_att', 'rushtd_g', 'tgt_g', 'rec_g', 'recyds_g', 'yds_rec',
       'rectd_g', 'fmb_g', 'fl_g', 'tottd_g', '2PM_G', '2PP_G', 'fpts',
       'posrk', 'ovrank', 'yr', 'ppr_g', 'starter', 'next_yr_ppg',
       'next_yr_starter', 'tm_passatt', 'passrate', 'tm_rushatt', 'avgdot',
       'ybc_g', 'ydsbc_rec', 'yac_g', 'ydsac_rec', 'brktkl_g', 'drop_g',
       'passrat', 'rushshare', 'targetshare', 'NextTm', 'NextTmID',
       'qbstarter', 'stid', 'connect', 'averagepassrate', 'overallpassrate'],
      dtype='object')

In [29]:
# Select columns for each position (WR + TE joined in recvar)
# Each list is the set of feature columns handed to model()/predict().

# quarterbacks - columns from df
qbvar = [
    'age',
    'passatt_g', 'passyds_g', 'passtd_g',
    'rushatt_g', 'rushyds_g', 'rushyds_att',
    'ppr_g', 'starter', 'next_yr_starter',
]

# running backs - includes the advanced rushing columns only present in df2
rbvar = [
    'age',
    'rushatt_g', 'rushyds_g', 'rushyds_att',
    'tgt_g', 'rec_g',
    'ybc_g', 'ybcont_a', 'yac_g', 'yacont_a', 'brktkl_g', 'att_brtk',
    'recyds_g', 'yds_rec', 'ppr_g',
    'rushshare', 'targetshare',
]

# receivers (WR and TE) - includes the advanced receiving columns only present in df3
recvar = [
    'age',
    'tgt_g', 'rec_g', 'recyds_g', 'yds_rec',
    'avgdot', 'ybc_g', 'ydsbc_rec', 'yac_g', 'ydsac_rec',
    'passrate', 'targetshare',
]

In [30]:
# Separate into training, testing, using, and actual data by season ('yr')
# NOTE(review): 2020 rows are in both `train` (yr <= 2020) and `test`
# (2020 <= yr <= 2021), so the two sets overlap - confirm this is intended.
_yr = df['yr']
train = df[_yr <= 2020]
test = df[_yr.between(2020, 2021)]

# `use`: 2020 rows with a known next-year-starter flag and passer rating,
# both cast to int.
use = (
    df[_yr == 2020]
    .loc[lambda d: d['next_yr_starter'].notna()]
    .astype({"next_yr_starter": int})
    .loc[lambda d: d['overallpassrate'].notna()]
    .astype({"overallpassrate": int})
)
actuals = df[_yr == 2021]

In [31]:
# Separate into training, testing, using, and actual data by season ('yr')
# NOTE(review): 2020 rows are in both `train2` (yr <= 2020) and `test2`
# (2020 <= yr <= 2021), so the two sets overlap - confirm this is intended.
_yr = df2['yr']
train2 = df2[_yr <= 2020]
test2 = df2[_yr.between(2020, 2021)]

# `use2`: 2020 rows with a known next-year-starter flag and passer rating,
# both cast to int.
use2 = (
    df2[_yr == 2020]
    .loc[lambda d: d['next_yr_starter'].notna()]
    .astype({"next_yr_starter": int})
    .loc[lambda d: d['overallpassrate'].notna()]
    .astype({"overallpassrate": int})
)
actuals2 = df2[_yr == 2021]

In [32]:
# Separate into training, testing, using, and actual data by season ('yr')
# NOTE(review): 2020 rows are in both `train3` (yr <= 2020) and `test3`
# (2020 <= yr <= 2021), so the two sets overlap - confirm this is intended.
_yr = df3['yr']
train3 = df3[_yr <= 2020]
test3 = df3[_yr.between(2020, 2021)]

# `use3`: 2020 rows with a known next-year-starter flag and passer rating,
# both cast to int.
use3 = (
    df3[_yr == 2020]
    .loc[lambda d: d['next_yr_starter'].notna()]
    .astype({"next_yr_starter": int})
    .loc[lambda d: d['overallpassrate'].notna()]
    .astype({"overallpassrate": int})
)
actuals3 = df3[_yr == 2021]

In [33]:
# Fit one model per position, then print each model's evaluation report.
# Each entry: (training frame, test frame, position, feature columns).
_specs = [
    (train, test, "QB", qbvar),
    (train2, test2, "RB", rbvar),
    (train3, test3, "WR", recvar),
    (train3, test3, "TE", recvar),
]

# all four models are fitted first ...
QBmodel, RBmodel, WRmodel, TEmodel = [
    model(_train, _pos, _cols) for _train, _, _pos, _cols in _specs
]

# ... and then reported in the same QB, RB, WR, TE order
for _fitted, (_train, _test, _pos, _cols) in zip(
        (QBmodel, RBmodel, WRmodel, TEmodel), _specs):
    testModel(_fitted, _test, _train, _pos, _cols)

The accuracy of the QB model is 0.6968358574620971
The QB model has an average error of 0.152 PPG and an average absolute error of 2.912 PPG
The QB model has a median error of 0.418 PPG and a median absolute error of 2.471 PPG


The accuracy of the RB model is 0.6172613628529671
The RB model has an average error of -0.107 PPG and an average absolute error of 2.756 PPG
The RB model has a median error of 0.227 PPG and a median absolute error of 2.113 PPG


The accuracy of the WR model is 0.6376890914549966
The WR model has an average error of 0.309 PPG and an average absolute error of 2.569 PPG
The WR model has a median error of 0.767 PPG and a median absolute error of 2.114 PPG


The accuracy of the TE model is 0.7124162838122609
The TE model has an average error of 0.058 PPG and an average absolute error of 1.855 PPG
The TE model has a median error of 0.544 PPG and a median absolute error of 1.766 PPG




In [34]:
# Use model to make predictions and check predictions
# Each entry: (fitted model, rows to predict from, rows with actuals, position, features).
QBStats, RBStats, WRStats, TEStats = [
    checkModel(_fitted, _inputs, _truth, _pos, _cols)
    for _fitted, _inputs, _truth, _pos, _cols in [
        (QBmodel, use, actuals, 'QB', qbvar),
        (RBmodel, use2, actuals2, 'RB', rbvar),
        (WRmodel, use3, actuals3, 'WR', recvar),
        (TEmodel, use3, actuals3, 'TE', recvar),
    ]
]

The QB model has an average error of 0.152 PPG and an average absolute error of 2.912 PPG
The QB model has a median error of 0.418 PPG and a median absolute error of 2.471 PPG
The RB model has an average error of -0.107 PPG and an average absolute error of 2.756 PPG
The RB model has a median error of 0.227 PPG and a median absolute error of 2.113 PPG
The WR model has an average error of 0.4 PPG and an average absolute error of 2.511 PPG
The WR model has a median error of 0.774 PPG and a median absolute error of 2.114 PPG
The TE model has an average error of 0.058 PPG and an average absolute error of 1.855 PPG
The TE model has a median error of 0.544 PPG and a median absolute error of 1.766 PPG


In [35]:
# Combine the per-position check results into one overall ranking and save it.
data = [QBStats, RBStats, WRStats, TEStats]
fullData = combineData(listOfDataFrames=data)
fullData.to_csv(path_or_buf='predictedStats2021.csv')

In [36]:
# Display the combined 2021 check table (predicted vs. actual, with ranks)
fullData

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,ActPosRank,PosRank,Ovr Rank
0,ChristianMcCaffrey,RB,24.140,18.214,5.926460,410.380,5.926460,5,1,1
1,DavanteAdams,WR,21.147,21.519,-0.371935,359.499,0.371935,2,1,2
2,JoshAllen,QB,20.466,23.682,-3.215617,347.922,3.215617,1,1,3
3,DakPrescott,QB,20.170,20.038,0.132371,342.890,0.132371,8,2,4
4,DalvinCook,RB,19.042,15.869,3.172836,323.714,3.172836,9,2,5
...,...,...,...,...,...,...,...,...,...,...
328,JohnnyMundt,TE,0.596,0.317,0.279338,10.132,0.279338,61,64,329
329,CharlieWoerner,TE,0.476,0.600,-0.123766,8.092,0.123766,57,65,330
330,JeremySprinkle,TE,0.412,0.359,0.053200,7.004,0.053200,60,66,331
331,PatrickRicard,RB,0.288,1.592,-1.303928,4.896,1.303928,77,90,332


In [37]:
# Predict from the most recent season's rows (the `actuals*` frames).
# Each entry: (fitted model, input rows, position, feature columns).
QBSfinal, RBSfinal, WRSfinal, TESfinal = [
    finalPredict(_fitted, _inputs, _pos, _cols)
    for _fitted, _inputs, _pos, _cols in [
        (QBmodel, actuals, 'QB', qbvar),
        (RBmodel, actuals2, 'RB', rbvar),
        (WRmodel, actuals3, 'WR', recvar),
        (TEmodel, actuals3, 'TE', recvar),
    ]
]

In [38]:
# Combine the per-position predictions into one overall ranking and save it.
# Note: this rebinds `data` and `fullData` from the 2021 check cells.
data = [QBSfinal, RBSfinal, WRSfinal, TESfinal]
fullData = combineData(listOfDataFrames=data)
fullData.to_csv(path_or_buf='predictedStats2022.csv')

In [39]:
# Display the combined 2022 prediction table (overall and per-position ranks)
fullData

Unnamed: 0,Name,Pos,Predicted PPG,Predicted PPR,PosRank,Ovr Rank
0,CooperKupp,WR,21.765,370.005,1,1
1,DeeboSamuel,WR,20.022,340.374,2,2
2,JalenHurts,QB,19.392,329.664,1,3
3,DerrickHenry,RB,19.210,326.570,1,4
4,KylerMurray,QB,19.125,325.125,2,5
...,...,...,...,...,...,...
558,DerekWatt,RB,0.504,8.568,152,559
559,EthanWolf,TE,0.362,6.154,113,560
560,AlexErickson,WR,0.249,4.233,215,561
561,TrevonWesco,TE,0.170,2.890,114,562
