In [1]:
import pandas as pd
from sklearn import linear_model
df = pd.read_csv('../Database_CSVs/fantasyDB.csv')
teams = pd.read_csv('../Database_CSVs/teamsDB.csv')

In [3]:
df = pd.merge(df,teams[['TmID','PassRate']],on='TmID', how='inner')
df

Unnamed: 0,Player,Tm,Pos,Age,G,GS,PlID,TmID,Cmp/G,PassAtt/G,...,2PP/G,FPts,PosRk,OvRank,Yr,PPR/G,Starter,Next_Yr_PPG,Next_Yr_Starter,PassRate
0,AJBrown,TEN,WR,24,13,13,AJBrown2021,TEN2021,0.0,0.154,...,0.0,180.9,32,85,2021,13.915,0,,,90.0
1,AnthonyFirkser,TEN,TE,26,15,1,AnthonyFirkser2021,TEN2021,0.0,0.000,...,0.0,79.1,29,222,2021,5.273,0,,,90.0
2,CameronBatson,TEN,WR,26,4,0,CameronBatson2021,TEN2021,0.0,0.000,...,0.0,11.0,167,442,2021,2.750,0,,,90.0
3,ChesterRogers,TEN,WR,27,16,2,ChesterRogers2021,TEN2021,0.0,0.000,...,0.0,65.0,115,251,2021,4.062,0,,,90.0
4,CodyHollister,TEN,WR,28,3,1,CodyHollister2021,TEN2021,0.0,0.000,...,0.0,7.3,197,479,2021,2.433,0,,,90.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5261,RoddyWhite,ATL,WR,34,16,16,RoddyWhite2015,ATL2015,0.0,0.000,...,0.0,99.6,82,180,2015,6.225,0,,,87.8
5262,SeanRenfree,ATL,QB,25,2,0,SeanRenfree2015,ATL2015,1.5,3.500,...,0.0,-2.0,73,536,2015,-1.000,0,,,87.8
5263,TerronWard,ATL,RB,23,13,0,TerronWard2015,ATL2015,0.0,0.000,...,0.0,31.8,96,327,2015,2.446,0,3.44,0.0,87.8
5264,TevinColeman,ATL,RB,22,12,3,TevinColeman2015,ATL2015,0.0,0.000,...,0.0,42.6,74,287,2015,3.550,0,14.70,0.0,87.8


In [4]:
df.columns

Index(['Player', 'Tm', 'Pos', 'Age', 'G', 'GS', 'PlID', 'TmID', 'Cmp/G',
       'PassAtt/G', 'PassYds/G', 'PassTD/G', 'Int/G', 'RushAtt/G', 'RushYds/G',
       'RushYds/Att', 'RushTD/G', 'Tgt/G', 'Rec/G', 'RecYds/G', 'Yds/Rec',
       'RecTD/G', 'Fmb/G', 'FL/G', 'TotTD/G', '2PM/G', '2PP/G', 'FPts',
       'PosRk', 'OvRank', 'Yr', 'PPR/G', 'Starter', 'Next_Yr_PPG',
       'Next_Yr_Starter', 'PassRate'],
      dtype='object')

In [5]:
# var must include 'Next_Yr_PPG'
qbvar = ['Age','PassAtt/G','PassYds/G','PassTD/G','RushAtt/G','RushYds/G',
         'RushYds/Att','PPR/G','Starter','Next_Yr_Starter']
rbvar = ['Age', 'RushAtt/G', 'RushYds/G','RushYds/Att', 'RushTD/G', 'Tgt/G',
         'Rec/G', 'RecYds/G', 'Yds/Rec','RecTD/G','PPR/G']
recvar = ['Tgt/G','Rec/G','RecYds/G','Yds/Rec','RecTD/G','TotTD/G','PPR/G','PassRate']

In [6]:
train = df[df['Yr'] <= 2018]
test = df[(df['Yr'] >= 2018) & (df['Yr'] <= 2019)]
use = df[df['Yr'] == 2019]
use = use[use['Next_Yr_Starter'].notna()]
use = use.astype({"Next_Yr_Starter": int})
actuals = df[df['Yr'] == 2020]

In [7]:
def model(df, position, var):
    df = df.dropna()
    df = df[df['Pos'] == position]   

    # the model will use different independent variables depending on position
    if position == 'QB':
        X = df[var]
    elif position == 'RB':
        X = df[var]
    elif position == 'WR' or 'TE':
        X = df[var]
    else:
        print('Invalid position entered')
        return

    y = df['Next_Yr_PPG']
    reg = linear_model.LinearRegression()
    reg.fit(X, y)
    return reg

In [8]:
def testModelAccuracy(model, df, position, var):
    # creates a df from the csvFile, drops na values and rows where FantPos
    # does not equal the position parameter
    df = df.dropna()
    df = df[df['Pos'] == position]

    # the model will use different independent variables depending on position
    if position == 'QB':
        XTest = df[var]
    elif position == 'RB':
        XTest = df[var]
    elif position == 'WR' or 'TE':
        XTest = df[var]
    else:
        print('Invalid position entered')
        return

    yTest = df['Next_Yr_PPG']
    results = model.score(XTest, yTest)
    return results

In [9]:
def testModelDifference(model, df, position, var):
    # creates a df from the csvFile, drops na values and rows where FantPos
    # does not equal the position parameter
    df = df.dropna()
    df = df[df['Pos'] == position]

    # the model will use different independent variables depending on position
    if position == 'QB':
        XTest = df[var]
    elif position == 'RB':
        XTest = df[var]
    elif position == 'WR' or 'TE':
        XTest = df[var]
    else:
        print('Invalid position entered')
        return


    yPred = model.predict(XTest)
    predAndActual = {'Name': df['Player'], 'Predicted PPG': yPred,
                     'Actual PPG': df['Next_Yr_PPG']}

    # creates df from dictionary above
    database = pd.DataFrame(predAndActual)

    # creates a difference column which depicts the difference between the
    # predicted PPG and actual PPG
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    database['Difference'] = database['Predicted PPG'] - database['Actual PPG']
    database['Difference'] = database['Difference'].round(decimals=3)
    database['AbsDifference'] = database['Difference'].abs()
    meanDiff = round(database['Difference'].mean(), 3)
    medianDiff = round(database['Difference'].median(), 3)
    meanAbsDiff = round(database['AbsDifference'].mean(), 3)
    medianAbsDiff = round(database['AbsDifference'].median(), 3)

    return database, meanDiff, medianDiff, meanAbsDiff, medianAbsDiff

In [10]:
def testModel(model, test, train, position, var):
    accuracy = testModelAccuracy(model, train, position, var)
    differences = testModelDifference(model, test, position, var)
    meanDiff = differences[1]
    medDiff = differences[2]
    meanAbsDiff = differences[3]
    medAbsDiff = differences[4]

    print('The accuracy of the {0} model is {1}'.format(position, accuracy))
    print('The {0} model has an average error of {1} PPG and an average absolute error of {2} PPG'.format(position, meanDiff, meanAbsDiff))
    print('The {0} model has a median error of {1} PPG and a median absolute error of {2} PPG'.format(position, medDiff, medAbsDiff))
    print('\n')
    return

In [11]:
#name model and put in position
QBmodel = model(train,"QB",qbvar)
testModel(QBmodel,test,train,"QB",qbvar)

The accuracy of the QB model is 0.7047972629686028
The QB model has an average error of -0.611 PPG and an average absolute error of 3.519 PPG
The QB model has a median error of 0.045 PPG and a median absolute error of 2.835 PPG




In [12]:
def useModel(model, df1, df2, position, var):
    df1.dropna()
    df2.dropna()
    # checks where the Fant Pos is the position given and returns a data frame
    # with only the rows that include said position
    df1 = df1[df1['Pos'] == position]
    df2 = df2[df2['Pos'] == position]
    df2 = df2[['PlID','Player','PPR/G']]
    newdf = df1.merge(df2,how='inner',left_on='PlID',right_on='PlID')

    # the model will use difference parameters based on position
    if position == 'QB':
        X = df1[var]
    elif position == 'RB':
        X = df1[var]
    elif position == 'WR' or 'TE':
        X = df1[var]
    else:
        print('Invalid position entered')
        return
    yPred = model.predict(X)

    # creates new df with the name of player, their position, and their
    # predicted PPG
    databaseDict = {'Name': df1['Player'], 'Pos': df1['Pos'], 'Predicted PPG': yPred}
    database = pd.DataFrame(databaseDict)
    database = database.merge(df2,how='inner',left_on='Name',right_on='Player')
    database = database.drop(columns=['Player','PlID'])
    database['Difference'] = database['Predicted PPG'] - database['PPR/G']
    database = database.sort_values(by = ['Predicted PPG'], ascending = False)
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals = 3)
    database['Predicted PPR'] = 17 * database['Predicted PPG']
    database['Predicted PPR'] = database['Predicted PPR'].round(decimals = 3)


     # this adds a position rank column to the dataframe
#     posRank = []
#     posRankNum = 1
#     for index, row in newdf.iterrows():
#         posRank.append(posRankNum)
#         posRankNum += 1

#     database['PosRank'] = posRank


    database = database.reset_index(drop = True)

    return database

In [13]:
QBStats = useModel(QBmodel, use, actuals,'QB', qbvar)

In [14]:
QBStats

Unnamed: 0,Name,Pos,Predicted PPG,PPR/G,Difference,Predicted PPR
0,LamarJackson,QB,18.293,22.187,-3.894332,310.981
1,DeshaunWatson,QB,18.227,23.081,-4.853536,309.859
2,RussellWilson,QB,18.106,22.488,-4.382343,307.802
3,RyanTannehill,QB,17.996,21.525,-3.529474,305.932
4,JoshAllen,QB,17.947,24.756,-6.808613,305.099
5,MatthewStafford,QB,17.75,16.288,1.462474,301.75
6,PatrickMahomes,QB,17.548,24.96,-7.411779,298.316
7,DakPrescott,QB,17.524,27.12,-9.595552,297.908
8,JimmyGaroppolo,QB,17.322,10.717,6.605424,294.474
9,DrewBrees,QB,16.973,17.458,-0.485048,288.541
