In [1]:
# Import dependencies
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
from sklearn import linear_model
from sqlalchemy import create_engine

In [2]:
# Define the modelling, evaluation and prediction helper functions
def model(df, position, var):
    """Fit a linear regression that predicts 'next_yr_ppg' for one position.

    Parameters
    ----------
    df : pandas.DataFrame
        Training rows. Must contain 'pos', 'next_yr_ppg' and every column
        named in ``var``.
    position : str
        One of 'QB', 'RB', 'WR' or 'TE'.
    var : list of str
        Columns used as independent variables.

    Returns
    -------
    sklearn.linear_model.LinearRegression or None
        The fitted regression, or None (after printing a message) when
        ``position`` is not a supported position.
    """
    # Membership test instead of `position == 'WR' or 'TE'`: that expression
    # is always truthy, which made the invalid-position branch unreachable.
    if position not in ('QB', 'RB', 'WR', 'TE'):
        print('Invalid position entered')
        return

    # Rows with a missing value in ANY column are dropped, not only the
    # columns listed in `var`.
    df = df.dropna()
    df = df[df['pos'] == position]

    X = df[var]
    y = df['next_yr_ppg']
    reg = linear_model.LinearRegression()
    reg.fit(X, y)
    return reg

def testModelAccuracy(model, df, position, var):
    # creates a df from the csvFile, drops na values and rows where FantPos
    # does not equal the position parameter
    df = df.dropna()
    df = df[df['pos'] == position]

    # the model will use different independent variables depending on position
    if position == 'QB':
        XTest = df[var]
    elif position == 'RB':
        XTest = df[var]
    elif position == 'WR' or 'TE':
        XTest = df[var]
    else:
        print('Invalid position entered')
        return

    yTest = df['next_yr_ppg']
    results = model.score(XTest, yTest)
    return results

def testModelDifference(model, df, position, var):
    # creates a df from the csvFile, drops na values and rows where FantPos
    # does not equal the position parameter
    df = df.dropna()
    df = df[df['pos'] == position]

    # the model will use different independent variables depending on position
    if position == 'QB':
        XTest = df[var]
    elif position == 'RB':
        XTest = df[var]
    elif position == 'WR' or 'TE':
        XTest = df[var]
    else:
        print('Invalid position entered')
        return


    yPred = model.predict(XTest)
    predAndActual = {'Name': df['player'], 'Predicted PPG': yPred,
                     'Actual PPG': df['next_yr_ppg']}

    # creates df from dictionary above
    database = pd.DataFrame(predAndActual)

    # creates a difference column which depicts the difference between the
    # predicted PPG and actual PPG
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    database['Difference'] = database['Predicted PPG'] - database['Actual PPG']
    database['Difference'] = database['Difference'].round(decimals=3)
    database['AbsDifference'] = database['Difference'].abs()
    meanDiff = round(database['Difference'].mean(), 3)
    medianDiff = round(database['Difference'].median(), 3)
    meanAbsDiff = round(database['AbsDifference'].mean(), 3)
    medianAbsDiff = round(database['AbsDifference'].median(), 3)

    return database, meanDiff, medianDiff, meanAbsDiff, medianAbsDiff

def testModel(model, test, train, position, var):
    """Print a short evaluation report for one position's model.

    The score is obtained from ``testModelAccuracy`` run on ``train`` and the
    error statistics from ``testModelDifference`` run on ``test``. Nothing is
    returned.
    """
    # NOTE(review): the reported "accuracy" is scored on `train`, not `test`
    # -- confirm that this is intended.
    accuracy = testModelAccuracy(model, train, position, var)

    # The first element is the per-player table, which the report does not use.
    _, meanDiff, medDiff, meanAbsDiff, medAbsDiff = testModelDifference(
        model, test, position, var)

    print('The accuracy of the {0} model is {1}'.format(position, accuracy))
    print('The {0} model has an average error of {1} PPG and an average absolute error of {2} PPG'.format(position, meanDiff, meanAbsDiff))
    print('The {0} model has a median error of {1} PPG and a median absolute error of {2} PPG'.format(position, medDiff, medAbsDiff))
    print('\n')

def useModel(model, df1, df2, position, var, games=17):
    """Predict PPG for one position and compare it with a later season.

    Prints the mean/median signed and absolute prediction error.

    Parameters
    ----------
    model : fitted estimator
        Must expose ``predict(X)``.
    df1 : pandas.DataFrame
        Rows to predict from. Must contain 'pos', 'player' and every column
        named in ``var``.
    df2 : pandas.DataFrame
        Rows holding the actual results. Must contain 'pos', 'plid',
        'player' and 'ppr_g'.
    position : str
        One of 'QB', 'RB', 'WR' or 'TE'.
    var : list of str
        Columns passed to the model as independent variables.
    games : int, optional
        Multiplier that turns predicted points per game into the
        'Predicted PPR' total. Defaults to 17.

    Returns
    -------
    pandas.DataFrame or None
        One row per player present in both frames, sorted by
        'Predicted PPG' (highest first), with the columns 'Name', 'Pos',
        'Predicted PPG', 'ppr_g', 'Difference' (predicted minus actual),
        'Predicted PPR' and 'AbsDifference'. None (after printing a message)
        when ``position`` is not a supported position.
    """
    # Membership test instead of `position == 'WR' or 'TE'`: that expression
    # is always truthy, which made the invalid-position branch unreachable.
    if position not in ('QB', 'RB', 'WR', 'TE'):
        print('Invalid position entered')
        return

    # Keep only the requested position. Rows with a missing feature value
    # are dropped because they cannot be fed to predict(); missing values in
    # columns the model does not use are left alone.
    df1 = df1[df1['pos'] == position].dropna(subset=list(var))
    df2 = df2.loc[df2['pos'] == position, ['plid', 'player', 'ppr_g']]

    yPred = model.predict(df1[var])

    # Pair each prediction with the player's actual PPG, matching on name.
    database = pd.DataFrame({
        'Name': df1['player'],
        'Pos': df1['pos'],
        'Predicted PPG': yPred,
    })
    database = database.merge(df2, how='inner', left_on='Name', right_on='player')
    database = database.drop(columns=['player', 'plid'])

    # 'Difference' uses the unrounded prediction; rounding happens afterwards.
    database['Difference'] = database['Predicted PPG'] - database['ppr_g']
    database = database.sort_values(by=['Predicted PPG'], ascending=False)
    database['Predicted PPG'] = database['Predicted PPG'].round(decimals=3)
    database['Predicted PPR'] = (games * database['Predicted PPG']).round(decimals=3)
    database['AbsDifference'] = database['Difference'].abs()

    meanDiff = round(database['Difference'].mean(), 3)
    medianDiff = round(database['Difference'].median(), 3)
    meanAbsDiff = round(database['AbsDifference'].mean(), 3)
    medianAbsDiff = round(database['AbsDifference'].median(), 3)
    print('The {0} model has an average error of {1} PPG and an average absolute error of {2} PPG'.format(position, meanDiff, meanAbsDiff))
    print('The {0} model has a median error of {1} PPG and a median absolute error of {2} PPG'.format(position, medianDiff, medianAbsDiff))

    return database.reset_index(drop=True)

In [3]:
# Connect to Database
# Connection settings are read from environment variables so that credentials
# are never stored in the notebook. Only the password has no default.
POSTGRES_ADDRESS = os.environ.get(
    'POSTGRES_ADDRESS',
    'fballfinalproject.c6sg90iemyn2.us-east-2.rds.amazonaws.com')
POSTGRES_PORT = os.environ.get('POSTGRES_PORT', '5432')
POSTGRES_USERNAME = os.environ.get('POSTGRES_USERNAME', 'postgres')
POSTGRES_DBNAME = os.environ.get('POSTGRES_DBNAME', 'postgres')
POSTGRES_PASSWORD = os.environ.get('POSTGRES_PASSWORD')
if not POSTGRES_PASSWORD:
    raise RuntimeError(
        'Set the POSTGRES_PASSWORD environment variable before running this notebook')

# A long string that contains the necessary Postgres login information.
# Username and password are percent-encoded so characters such as '@', '/'
# or ':' cannot break the URL.
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}').format(
    username=quote(POSTGRES_USERNAME, safe=''),
    password=quote(POSTGRES_PASSWORD, safe=''),
    ipaddress=POSTGRES_ADDRESS,
    port=POSTGRES_PORT,
    dbname=POSTGRES_DBNAME)
# Create the connection
cnx = create_engine(postgres_str)

  """)


In [4]:
# Read in dataframes and merge
# The join is done in SQL: every `fantasy` row is paired with the `teams` row
# that has the same tmid, bringing in tm_passatt, passrate and tm_rushatt.
fantasy_with_team_sql = '''SELECT fantasy.*,
teams.pasatt_g AS tm_passatt,
teams.passrate,teams.rushatt_g AS tm_rushatt
FROM fantasy 
INNER JOIN teams 
ON fantasy.tmid = teams.tmid;'''
df = pd.read_sql_query(fantasy_with_team_sql, cnx)

In [5]:
# Display the frame returned by the query above.
df

Unnamed: 0,player,tm,pos,age,G,gs,plid,tmid,cmp_g,passatt_g,...,posrk,ovrank,yr,ppr_g,starter,next_yr_ppg,next_yr_starter,tm_passatt,passrate,tm_rushatt
0,AJBrown,TEN,WR,24,13,13,AJBrown2021,TEN2021,0.000,0.154,...,32,85,2021,13.915,0,,,31.471,90.0,32.412
1,AJBrown,TEN,WR,23,14,12,AJBrown2020,TEN2020,0.000,0.000,...,9,38,2020,17.679,0,13.915,0.0,30.312,105.9,32.562
2,AJBrown,TEN,WR,22,16,11,AJBrown2019,TEN2019,0.000,0.000,...,9,62,2019,13.569,0,17.679,0.0,28.000,108.3,27.812
3,AJDerby,MIA,TE,27,4,0,AJDerby2018,MIA2018,0.000,0.000,...,76,400,2018,3.450,0,,,28.438,93.0,23.188
4,AJDillon,GNB,RB,23,17,2,AJDillon2021,GNB2021,0.000,0.000,...,21,80,2021,10.918,0,,,34.882,107.4,26.235
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5261,ZayJones,LVR,WR,25,16,2,ZayJones2020,LVR2020,0.062,0.062,...,130,321,2020,2.288,0,6.229,0.0,34.438,100.4,28.562
5262,ZayJones,BUF,WR,23,16,15,ZayJones2018,BUF2018,0.000,0.062,...,34,93,2018,10.325,0,3.260,0.0,31.188,62.6,29.250
5263,ZayJones,BUF,WR,22,15,10,ZayJones2017,BUF2017,0.000,0.000,...,90,218,2017,4.707,0,10.325,0.0,29.750,82.1,30.438
5264,ZurlonTipton,IND,RB,25,10,0,ZurlonTipton2015,IND2015,0.000,0.000,...,124,419,2015,1.270,0,,,38.688,77.5,24.750


In [6]:
# Manipulate df to add rush and target share
# Each share is the player's own column divided by the matching team column.
df = df.assign(
    rushshare=df['rushatt_g'] / df['tm_rushatt'],
    targetshare=df['tgt_g'] / df['tm_passatt'],
)

In [7]:
# Order rows by player name, then by year descending. The next cell depends on
# this ordering: it reads the row directly above each row as that player's
# later season.
df = df.sort_values(by = ['player','yr'], ascending = [True,False])

In [8]:
# For every row, record the team found on the row directly above it when that
# row belongs to the same player (rows are sorted newest season first), plus
# an id built as '<that team><yr + 1>'. Rows with no such neighbour get the
# placeholder string 'NaN' in both columns.
# NOTE(review): the row above is not checked to be exactly yr + 1, so a gap in
# a player's seasons still produces a '<team><yr + 1>' id -- confirm intended.
nextYearTm = []
nextYearTmID = []
lastPlayer = 'NaN'
lastPlayerTm = 'NaN'

for player, tm, season in zip(df['player'], df['tm'], df['yr']):
    if player == lastPlayer:
        nextYearTm.append(lastPlayerTm)
        nextYearTmID.append(lastPlayerTm + str(season + 1))
    else:
        nextYearTm.append('NaN')
        nextYearTmID.append('NaN')
    lastPlayer, lastPlayerTm = player, tm

df['NextTm'] = nextYearTm
df['NextTmID'] = nextYearTmID

In [9]:
# Load the starters table from CSV; the next cell uses its TmID, Starter and
# PlID columns.
starters = pd.read_csv('Database_CSVs/starters.csv')

In [10]:
# Attach the starter listed for each row's NextTmID.
# rename() returns a new frame, so re-running this cell is harmless.
starters = starters.rename(columns={'PlID': 'StID', 'Starter': 'qbstarter'})
df = pd.merge(df, starters[['TmID', 'qbstarter', 'StID']],
              left_on='NextTmID', right_on='TmID', how='outer')

# The outer merge also emits starters rows that matched no player; those have
# no 'player' value and are removed again here.
df = df[df['player'].notna()].drop(columns='TmID')

# Unmatched rows can turn the integer columns into floats; cast them back.
int_columns = ['age', 'G', 'gs', 'posrk', 'ovrank', 'yr', 'starter']
df = df.astype({column: np.int64 for column in int_columns})

In [11]:
# Restore the player / newest-season-first order after the merge and renumber
# the index from 0 (ignore_index=True).
df = df.sort_values(by = ['player','yr'], ascending = [True,False],ignore_index=True)

In [12]:
# Build the lookup key '<qbstarter><yr>' that the passer-rating merge matches
# against StartID. Rows without a starter get the placeholder 'NaN'.
# Missing starters come out of the merge as real NaN values, not the string
# 'NaN', so they are detected with notna(); comparing against 'NaN' alone
# never matched and produced keys such as 'nan2019'.
has_starter = df['qbstarter'].notna() & (df['qbstarter'] != 'NaN')
df['connect'] = (
    df['qbstarter'].astype(str) + df['yr'].astype(str)
).where(has_starter, 'NaN')

In [13]:
# Load the passer-ratings table from CSV; the next cell uses its PlID, AvgRate
# and PassRate columns.
startpass = pd.read_csv('Database_CSVs/passerratings.csv')

In [14]:
# Give the passer-rating columns their final names, then attach them to each
# row through the 'connect' key.
startpass = startpass.rename(columns={
    'PlID': 'StartID',
    'AvgRate': 'AveragePassRate',
    'PassRate': 'OverallPassRate',
})
df = pd.merge(
    df,
    startpass[['StartID', 'AveragePassRate', 'OverallPassRate']],
    left_on='connect',
    right_on='StartID',
    how='outer',
)

# Discard rows that exist only in startpass, then the helper key column.
df = df[df['player'].notna()]
df = df.drop(columns='StartID')

# Cast the count/rank columns back to 64-bit integers after the outer merge.
for column in ('age', 'G', 'gs', 'posrk', 'ovrank', 'yr', 'starter'):
    df[column] = df[column].apply(np.int64)

In [15]:
# Restore the player / newest-season-first order after the merge and renumber
# the index from 0 (ignore_index=True).
df = df.sort_values(by = ['player','yr'], ascending = [True,False],ignore_index=True)

In [16]:
# List out columns (the feature lists in the next cell are chosen from these)
df.columns

Index(['player', 'tm', 'pos', 'age', 'G', 'gs', 'plid', 'tmid', 'cmp_g',
       'passatt_g', 'passyds_g', 'passtd_g', 'int_g', 'rushatt_g', 'rushyds_g',
       'rushyds_att', 'rushtd_g', 'tgt_g', 'rec_g', 'recyds_g', 'yds_rec',
       'rectd_g', 'fmb_g', 'fl_g', 'tottd_g', '2PM_G', '2PP_G', 'fpts',
       'posrk', 'ovrank', 'yr', 'ppr_g', 'starter', 'next_yr_ppg',
       'next_yr_starter', 'tm_passatt', 'passrate', 'tm_rushatt', 'rushshare',
       'targetshare', 'NextTm', 'NextTmID', 'qbstarter', 'StID', 'connect',
       'AveragePassRate', 'OverallPassRate'],
      dtype='object')

In [17]:
# Independent variables per position. WR and TE share one list (recvar).
qbvar = [
    'age',
    'passatt_g', 'passyds_g', 'passtd_g',
    'rushatt_g', 'rushyds_g', 'rushyds_att',
    'ppr_g',
    'starter', 'next_yr_starter',
]
rbvar = [
    'age',
    'rushatt_g', 'rushyds_g', 'rushyds_att',
    'tgt_g', 'rec_g', 'recyds_g', 'yds_rec',
    'ppr_g',
    'rushshare', 'targetshare',
]
recvar = [
    'age',
    'tgt_g', 'rec_g', 'recyds_g', 'yds_rec',
    'passrate', 'targetshare',
]

In [18]:
# Split df by the 'yr' column:
#   train   - seasons up to and including 2012
#   test    - seasons 2018 and 2019
#   use     - season 2019 rows that have a next_yr_starter value (cast to int)
#   actuals - season 2020
train = df[df['yr'] <= 2012]
test = df[df['yr'].between(2018, 2019)]
use = (
    df[(df['yr'] == 2019) & df['next_yr_starter'].notna()]
    .astype({"next_yr_starter": int})
)
actuals = df[df['yr'] == 2020]

In [19]:
# Fit one regression per position, then print the evaluation report for each.
# WR and TE are fitted separately but use the same feature list.
position_features = [('QB', qbvar), ('RB', rbvar), ('WR', recvar), ('TE', recvar)]

QBmodel, RBmodel, WRmodel, TEmodel = [
    model(train, position, features) for position, features in position_features
]

for fitted, (position, features) in zip(
        (QBmodel, RBmodel, WRmodel, TEmodel), position_features):
    testModel(fitted, test, train, position, features)

The accuracy of the QB model is 0.7806590538367456
The QB model has an average error of -1.156 PPG and an average absolute error of 4.12 PPG
The QB model has a median error of -0.377 PPG and a median absolute error of 3.633 PPG


The accuracy of the RB model is 0.5436434451016764
The RB model has an average error of 0.446 PPG and an average absolute error of 2.961 PPG
The RB model has a median error of 0.76 PPG and a median absolute error of 2.157 PPG


The accuracy of the WR model is 0.6035288454694109
The WR model has an average error of 0.771 PPG and an average absolute error of 3.176 PPG
The WR model has a median error of 1.072 PPG and a median absolute error of 2.749 PPG


The accuracy of the TE model is 0.5879605084276767
The TE model has an average error of 0.179 PPG and an average absolute error of 2.287 PPG
The TE model has a median error of 0.536 PPG and a median absolute error of 1.745 PPG




In [20]:
# Predict from the `use` rows with each fitted model and compare the result
# with `actuals`; useModel prints the error summary for each position.
QBStats, RBStats, WRStats, TEStats = [
    useModel(fitted, use, actuals, position, features)
    for fitted, position, features in (
        (QBmodel, 'QB', qbvar),
        (RBmodel, 'RB', rbvar),
        (WRmodel, 'WR', recvar),
        (TEmodel, 'TE', recvar),
    )
]

The QB model has an average error of -1.058 PPG and an average absolute error of 4.111 PPG
The QB model has a median error of -0.43 PPG and a median absolute error of 3.626 PPG
The RB model has an average error of 0.16 PPG and an average absolute error of 2.975 PPG
The RB model has a median error of 0.612 PPG and a median absolute error of 2.462 PPG
The WR model has an average error of 0.346 PPG and an average absolute error of 3.226 PPG
The WR model has a median error of 0.738 PPG and a median absolute error of 2.825 PPG
The TE model has an average error of -0.015 PPG and an average absolute error of 2.179 PPG
The TE model has a median error of 0.4 PPG and a median absolute error of 1.671 PPG


In [21]:
#pd.set_option('display.max_rows', None)
# QB predictions in the order returned by useModel (highest 'Predicted PPG' first).
QBStats

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference
0,DakPrescott,QB,18.12,27.12,-8.999699,308.04,8.999699
1,LamarJackson,QB,17.994,22.187,-4.192946,305.898,4.192946
2,PatrickMahomes,QB,17.8,24.96,-7.160396,302.6,7.160396
3,DanielJones,QB,17.704,12.857,4.846819,300.968,4.846819
4,BakerMayfield,QB,17.365,15.538,1.826792,295.205,1.826792
5,KylerMurray,QB,17.212,23.669,-6.456676,292.604,6.456676
6,MattRyan,QB,17.134,17.65,-0.516151,291.278,0.516151
7,PhilipRivers,QB,16.836,15.0,1.836199,286.212,1.836199
8,JaredGoff,QB,16.658,16.0,0.65761,283.186,0.65761
9,DerekCarr,QB,16.541,17.006,-0.464699,281.197,0.464699


In [22]:
# QB predictions from smallest to largest absolute error (display only; QBStats itself is unchanged).
QBStats.sort_values(by=['AbsDifference'])

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference
54,BrianHoyer,QB,1.928,2.0,-0.071723,32.776,0.071723
11,MatthewStafford,QB,16.425,16.288,0.136628,279.225,0.136628
34,JeffDriskel,QB,9.637,10.033,-0.396038,163.829,0.396038
15,MitchellTrubisky,QB,15.818,15.37,0.447998,268.906,0.447998
9,DerekCarr,QB,16.541,17.006,-0.464699,281.197,0.464699
51,JarrettStidham,QB,3.079,2.58,0.498835,52.343,0.498835
6,MattRyan,QB,17.134,17.65,-0.516151,291.278,0.516151
8,JaredGoff,QB,16.658,16.0,0.65761,283.186,0.65761
49,ChaseDaniel,QB,3.716,3.05,0.666133,63.172,0.666133
16,CarsonWentz,QB,15.808,16.533,-0.725187,268.736,0.725187


In [23]:
# RB predictions in the order returned by useModel (highest 'Predicted PPG' first).
RBStats

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference
0,ChristianMcCaffrey,RB,24.877,30.133,-5.256442,422.909,5.256442
1,DalvinCook,RB,19.172,24.129,-4.956564,325.924,4.956564
2,AustinEkeler,RB,19.044,16.530,2.513755,323.748,2.513755
3,SaquonBarkley,RB,17.018,7.700,9.317700,289.306,9.317700
4,LeonardFournette,RB,16.742,10.154,6.588040,284.614,6.588040
...,...,...,...,...,...,...,...
100,NickBellore,RB,1.203,0.150,1.053456,20.451,1.053456
101,AnthonySherman,RB,0.872,0.638,0.233776,14.824,0.233776
102,TonyBrooksJames,RB,0.740,0.400,0.340181,12.580,0.340181
103,SenorisePerry,RB,0.588,0.129,0.459227,9.996,0.459227


In [24]:
# RB predictions from smallest to largest absolute error (display only; RBStats itself is unchanged).
RBStats.sort_values(by=['AbsDifference'])

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference
98,RyanNall,RB,1.312,1.294,0.017835,22.304,0.017835
32,JamaalWilliams,RB,9.057,9.079,-0.021876,153.969,0.021876
75,TravisHomer,RB,3.707,3.644,0.062662,63.019,0.062662
101,AnthonySherman,RB,0.872,0.638,0.233776,14.824,0.233776
39,AdrianPeterson,RB,8.023,7.781,0.241973,136.391,0.241973
...,...,...,...,...,...,...,...
28,DavidMontgomery,RB,9.488,17.653,-8.165277,161.296,8.165277
77,JeffWilson,RB,3.579,11.858,-8.278846,60.843,8.278846
7,AlvinKamara,RB,16.092,25.187,-9.095231,273.564,9.095231
3,SaquonBarkley,RB,17.018,7.700,9.317700,289.306,9.317700


In [25]:
# WR predictions in the order returned by useModel (highest 'Predicted PPG' first).
WRStats

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference
0,MichaelGallup,WR,18.693,10.831,7.862182,317.781,7.862182
1,MikeEvans,WR,17.786,15.538,2.248477,302.362,2.248477
2,MichaelThomas,WR,17.701,11.986,5.715484,300.917,5.715484
3,JulioJones,WR,17.352,16.233,1.119103,294.984,1.119103
4,ChrisGodwin,WR,17.040,15.917,1.123246,289.680,1.123246
...,...,...,...,...,...,...,...
139,AndreRoberts,WR,0.667,0.207,0.460357,11.339,0.460357
140,KhaDarelHodge,WR,0.291,3.222,-2.930590,4.947,2.930590
141,ChrisHogan,WR,0.288,5.160,-4.872237,4.896,4.872237
142,LeeSmith,WR,-0.440,1.950,-2.389875,-7.480,2.389875


In [26]:
# WR predictions from smallest to largest absolute error (display only; WRStats itself is unchanged).
WRStats.sort_values(by=['AbsDifference'])

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference
91,ScottMiller,WR,6.419,6.406,0.012856,109.123,0.012856
26,JarvisLandry,WR,12.605,12.533,0.072052,214.285,0.072052
84,WillieSnead,WR,7.060,7.246,-0.185562,120.020,0.185562
41,JohnBrown,WR,11.000,10.756,0.243659,187.000,0.243659
110,LaquonTreadwell,WR,4.851,4.580,0.270918,82.467,0.270918
...,...,...,...,...,...,...,...
39,DeontayBurnett,WR,11.067,2.450,8.616648,188.139,8.616648
32,AudenTate,WR,12.129,3.222,8.907286,206.193,8.907286
56,DedeWestbrook,WR,9.297,-0.300,9.597265,158.049,9.597265
6,DavanteAdams,WR,15.726,25.600,-9.874162,267.342,9.874162


In [27]:
# TE predictions in the order returned by useModel (highest 'Predicted PPG' first).
TEStats

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference
0,GeorgeKittle,TE,15.450,15.638,-0.188481,262.650,0.188481
1,TravisKelce,TE,15.068,20.853,-5.784535,256.156,5.784535
2,DarrenWaller,TE,14.226,17.412,-3.185645,241.842,3.185645
3,MarkAndrews,TE,13.603,12.150,1.452762,231.251,1.452762
4,ZachErtz,TE,13.168,7.045,6.122882,223.856,6.122882
...,...,...,...,...,...,...,...
76,PharaohBrown,TE,1.065,3.254,-2.188610,18.105,2.188610
77,ChrisManhertz,TE,0.890,0.700,0.189672,15.130,0.189672
78,MyColePruitt,TE,0.700,2.536,-1.835744,11.900,1.835744
79,DeonYelder,TE,0.520,0.757,-0.237306,8.840,0.237306


In [28]:
# TE predictions from smallest to largest absolute error (display only; TEStats itself is unchanged).
TEStats.sort_values(by=['AbsDifference'])

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference
36,DarrenFells,TE,4.800,4.762,0.038107,81.600,0.038107
53,MaxxWilliams,TE,2.644,2.689,-0.044693,44.948,0.044693
63,DarrellDaniels,TE,1.873,1.933,-0.059732,31.841,0.059732
0,GeorgeKittle,TE,15.450,15.638,-0.188481,262.650,0.188481
77,ChrisManhertz,TE,0.890,0.700,0.189672,15.130,0.189672
...,...,...,...,...,...,...,...
4,ZachErtz,TE,13.168,7.045,6.122882,223.856,6.122882
40,CJUzomah,TE,4.196,11.350,-7.154172,71.332,7.154172
72,DaltonSchultz,TE,1.264,9.156,-7.892448,21.488,7.892448
50,LoganThomas,TE,3.076,11.038,-7.961827,52.292,7.961827


In [29]:
def combineData(listOfDataFrames):
    """Stack the given frames and rank every row by 'Predicted PPR'.

    The frames are concatenated, sorted by 'Predicted PPR' from highest to
    lowest and given an 'Ovr Rank' column numbered 1..n in that order. The
    returned frame has a fresh 0..n-1 index.
    """
    ranked = pd.concat(listOfDataFrames).sort_values(
        by=['Predicted PPR'], ascending=False)

    # Rank 1 goes to the first (highest 'Predicted PPR') row.
    ranked['Ovr Rank'] = list(range(1, len(ranked) + 1))

    return ranked.reset_index(drop=True)

In [30]:
# Combine the four per-position tables into one overall ranking and write it
# to predictedStats.csv in the working directory (the index is written too,
# since to_csv is called with its defaults).
data = [QBStats, RBStats, WRStats, TEStats]
fullData = combineData(data)
fullData.to_csv('predictedStats.csv')

In [31]:
# Combined ranking across all positions, highest 'Predicted PPR' first.
fullData

Unnamed: 0,Name,Pos,Predicted PPG,ppr_g,Difference,Predicted PPR,AbsDifference,Ovr Rank
0,ChristianMcCaffrey,RB,24.877,30.133,-5.256442,422.909,5.256442,1
1,DalvinCook,RB,19.172,24.129,-4.956564,325.924,4.956564,2
2,AustinEkeler,RB,19.044,16.530,2.513755,323.748,2.513755,3
3,MichaelGallup,WR,18.693,10.831,7.862182,317.781,7.862182,4
4,DakPrescott,QB,18.120,27.120,-8.999699,308.040,8.999699,5
...,...,...,...,...,...,...,...,...
383,KhaDarelHodge,WR,0.291,3.222,-2.930590,4.947,2.930590,384
384,ChrisHogan,WR,0.288,5.160,-4.872237,4.896,4.872237,385
385,TrevonWesco,TE,-0.421,0.125,-0.546415,-7.157,0.546415,386
386,LeeSmith,WR,-0.440,1.950,-2.389875,-7.480,2.389875,387
