In [1]:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px


In [2]:
def getStat(year,subject, start):
    
    url = f'https://www.pro-football-reference.com/years/{year}/{subject}.htm'
    html = urlopen(url)
    page = BeautifulSoup(html)
    
    column_headers = page.findAll('tr')[start]
    column_headers = [i.getText() for i in column_headers.findAll('th')]

    #Collect Rows
    rows = page.findAll('tr')[start+1:]
    #Gets stats
    qb_stats = []
    for i in range(len(rows)):
        qb_stats.append([col.getText() for col in rows[i].findAll('td')])

    return pd.DataFrame(qb_stats, columns = column_headers[1:])

#method for finding fantasy football stat per game average 
def ffPoints(df,name):    
    df[f'ff{name}'] = (((df['G'] * df['Y/G']) * 0.04)
                      + (df['TD'] * 4)
                      + (df['Int'] * -2)
                      + (df['RushYards'] * .1) 
                      + ((df['Fmb'] - df['FR']) * -2)
                      + (df['RushTD'] * 6)) / df['G'] 
    return df[[f'ff{name}']]


#Takes Passing DF and uses rushing DF to get QB Rushing yards/tds and defensive to get Fumbles and fumbles recovered
def fantasyDF(df1,df2, df3,year):
    #Take needed columns from passingDF, drop non qbs
    df1 = df1[['Player','Pos','G','Y/G','TD', 'Int','Sk','Yds','TD']]
    df1['Pos'] = df1['Pos'].str.upper()
    df1 = df1.where(df1['Pos'] == 'QB').dropna()

    #Edit rushingDF, defenseDF to get only columns needed for fantasy score
    df2 = df2[['Player', 'Yds', "TD", 'Fmb']]
    df2 = df2.rename(columns = {'Yds': 'RushYards', 'TD': 'RushTD'})
    df3 = df3[['Player','FR']]

    #Join all three to DF1, fillna with 0 
    df1 = df1.set_index('Player').join(df2.set_index('Player')).join(df3.set_index('Player'))
    df1 = df1.fillna(0)
    
    #Make all Columns into Floats
    for i in df1.columns[1:]:
        df1[i] = df1[i].astype(float)
    df1 = df1.loc[:,~df1.columns.duplicated()]
    df1 = ffPoints(df1,year)
    
    #Remove any naming quirks
    df1 = df1.rename(index = lambda x: x.replace("*", " "))
    df1 = df1.rename(index = lambda x: x.replace("+", " "))
    df1 = df1.rename(index = lambda x: x.strip())
    
    return df1

    

In [3]:
#Get 2019 stats for function
df19 = getStat("2019", "passing", 0)
r19 = getStat("2019","rushing",1)
fumble19 = getStat("2019","defense",1)

#Get 2020 stats for method
df20 = getStat("2020", "passing", 0)
r20 = getStat("2020","rushing",1)
fumble20 = getStat("2020","defense",1)


In [5]:
#call function to create the two DFs
ff19 = fantasyDF(df19,r19,fumble19,'2019')
ff20 = fantasyDF(df20,r20,fumble20,'2020')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [6]:
#subtract to find the difference between this year and last, dropnas to remove any players that weren't in both years
ff20['ff2019'] = ff19['ff2019']
ff20['Change'] = ff20['ff2020'] - ff19['ff2019']
ff20 = ff20.dropna()
ff20 = ff20.sort_values(['Change'], ascending = False)

In [7]:
ff20

Unnamed: 0_level_0,ff2020,ff2019,Change
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Josh Allen,31.306667,16.9115,14.395167
Russell Wilson,32.665333,19.9135,12.751833
Kyler Murray,26.713333,17.454,9.259333
Patrick Mahomes,28.638667,20.358857,8.27981
Aaron Rodgers,24.694667,17.02275,7.671917
Dak Prescott,28.306667,20.73725,7.569417
Jared Goff,19.908,14.846,5.062
Matt Ryan,21.278667,17.421333,3.857333
Teddy Bridgewater,13.545333,9.829778,3.715556
Dwayne Haskins,10.565333,7.856889,2.708444
