In [259]:
#####################################################
## Basketball/Football Reference Data Classes
import pandas as pd
import requests
from bs4 import BeautifulSoup
import pickle

BASKETBALL_REFERENCE_URL_FORMAT_STRING = 'https://www.basketball-reference.com/players/{0}/{1}.html'
FOOTBALL_REFERENCE_URL_FORMAT_STRING = 'https://www.pro-football-reference.com/players/{0}/{1}.htm'
FOOTBALL_FANTASY_POINTS_URL_FORMAT_STRING = 'https://www.fantasypros.com/nfl/projections/{0}.php'
FOOTBALL_STATS_HISTORY_URL_FORMAT_STRING = 'https://www.fantasypros.com/nfl/stats/{0}.php'
ESPN_RANKINGS_URL = 'https://www.espn.com/fantasy/basketball/story/_/id/30662776/updated-fantasy-basketball-head-head-categories-rankings'
FOOTBALL_RANKINGS_URL = 'https://www.cbssports.com/fantasy/football/rankings/'

#####################################################
# Helper methods
def get_top_200_from_espn():
    res = requests.get(ESPN_RANKINGS_URL)
    soup = BeautifulSoup(res.content,'lxml')
    paragraphs = soup.find_all('p')[7]
    player_names = paragraphs.find_all('a')
    players_list = []
    for player_text in player_names: 
        players_list.append(player_text.get_text())

    return players_list

FOOTBALL_ROOKIES = ['travis-etienne', 'breece-hall', 'dameon-pierce', 'drake-london', 'brian-robinson-jr',
                    'george-pickens', 'michael-carter', 'james-cook', 'kenneth-walker-iii', 'chris-olave',
                    'jahan-dotson', 'isiah-pacheco', 'treylon-burks', 'garrett-wilson', 'tyler-allgeier',
                    'zamir-white', 'romeo-doubs', 'rachaad-white', 'jalen-tolbert', 'skyy-moore', 'mike-davis',
                    'danny-gray', 'jameson-williams', 'wandale-robinson', 'isaiah-spiller', 'tyrion-davisprice',
                    'gus-edwards', 'christian-watson', 'alec-pierce', 'kyle-philips', 'velus-jones-jr',
                    'jaylen-warren', 'irv-smith', 'david-bell', 'jamaal-williams']

def get_top_200_football():
    res = requests.get(FOOTBALL_RANKINGS_URL)
    soup = BeautifulSoup(res.content,'lxml')
    player_paragraphs = soup.find_all('div', {"class": "player"})
    players_list =[]
    for p in player_paragraphs:
        player_link = p.find_all('a')
        if len(player_link) > 0:
            player_attrs = player_link[0].attrs
            if 'href' in player_attrs:
                player_url = player_attrs['href']
                name_length = player_url[::-1][1:].index('/')
                player_name = player_url[len(player_url) - name_length - 1:len(player_url) - 1]
                if not player_name in FOOTBALL_ROOKIES:
                    players_list.append(player_name.replace('-', ' '))
    return players_list
            

#####################################################
# Collector Class for Basketball/Football Reference
class SportsDataCollector:
    def __init__(self, sport_type):
        self._player_names = []
        self._player_urls = dict()
        self._sport_type = sport_type
        self._yby_column_headers = ['Season', 'Age', 'Tm', 'Lg', 'Pos', 'G', 'GS', 'MP', 'FG',
                                    'FGA', 'FG%', '3P', '3PA', '3P%', '2P', '2PA', '2P%', 'eFG%', 'FT',
                                    'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF',
                                    'PTS']
        self._gbg_column_headers = ['Rk', 'G', 'Date', 'Age', 'Tm', 'Opp', 'GS', 'MP', 'FG',
                                    'FGA', 'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB',
                                    'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'GmSc', '+/-']
    
    ###################################################
    # Data Collection Utility Methods
    def get_game_stats(self, player, years=['2022'], yearly_df=None):
        '''
            This method only supports basketball for now, you can either
            specify a yearly data df which will find the game by game stats
            for each year that has a row, or a list 'years' (years are last
            year in season as that is what breference website uses)
        '''
        # Only works for basketball
        scraped_game_stats = dict()
        
        years_to_find = []
        if yearly_df is not None:
            for idx, row in yearly_df.iterrows():
                ending_year = row['Season'].split('-')[1]

                if int(ending_year) > 80:
                    ending_year = f'19{ending_year}'
                else:
                    ending_year = f'20{ending_year}'
                years_to_find.append(ending_year)
        else:
            years_to_find = years
                
        for ending_year in years_to_find:
            yearly_url = self._generate_basketball_reference_url(player).replace('.html', '')
            year_games_url = f'{yearly_url}/gamelog/{ending_year}'
            print(year_games_url)
            
            res = requests.get(year_games_url)
            soup = BeautifulSoup(res.content, 'lxml')
            table = soup.find_all('table')[7]
            df = pd.read_html(str(table))[0]
            
            df = self.clean_basketball_game_stats_df(df)
            
            scraped_game_stats[ending_year] = df
        
        return scraped_game_stats

    def get_stats(self, players):
        stats = dict()

        for player in players:
            player_stats = self._get_yearly_stats_player(player)
            stats[player] = player_stats 
        
        return stats
    
    def _get_yearly_stats_player(self, player): 
        if self._sport_type == 'basketball':
            url = self._generate_basketball_reference_url(player)
            res = requests.get(url)
            soup = BeautifulSoup(res.content,'lxml')
            tables = soup.find_all('table')
            self._player_urls[player] = url
            print(url)
            
            # Find whether site has table for projections (blocks one I want)
            if soup.find_all('h2')[0].get_text() == '2022-23 Projection':
                table = tables[1]
            else:
                table = tables[0]
                
            df = pd.read_html(str(table))[0]
            
            dfs = self.clean_basketball_yearly_stats_df(df)
        elif self._sport_type == 'football':
            urls = self._generate_football_reference_urls(player)
            print(urls[0])
            fp_res = requests.get(urls[0])
            fp_soup = BeautifulSoup(fp_res.content,'lxml')
#             sh_res = requests.get(urls[1])
#             sh_soup = BeautifulSoup(sh_res.content,'lxml')
            
            fp_df = pd.read_html(str(fp_soup.find_all('table')[1]))[0]
#             sh_df = sh_soup.find_all('table')[0]
            
            dfs = self.clean_football_yearly_stats_df((fp_df, []))

        return dfs
    
    ###################################################
    # Data Cleaning Utility Methods
    def clean_basketball_game_stats_df(self, df):
        # Fill with proper values
        df['FG%'] = df['FG%'].fillna(0.5)
        df['FT%'] = df['FT%'].fillna(0.5)
        df = df.fillna('0')
        
        # Remove irrelevant rows
        df.G = df.G.astype(str)
        df.PTS = df.PTS.astype(str)
        df = df.loc[~df.PTS.str.contains('Did Not Dress')]
        df = df.loc[~df.PTS.str.contains('Did Not Play')]
        df = df.loc[~df.PTS.str.contains('Not With Team')]
        df = df.loc[~df.PTS.str.contains('Player Suspended')]
        df = df.loc[~df.PTS.str.contains('Inactive')]
        df = df.loc[~df.G.str.contains('G')]
        
        # Set df types
        text_keys = ['Rk', 'Date', 'Age', 'Tm', 'Opp', 'MP']
        numerical_keys = ['G', 'GS', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%',
                          'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST',
                          'STL', 'BLK', 'TOV', 'PF', 'PTS', 'GmSc', '+/-']
        for key in text_keys:
            df[key] = df[key].astype(str)
        for key in numerical_keys:
            df[key] = df[key].astype(float)
        
        return df
    
    def clean_basketball_yearly_stats_df(self, df):
        df = df.fillna('0')
        
        # Remove badly formatted / irrelevant rows
        df = df.loc[df.Season.str.contains('-')]
        df.Tm = df.Tm.astype(str)
        df = df.loc[~df.Tm.str.contains('Did Not Play')]
        
        # Set df types
        text_keys = ['Season', 'Age', 'Lg', 'Pos']
        numerical_keys = ['G', 'GS', 'MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', '2P',
                          '2PA', '2P%', 'eFG%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB',
                          'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']
        for key in text_keys:
            df[key] = df[key].astype(str)
        for key in numerical_keys:
            df[key] = df[key].astype(float)

        # Join rows that belong to the same season
        rows_to_drop = []
        prev_exists = False
        for idx, row in df.iterrows():
            if prev_exists:
                # Equal so join the rows to next and drop prev
                if prev.Season == row.Season:
                    # Positions
                    prev_pos = prev.Pos.split(',')
                    cur_pos = row.Pos.split(',')
                    for pos in prev_pos:
                        if not pos in cur_pos:
                            cur_pos.append(pos)
                    row.Pos = ','.join(cur_pos)

                    # Averages
                    for label in ['MP', 'FG', 'FGA', 'FG%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS']:
                        row[label] = (float(row[label]) * float(row.G) + float(prev[label]) * float(prev.G)) / (float(row.G) + float(prev.G))

                    # GP/GS
                    row.G = float(row.G) + float(prev.G)
                    row.GS = float(row.GS) + float(prev.GS)

                    rows_to_drop.append(prev_idx)
            prev = row
            prev_idx = idx
            prev_exists = True
                
        df.drop(rows_to_drop, inplace=True)
        
        return df
        
    def clean_football_yearly_stats_df(self, dfs):
        fp_df, sh_df = dfs
        
        # Clean Fantasy Points DataFrame
        fp_df.fillna('0')
        numerical_keys = ['Projected Pts', 'Actual Pts', '+/-']
        text_keys = ['Exceeded Expectations?']
        for key in numerical_keys:
            fp_df[key] = fp_df[key].astype(float)
        for key in text_keys:
            fp_df[key] = fp_df[key].astype(str)

        # Clean Stats History DataFrame
        fp_df.drop('Unnamed: 0', axis=1, inplace=True)
        
        # Ignoring stats for now
        
        # Merge the Two DataFrames
        # Ignoring stats for now
        
        return (fp_df, None)
        
    
    ###################################################
    # URL Utility Methods
    def generate_basketball_urls(self, players):
        urls = []

        for player in players:
            url = self._generate_basketball_reference_url(player)
            urls.append(url)

        return urls

    def _generate_basketball_reference_url(self, player_name, url_id=None):
        split_name = player_name.split(' ')
        print(split_name)
        
        url_exceptions = {
            'Jaren Jackson Jr.': 'https://www.basketball-reference.com/players/j/jacksja02.html',
            'Robert Williams III': 'https://www.basketball-reference.com/players/w/williro04.html',
            'Clint Capela': 'https://www.basketball-reference.com/players/c/capelca01.html',
            "D'Angelo Russell": 'https://www.basketball-reference.com/players/r/russeda01.html',
            'Gary Trent Jr.': 'https://www.basketball-reference.com/players/t/trentga02.html',
            'Wendell Carter Jr.': 'https://www.basketball-reference.com/players/c/cartewe01.html',
            'Kelly Oubre Jr.': 'https://www.basketball-reference.com/players/o/oubreke01.html',
            'Marvin Bagley III': 'https://www.basketball-reference.com/players/b/baglema01.html',
            'Lonnie Walker IV': 'https://www.basketball-reference.com/players/w/walkelo01.html',
            'Marcus Morris Sr.': 'https://www.basketball-reference.com/players/m/morrima03.html',
            'Kevin Porter Jr.': 'https://www.basketball-reference.com/players/p/porteke02.html',
            'Larry Nance Jr.': 'https://www.basketball-reference.com/players/n/nancela02.html',
            'Maxi Kleber': 'https://www.basketball-reference.com/players/k/klebima01.html',
            'T.J. Warren': 'https://www.basketball-reference.com/players/w/warretj01.html',
            'P.J. Washington': 'https://www.basketball-reference.com/players/w/washipj01.html',
            'Harrison Barnes': 'https://www.basketball-reference.com/players/b/barneha02.html',
            'Tobias Harris': 'https://www.basketball-reference.com/players/h/harrito02.html',
            'Jaylen Brown': 'https://www.basketball-reference.com/players/b/brownja02.html',
            'Cameron Johnson': 'https://www.basketball-reference.com/players/j/johnsca02.html',
            'Keldon Johnson': 'https://www.basketball-reference.com/players/j/johnske04.html',
            'Anthony Davis': 'https://www.basketball-reference.com/players/d/davisan02.html',
            'Cam Thomas': 'https://www.basketball-reference.com/players/t/thomaca02.html'
        }
        if player_name in url_exceptions.keys():
            return url_exceptions[player_name]

        if len(split_name) == 2:
            last_first_five = split_name[1][:5].lower()
            first_first_two = split_name[0][:2].lower()
            last_first = split_name[1][:1].lower()
            breference_id = f'0{url_id}' if url_id else '01'

            return BASKETBALL_REFERENCE_URL_FORMAT_STRING.format(last_first, last_first_five + first_first_two + breference_id)
        else:
            return None
        
    def _generate_football_reference_urls(self, player_name):
        name_url_safe = player_name.replace(' ', '-')
        
        if name_url_safe == 'najee-harris':
            name_url_safe = 'najee-harris-rb'
        elif name_url_safe == 'dj-moore':
            name_url_safe = 'dj-moore-wr'
        elif name_url_safe == 'mike-williams':
            name_url_safe = 'mike-williams-wr'
        elif name_url_safe == 'josh-allen':
            name_url_safe = 'josh-allen-qb'
        elif name_url_safe == 'juju-smithschuster':
            name_url_safe = 'juju-smith-schuster'
        elif name_url_safe == 'clyde-edwardshelaire':
            name_url_safe = 'clyde-edwards-helaire'
        elif name_url_safe == 'amonra-st-brown':
            name_url_safe = 'amonra-stbrown'
        elif name_url_safe == 'elijah-mitchell':
            name_url_safe = 'elijah-mitchell-rb'
        elif name_url_safe == 'damien-harris':
            name_url_safe = 'damien-harris-rb'
        elif name_url_safe == 'donovan-peoplesjones':
            name_url_safe = 'donovan-peoples-jones'
        elif name_url_safe == 'jeff-wilson':
            name_url_safe = 'jeffery-wilson'
        elif name_url_safe == 'ronald-jones':
            name_url_safe = 'ronald-jones-ii'
        elif name_url_safe == 'robbie-anderson':
            name_url_safe = 'robby-anderson'
        elif name_url_safe == 'odell-beckham-jr':
            name_url_safe = 'odell-beckham'
            
        
        return (FOOTBALL_FANTASY_POINTS_URL_FORMAT_STRING.format(name_url_safe),
                   FOOTBALL_STATS_HISTORY_URL_FORMAT_STRING.format(name_url_safe))

    ###################################################
    # Getter/Setter Methods
    def set_sport_type(self, sport_type):
        self._sport_type = sport_type
    
    def get_basketball_column_headers(self):
        return (self._yby_column_headers, self._gbg_column_headers)
    
    def get_sport_type(self):
        return self._sport_type
        
    def get_player_names(self):
        return self._player_names
    
    def get_player_urls(self):
        return self._player_urls


In [106]:
sports_data = SportsDataCollector('basketball')

In [107]:
top_200 = get_top_200_from_espn()
top_200

['Nikola Jokic',
 'Karl-Anthony Towns',
 'Giannis Antetokounmpo',
 'Kevin Durant',
 'Joel Embiid',
 'Stephen Curry',
 'James Harden',
 'Trae Young',
 'Luka Doncic',
 'DeMar DeRozan',
 'Dejounte Murray',
 'Jayson Tatum',
 'LaMelo Ball',
 'LeBron James',
 'Nikola Vucevic',
 'Bam Adebayo',
 'Fred VanVleet',
 'Pascal Siakam',
 'Tyrese Haliburton',
 'Jaren Jackson Jr.',
 'Ja Morant',
 'Devin Booker',
 'Anthony Edwards',
 'Rudy Gobert',
 'Darius Garland',
 'Jimmy Butler',
 'Khris Middleton',
 'Donovan Mitchell',
 'Jaylen Brown',
 'Jonas Valanciunas',
 'Robert Williams III',
 'Brandon Ingram',
 'Domantas Sabonis',
 'Zach LaVine',
 'Julius Randle',
 'Cade Cunningham',
 'Jrue Holiday',
 'Evan Mobley',
 'John Collins',
 'Clint Capela',
 'Terry Rozier',
 "De'Aaron Fox",
 'Desmond Bane',
 'Christian Wood',
 'Kyrie Irving',
 'Jalen Brunson',
 'Josh Hart',
 'Tobias Harris',
 'Tyrese Maxey',
 'Miles Bridges',
 'CJ McCollum',
 'Harrison Barnes',
 'Anfernee Simons',
 'Shai Gilgeous-Alexander',
 "D'Ange

In [108]:
stats = sports_data.get_stats(top_200)
stats

['Nikola', 'Jokic']
https://www.basketball-reference.com/players/j/jokicni01.html
['Karl-Anthony', 'Towns']
https://www.basketball-reference.com/players/t/townska01.html
['Giannis', 'Antetokounmpo']
https://www.basketball-reference.com/players/a/antetgi01.html
['Kevin', 'Durant']
https://www.basketball-reference.com/players/d/duranke01.html
['Joel', 'Embiid']
https://www.basketball-reference.com/players/e/embiijo01.html
['Stephen', 'Curry']
https://www.basketball-reference.com/players/c/curryst01.html
['James', 'Harden']
https://www.basketball-reference.com/players/h/hardeja01.html
['Trae', 'Young']
https://www.basketball-reference.com/players/y/youngtr01.html
['Luka', 'Doncic']
https://www.basketball-reference.com/players/d/doncilu01.html
['DeMar', 'DeRozan']
https://www.basketball-reference.com/players/d/derozde01.html
['Dejounte', 'Murray']
https://www.basketball-reference.com/players/m/murrade01.html
['Jayson', 'Tatum']
https://www.basketball-reference.com/players/t/tatumja01.html


https://www.basketball-reference.com/players/g/giddejo01.html
['Caris', 'LeVert']
https://www.basketball-reference.com/players/l/leverca01.html
['Jarred', 'Vanderbilt']
https://www.basketball-reference.com/players/v/vandeja01.html
['Ayo', 'Dosunmu']
https://www.basketball-reference.com/players/d/dosunay01.html
['Coby', 'White']
https://www.basketball-reference.com/players/w/whiteco01.html
['Lonzo', 'Ball']
https://www.basketball-reference.com/players/b/balllo01.html
['Montrezl', 'Harrell']
https://www.basketball-reference.com/players/h/harremo01.html
['Davion', 'Mitchell']
https://www.basketball-reference.com/players/m/mitchda01.html
["Jae'Sean", 'Tate']
https://www.basketball-reference.com/players/t/tateja01.html
['Bogdan', 'Bogdanovic']
https://www.basketball-reference.com/players/b/bogdabo01.html
['Jalen', 'Green']
https://www.basketball-reference.com/players/g/greenja01.html
['Kevin', 'Huerter']
https://www.basketball-reference.com/players/h/huertke01.html
['Al', 'Horford']
https:/

https://www.basketball-reference.com/players/s/sengual01.html


{'Nikola Jokic':     Season   Age   Tm   Lg Pos     G    GS    MP    FG   FGA  ...    FT%  ORB  \
 0  2015-16  20.0  DEN  NBA   C  80.0  55.0  21.7   3.8   7.5  ...  0.811  2.3   
 1  2016-17  21.0  DEN  NBA   C  73.0  59.0  27.9   6.8  11.7  ...  0.825  2.9   
 2  2017-18  22.0  DEN  NBA   C  75.0  73.0  32.6   6.7  13.5  ...  0.850  2.6   
 3  2018-19  23.0  DEN  NBA   C  80.0  80.0  31.3   7.7  15.1  ...  0.821  2.9   
 4  2019-20  24.0  DEN  NBA   C  73.0  73.0  32.0   7.7  14.7  ...  0.817  2.3   
 5  2020-21  25.0  DEN  NBA   C  72.0  72.0  34.6  10.2  18.0  ...  0.868  2.8   
 6  2021-22  26.0  DEN  NBA   C  74.0  74.0  33.5  10.3  17.7  ...  0.810  2.8   
 
     DRB   TRB  AST  STL  BLK  TOV   PF   PTS  
 0   4.7   7.0  2.4  1.0  0.6  1.3  2.6  10.0  
 1   6.9   9.8  4.9  0.8  0.8  2.3  2.9  16.7  
 2   8.1  10.7  6.1  1.2  0.8  2.8  2.8  18.5  
 3   8.0  10.8  7.3  1.4  0.7  3.1  2.9  20.1  
 4   7.5   9.7  7.0  1.2  0.6  3.1  3.0  19.9  
 5   8.0  10.8  8.3  1.3  0.7  3.1  2.

In [96]:
# Quicksave player urls
player_urls = sports_data.get_player_urls()
sports_data._get_yearly_stats_player('Cameron Johnson')

['Cameron', 'Johnson']
https://www.basketball-reference.com/players/j/johnsca02.html


Unnamed: 0,Season,Age,Tm,Lg,Pos,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,2019-20,23.0,PHO,NBA,PF,57.0,9.0,22.0,3.1,7.1,...,0.807,0.9,2.4,3.3,1.2,0.6,0.4,0.6,1.5,8.8
1,2020-21,24.0,PHO,NBA,PF,60.0,11.0,24.0,3.4,8.1,...,0.847,0.6,2.8,3.3,1.4,0.6,0.3,0.7,1.4,9.6
2,2021-22,25.0,PHO,NBA,PF,66.0,16.0,26.2,4.2,9.2,...,0.86,0.6,3.5,4.1,1.5,0.9,0.2,0.7,1.7,12.5


In [82]:
# Display example
stats['Zion Williamson']

Unnamed: 0,Season,Age,Tm,Lg,Pos,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,2019-20,19.0,NOP,NBA,PF,24.0,24.0,27.8,8.8,15.0,...,0.64,2.7,3.6,6.3,2.1,0.7,0.4,2.5,1.8,22.5
1,2020-21,20.0,NOP,NBA,PF,61.0,61.0,33.2,10.4,17.0,...,0.698,2.7,4.5,7.2,3.7,0.9,0.6,2.7,2.2,27.0


In [109]:
#####################################################################
## Save all yearly stats as pickle files
for player in stats.keys():
    file_safe_name = player.replace(' ', '')
    stats[player].to_pickle(f'PlayerPickles/{file_safe_name}YearStats.pkl')

In [51]:
#####################################################################
## Load all yearly stats as pickle files
stats = dict()
for player in top_200:
    filename = 'PlayerPickles/{0}YearStats.pkl'.format(player.replace(' ', ''))
    with open(filename, 'rb') as player_pickle:
        stats[player] = pickle.load(player_pickle)

Unnamed: 0,Season,Age,Tm,Lg,Pos,G,GS,MP,FG,FGA,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,2019-20,19,NOP,NBA,PF,24,24,668.0,210.0,360,...,0.64,64.0,86.0,150.0,50.0,16.0,9.0,59.0,42.0,540.0
1,2020-21,20,NOP,NBA,PF,61,61,2026.0,634.0,1037,...,0.698,167.0,274.0,441.0,226.0,57.0,39.0,167.0,135.0,1647.0


In [111]:
########################################################################
## Get Game Stats for All Players
game_stats = dict()
for player in player_urls.keys():
    game_stats[player] = sports_data.get_game_stats(player, yearly_df=stats[player])

['Nikola', 'Jokic']
https://www.basketball-reference.com/players/j/jokicni01/gamelog/2016
['Nikola', 'Jokic']
https://www.basketball-reference.com/players/j/jokicni01/gamelog/2017
['Nikola', 'Jokic']
https://www.basketball-reference.com/players/j/jokicni01/gamelog/2018
['Nikola', 'Jokic']
https://www.basketball-reference.com/players/j/jokicni01/gamelog/2019
['Nikola', 'Jokic']
https://www.basketball-reference.com/players/j/jokicni01/gamelog/2020
['Nikola', 'Jokic']
https://www.basketball-reference.com/players/j/jokicni01/gamelog/2021
['Nikola', 'Jokic']
https://www.basketball-reference.com/players/j/jokicni01/gamelog/2022
['Karl-Anthony', 'Towns']
https://www.basketball-reference.com/players/t/townska01/gamelog/2016
['Karl-Anthony', 'Towns']
https://www.basketball-reference.com/players/t/townska01/gamelog/2017
['Karl-Anthony', 'Towns']
https://www.basketball-reference.com/players/t/townska01/gamelog/2018
['Karl-Anthony', 'Towns']
https://www.basketball-reference.com/players/t/townska01

['Dejounte', 'Murray']
https://www.basketball-reference.com/players/m/murrade01/gamelog/2017
['Dejounte', 'Murray']
https://www.basketball-reference.com/players/m/murrade01/gamelog/2018
['Dejounte', 'Murray']
https://www.basketball-reference.com/players/m/murrade01/gamelog/2020
['Dejounte', 'Murray']
https://www.basketball-reference.com/players/m/murrade01/gamelog/2021
['Dejounte', 'Murray']
https://www.basketball-reference.com/players/m/murrade01/gamelog/2022
['Jayson', 'Tatum']
https://www.basketball-reference.com/players/t/tatumja01/gamelog/2018
['Jayson', 'Tatum']
https://www.basketball-reference.com/players/t/tatumja01/gamelog/2019
['Jayson', 'Tatum']
https://www.basketball-reference.com/players/t/tatumja01/gamelog/2020
['Jayson', 'Tatum']
https://www.basketball-reference.com/players/t/tatumja01/gamelog/2021
['Jayson', 'Tatum']
https://www.basketball-reference.com/players/t/tatumja01/gamelog/2022
['LaMelo', 'Ball']
https://www.basketball-reference.com/players/b/ballla01/gamelog/20

['Jimmy', 'Butler']
https://www.basketball-reference.com/players/b/butleji01/gamelog/2014
['Jimmy', 'Butler']
https://www.basketball-reference.com/players/b/butleji01/gamelog/2015
['Jimmy', 'Butler']
https://www.basketball-reference.com/players/b/butleji01/gamelog/2016
['Jimmy', 'Butler']
https://www.basketball-reference.com/players/b/butleji01/gamelog/2017
['Jimmy', 'Butler']
https://www.basketball-reference.com/players/b/butleji01/gamelog/2018
['Jimmy', 'Butler']
https://www.basketball-reference.com/players/b/butleji01/gamelog/2019
['Jimmy', 'Butler']
https://www.basketball-reference.com/players/b/butleji01/gamelog/2020
['Jimmy', 'Butler']
https://www.basketball-reference.com/players/b/butleji01/gamelog/2021
['Jimmy', 'Butler']
https://www.basketball-reference.com/players/b/butleji01/gamelog/2022
['Khris', 'Middleton']
https://www.basketball-reference.com/players/m/middlkh01/gamelog/2013
['Khris', 'Middleton']
https://www.basketball-reference.com/players/m/middlkh01/gamelog/2014
['Kh

['John', 'Collins']
https://www.basketball-reference.com/players/c/collijo01/gamelog/2021
['John', 'Collins']
https://www.basketball-reference.com/players/c/collijo01/gamelog/2022
['Clint', 'Capela']
https://www.basketball-reference.com/players/c/capelca01/gamelog/2015
['Clint', 'Capela']
https://www.basketball-reference.com/players/c/capelca01/gamelog/2016
['Clint', 'Capela']
https://www.basketball-reference.com/players/c/capelca01/gamelog/2017
['Clint', 'Capela']
https://www.basketball-reference.com/players/c/capelca01/gamelog/2018
['Clint', 'Capela']
https://www.basketball-reference.com/players/c/capelca01/gamelog/2019
['Clint', 'Capela']
https://www.basketball-reference.com/players/c/capelca01/gamelog/2020
['Clint', 'Capela']
https://www.basketball-reference.com/players/c/capelca01/gamelog/2021
['Clint', 'Capela']
https://www.basketball-reference.com/players/c/capelca01/gamelog/2022
['Terry', 'Rozier']
https://www.basketball-reference.com/players/r/roziete01/gamelog/2016
['Terry', 

['Shai', 'Gilgeous-Alexander']
https://www.basketball-reference.com/players/g/gilgesh01/gamelog/2020
['Shai', 'Gilgeous-Alexander']
https://www.basketball-reference.com/players/g/gilgesh01/gamelog/2021
['Shai', 'Gilgeous-Alexander']
https://www.basketball-reference.com/players/g/gilgesh01/gamelog/2022
["D'Angelo", 'Russell']
https://www.basketball-reference.com/players/r/russeda01/gamelog/2016
["D'Angelo", 'Russell']
https://www.basketball-reference.com/players/r/russeda01/gamelog/2017
["D'Angelo", 'Russell']
https://www.basketball-reference.com/players/r/russeda01/gamelog/2018
["D'Angelo", 'Russell']
https://www.basketball-reference.com/players/r/russeda01/gamelog/2019
["D'Angelo", 'Russell']
https://www.basketball-reference.com/players/r/russeda01/gamelog/2020
["D'Angelo", 'Russell']
https://www.basketball-reference.com/players/r/russeda01/gamelog/2021
["D'Angelo", 'Russell']
https://www.basketball-reference.com/players/r/russeda01/gamelog/2022
['Deandre', 'Ayton']
https://www.basket

['Reggie', 'Jackson']
https://www.basketball-reference.com/players/j/jacksre01/gamelog/2018
['Reggie', 'Jackson']
https://www.basketball-reference.com/players/j/jacksre01/gamelog/2019
['Reggie', 'Jackson']
https://www.basketball-reference.com/players/j/jacksre01/gamelog/2020
['Reggie', 'Jackson']
https://www.basketball-reference.com/players/j/jacksre01/gamelog/2021
['Reggie', 'Jackson']
https://www.basketball-reference.com/players/j/jacksre01/gamelog/2022
['Gary', 'Trent', 'Jr.']
https://www.basketball-reference.com/players/t/trentga02/gamelog/2019
['Gary', 'Trent', 'Jr.']
https://www.basketball-reference.com/players/t/trentga02/gamelog/2020
['Gary', 'Trent', 'Jr.']
https://www.basketball-reference.com/players/t/trentga02/gamelog/2021
['Gary', 'Trent', 'Jr.']
https://www.basketball-reference.com/players/t/trentga02/gamelog/2022
['Andrew', 'Wiggins']
https://www.basketball-reference.com/players/w/wiggian01/gamelog/2015
['Andrew', 'Wiggins']
https://www.basketball-reference.com/players/w

['Dennis', 'Schroder']
https://www.basketball-reference.com/players/s/schrode01/gamelog/2014
['Dennis', 'Schroder']
https://www.basketball-reference.com/players/s/schrode01/gamelog/2015
['Dennis', 'Schroder']
https://www.basketball-reference.com/players/s/schrode01/gamelog/2016
['Dennis', 'Schroder']
https://www.basketball-reference.com/players/s/schrode01/gamelog/2017
['Dennis', 'Schroder']
https://www.basketball-reference.com/players/s/schrode01/gamelog/2018
['Dennis', 'Schroder']
https://www.basketball-reference.com/players/s/schrode01/gamelog/2019
['Dennis', 'Schroder']
https://www.basketball-reference.com/players/s/schrode01/gamelog/2020
['Dennis', 'Schroder']
https://www.basketball-reference.com/players/s/schrode01/gamelog/2021
['Dennis', 'Schroder']
https://www.basketball-reference.com/players/s/schrode01/gamelog/2022
['OG', 'Anunoby']
https://www.basketball-reference.com/players/a/anunoog01/gamelog/2018
['OG', 'Anunoby']
https://www.basketball-reference.com/players/a/anunoog01/

['Kevin', 'Love']
https://www.basketball-reference.com/players/l/loveke01/gamelog/2022
['Marcus', 'Smart']
https://www.basketball-reference.com/players/s/smartma01/gamelog/2015
['Marcus', 'Smart']
https://www.basketball-reference.com/players/s/smartma01/gamelog/2016
['Marcus', 'Smart']
https://www.basketball-reference.com/players/s/smartma01/gamelog/2017
['Marcus', 'Smart']
https://www.basketball-reference.com/players/s/smartma01/gamelog/2018
['Marcus', 'Smart']
https://www.basketball-reference.com/players/s/smartma01/gamelog/2019
['Marcus', 'Smart']
https://www.basketball-reference.com/players/s/smartma01/gamelog/2020
['Marcus', 'Smart']
https://www.basketball-reference.com/players/s/smartma01/gamelog/2021
['Marcus', 'Smart']
https://www.basketball-reference.com/players/s/smartma01/gamelog/2022
["De'Andre", 'Hunter']
https://www.basketball-reference.com/players/h/huntede01/gamelog/2020
["De'Andre", 'Hunter']
https://www.basketball-reference.com/players/h/huntede01/gamelog/2021
["De'An

['LaMarcus', 'Aldridge']
https://www.basketball-reference.com/players/a/aldrila01/gamelog/2019
['LaMarcus', 'Aldridge']
https://www.basketball-reference.com/players/a/aldrila01/gamelog/2020
['LaMarcus', 'Aldridge']
https://www.basketball-reference.com/players/a/aldrila01/gamelog/2021
['LaMarcus', 'Aldridge']
https://www.basketball-reference.com/players/a/aldrila01/gamelog/2022
['Herbert', 'Jones']
https://www.basketball-reference.com/players/j/joneshe01/gamelog/2022
['Javonte', 'Green']
https://www.basketball-reference.com/players/g/greenja01/gamelog/2015
['Javonte', 'Green']
https://www.basketball-reference.com/players/g/greenja01/gamelog/2016
['Javonte', 'Green']
https://www.basketball-reference.com/players/g/greenja01/gamelog/2017
['Javonte', 'Green']
https://www.basketball-reference.com/players/g/greenja01/gamelog/2018
['Javonte', 'Green']
https://www.basketball-reference.com/players/g/greenja01/gamelog/2019
['Javonte', 'Green']
https://www.basketball-reference.com/players/g/greenj

['Lonnie', 'Walker', 'IV']
https://www.basketball-reference.com/players/w/walkelo01/gamelog/2020
['Lonnie', 'Walker', 'IV']
https://www.basketball-reference.com/players/w/walkelo01/gamelog/2021
['Lonnie', 'Walker', 'IV']
https://www.basketball-reference.com/players/w/walkelo01/gamelog/2022
['Oshae', 'Brissett']
https://www.basketball-reference.com/players/b/brissos01/gamelog/2020
['Oshae', 'Brissett']
https://www.basketball-reference.com/players/b/brissos01/gamelog/2021
['Oshae', 'Brissett']
https://www.basketball-reference.com/players/b/brissos01/gamelog/2022
['Terance', 'Mann']
https://www.basketball-reference.com/players/m/mannte01/gamelog/2020
['Terance', 'Mann']
https://www.basketball-reference.com/players/m/mannte01/gamelog/2021
['Terance', 'Mann']
https://www.basketball-reference.com/players/m/mannte01/gamelog/2022
['Grayson', 'Allen']
https://www.basketball-reference.com/players/a/allengr01/gamelog/2019
['Grayson', 'Allen']
https://www.basketball-reference.com/players/a/allengr

['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2009
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2010
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2011
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2012
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2013
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2014
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2015
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2016
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2017
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2018
['Patty', 'Mills']
https://www.basketball-reference.com/players/m/millspa01/gamelog/2019
['Patty', 'Mills']
ht

['Kentavious', 'Caldwell-Pope']
https://www.basketball-reference.com/players/c/caldwke01/gamelog/2020
['Kentavious', 'Caldwell-Pope']
https://www.basketball-reference.com/players/c/caldwke01/gamelog/2021
['Kentavious', 'Caldwell-Pope']
https://www.basketball-reference.com/players/c/caldwke01/gamelog/2022
['Kevon', 'Looney']
https://www.basketball-reference.com/players/l/looneke01/gamelog/2016
['Kevon', 'Looney']
https://www.basketball-reference.com/players/l/looneke01/gamelog/2017
['Kevon', 'Looney']
https://www.basketball-reference.com/players/l/looneke01/gamelog/2018
['Kevon', 'Looney']
https://www.basketball-reference.com/players/l/looneke01/gamelog/2019
['Kevon', 'Looney']
https://www.basketball-reference.com/players/l/looneke01/gamelog/2020
['Kevon', 'Looney']
https://www.basketball-reference.com/players/l/looneke01/gamelog/2021
['Kevon', 'Looney']
https://www.basketball-reference.com/players/l/looneke01/gamelog/2022
['Myles', 'Turner']
https://www.basketball-reference.com/players

['Larry', 'Nance', 'Jr.']
https://www.basketball-reference.com/players/n/nancela02/gamelog/2017
['Larry', 'Nance', 'Jr.']
https://www.basketball-reference.com/players/n/nancela02/gamelog/2018
['Larry', 'Nance', 'Jr.']
https://www.basketball-reference.com/players/n/nancela02/gamelog/2019
['Larry', 'Nance', 'Jr.']
https://www.basketball-reference.com/players/n/nancela02/gamelog/2020
['Larry', 'Nance', 'Jr.']
https://www.basketball-reference.com/players/n/nancela02/gamelog/2021
['Larry', 'Nance', 'Jr.']
https://www.basketball-reference.com/players/n/nancela02/gamelog/2022
['Max', 'Strus']
https://www.basketball-reference.com/players/s/strusma01/gamelog/2020
['Max', 'Strus']
https://www.basketball-reference.com/players/s/strusma01/gamelog/2021
['Max', 'Strus']
https://www.basketball-reference.com/players/s/strusma01/gamelog/2022
['Thaddeus', 'Young']
https://www.basketball-reference.com/players/y/youngth01/gamelog/2008
['Thaddeus', 'Young']
https://www.basketball-reference.com/players/y/yo

['Cody', 'Martin']
https://www.basketball-reference.com/players/m/martico01/gamelog/2022
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2011
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2012
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2013
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2014
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2015
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2016
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2017
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2018
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2019
['Eric', 'Bledsoe']
https://www.basketball-reference.com/players/b/bledser01/gamelog/2020
['Eric', 'B

In [112]:
########################################################################
## Save All Game Stats
for player in game_stats.keys():
    for year in game_stats[player].keys():
        unique_id = player.replace(' ', '') + year
        game_stats[player][year].to_pickle(f'PlayerPickles/GameStats/{unique_id}GameStats.pkl')

In [55]:
#####################################################################
## Load All Game Stats
import os, pickle
game_stats = dict()
for filename in os.listdir('PlayerPickles/GameStats/'):
    filename_no_ending = filename.replace('GameStats.pkl', '')
    year = filename_no_ending[len(filename_no_ending) - 4:]
    player = filename_no_ending[:len(filename_no_ending) - 4]
    if player not in game_stats:
        game_stats[player] = dict()
    with open('PlayerPickles/GameStats/' + filename, 'rb') as games_pickle:
        game_stats[player][year] = pickle.load(games_pickle)

In [57]:
game_stats['ZionWilliamson']['2021']

Unnamed: 0,Rk,G,Date,Age,Tm,Unnamed: 5,Opp,Unnamed: 7,GS,MP,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,GmSc,+/-
0,1,1.0,2020-12-23,20-170,NOP,@,TOR,W (+14),1.0,29:34,...,6.0,10.0,3.0,1.0,1.0,6.0,3.0,15.0,11.5,15.0
1,2,2.0,2020-12-25,20-172,NOP,@,MIA,L (-13),1.0,38:18,...,10.0,14.0,1.0,1.0,0.0,3.0,1.0,32.0,24.5,32.0
2,3,3.0,2020-12-27,20-174,NOP,0,SAS,W (+3),1.0,38:03,...,6.0,11.0,0.0,5.0,1.0,1.0,3.0,18.0,13.0,18.0
3,4,4.0,2020-12-29,20-176,NOP,@,PHO,L (-25),1.0,28:49,...,1.0,2.0,0.0,1.0,0.0,1.0,1.0,20.0,13.5,20.0
4,5,5.0,2020-12-31,20-178,NOP,@,OKC,W (+33),1.0,17:25,...,1.0,5.0,1.0,0.0,0.0,3.0,4.0,12.0,7.8,12.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64,62,57.0,2021-04-28,20-296,NOP,@,DEN,L (-2),1.0,30:55,...,1.0,2.0,0.0,2.0,0.0,3.0,3.0,21.0,10.3,21.0
65,63,58.0,2021-04-29,20-297,NOP,@,OKC,W (+14),1.0,34:13,...,2.0,8.0,6.0,1.0,0.0,0.0,0.0,27.0,24.6,27.0
66,64,59.0,2021-05-01,20-299,NOP,@,MIN,W (+4),1.0,41:54,...,6.0,9.0,8.0,1.0,1.0,7.0,1.0,37.0,32.1,37.0
67,65,60.0,2021-05-03,20-301,NOP,0,GSW,L (-15),1.0,36:52,...,4.0,8.0,1.0,0.0,2.0,4.0,0.0,32.0,20.9,32.0


In [260]:
#############################################################
## FOOTBALL STATS
football_data = SportsDataCollector('football')
top_200 = [*set(get_top_200_football())]

In [241]:
top_200

['mike gesicki',
 'gabriel davis',
 'parris campbell',
 'david montgomery',
 'tyler boyd',
 'gerald everett',
 'patrick mahomes',
 'tony pollard',
 'albert okwuegbunam',
 'tyreek hill',
 'kenneth gainwell',
 'jamarr chase',
 'courtland sutton',
 'jaylen waddle',
 'darren waller',
 'dalvin cook',
 'justin herbert',
 'curtis samuel',
 'joe mixon',
 'james proche',
 'kyler murray',
 'evan engram',
 'javonte williams',
 'trey lance',
 'ceedee lamb',
 'marquez valdesscantling',
 'kirk cousins',
 'diontae johnson',
 'boston scott',
 'devante parker',
 'darrell henderson',
 'dawson knox',
 'cooper kupp',
 'kyle rudolph',
 'dandre swift',
 'aaron rodgers',
 'randall cobb',
 'adam thielen',
 'jk dobbins',
 'khalil herbert',
 'dernest johnson',
 'clyde edwardshelaire',
 'joe burrow',
 'dalton schultz',
 'tj hockenson',
 'trey sermon',
 'robert woods',
 'dallas goedert',
 'justin fields',
 'kyle pitts',
 'george kittle',
 'amari cooper',
 'david njoku',
 'jalen hurts',
 'isaiah mckenzie',
 'miles

In [261]:
stats = football_data.get_stats(top_200)
stats

https://www.fantasypros.com/nfl/projections/mike-gesicki.php
<h2>TE - MIA                                </h2>
https://www.fantasypros.com/nfl/projections/gabriel-davis.php
<h2>WR - BUF                                </h2>
https://www.fantasypros.com/nfl/projections/parris-campbell.php
<h2>WR - IND                                </h2>
https://www.fantasypros.com/nfl/projections/david-montgomery.php
<h2>RB - CHI                                </h2>
https://www.fantasypros.com/nfl/projections/tyler-boyd.php
<h2>WR - CIN                                </h2>
https://www.fantasypros.com/nfl/projections/gerald-everett.php
<h2>TE - LAC                                </h2>
https://www.fantasypros.com/nfl/projections/patrick-mahomes.php
<h2>QB - KC                                </h2>
https://www.fantasypros.com/nfl/projections/tony-pollard.php
<h2>RB - DAL                                </h2>
https://www.fantasypros.com/nfl/projections/albert-okwuegbunam.php
<h2>TE - DEN                       

KeyboardInterrupt: 

In [248]:
########################################################################
## Save All Game Stats for Football
fp_stats = {key:val[0] for key, val in stats.items()}
for player in fp_stats.keys():
    unique_id = player.replace(' ', '_')
    fp_stats[player].to_pickle(f'PlayerPickles/FootballStats/{unique_id}_game_stats.pkl')

In [251]:
#####################################################################
## Load All Game Stats
import os, pickle
stats = dict()
for filename in os.listdir('PlayerPickles/FootballStats/'):
    player = filename.replace('_game_stats.pkl', '')
    with open('PlayerPickles/FootballStats/' + filename, 'rb') as stats_pickle:
        stats[player] = pickle.load(stats_pickle)

In [252]:
stats

{'darnell_mooney':     Projected Pts  Actual Pts   +/- Exceeded Expectations?
 0             8.9        12.6   3.7                    Yes
 1             8.3        12.9   4.6                    Yes
 2             8.7         5.9  -2.8                     No
 3             9.1         6.6  -2.5                     No
 4             8.9         1.9  -7.0                     No
 5             9.9         2.7  -7.2                     No
 6            10.1        12.5   2.4                    Yes
 7             8.9        18.1   9.2                    Yes
 8             6.4        17.6  11.2                    Yes
 9             6.2         6.4   0.2                    Yes
 10            6.7         3.9  -2.8                     No
 11            6.6        10.5   3.9                    Yes
 12            6.9         3.5  -3.4                     No
 13            6.5        13.5   7.0                    Yes
 14            6.6         0.9  -5.7                     No
 15            6.5    