In [3]:
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd
import numpy as np

In [123]:
class Data:

    class player:
        
        def __init__(self, player_name):
            self.player_name = player_name
            self._URL = "https://www.basketball-reference.com/players/"
            self.table_types = ['all_per_game', 'all_totals', 'all_per_minute', 'all_per_poss', 
                                'all_advanced', 'all_shooting', 'all_pbp', 'all_year-and-career-highs', 
                                'all_playoffs_per_game', 'all_playoffs_totals', 'all_playoffs_per_minute', 
                                'all_playoffs_per_poss', 'all_playoffs_advanced', 'all_playoffs_shooting', 
                                'all_playoffs_pbp', 'all_year-and-career-highs-po', 'all_all_star', 'all_sim_thru', 
                                'all_sim_career', 'all_all_college_stats', 'all_all_salaries']
            
        
            
        def fetch(self, table_types=["all_per_game"]):
            #flip the player's name (e.g. "Michael Jordan" -> ["Jordan", "Michael"])
            inv_player_name = self.player_name.lower().split(" ")[::-1]
            
            #create an url extension from the player name ("j/jordami01.html")
            player_tag = inv_player_name[0][0]+"/"+inv_player_name[0][0:5]+inv_player_name[1][0:2]+"01.html"

            #create the full url ("https://www.basketball-reference.com/players/j/jordami01.html")
            _FULL_URL = self._URL + player_tag

            #make a request to the page and report status of response
            response = requests.get(_FULL_URL)
            print("<status: "+str(response.status_code)+">")

            #parse the raw html
            soup = bs(response.content, "html.parser")

            
            
            
            #find all the tables
            tables = soup.findAll("div", {"class": "table_wrapper"})
            
            #map an index to each possible **kwarg
            table_map = {}
            for num, key in enumerate([t.get("id") for t in tables]):
                table_map[key] = num       
                                       
                                       
            #initiate list to hold all user requested data tables
            all_dfs = {}
            
    
            for t in table_types:
                
                #find the schema for the data table
                schema = list(set([d.get("data-stat") for d in tables[table_map[t]].findAll("td")]))

                #initialize a dictionary of empty lists mapped to the schema
                stats = {}
                for s in schema:
                    stats[s] = []

                #add the data from each tag to the appropriate list
                for d in tables[table_map[t]].findAll("td"):
                    if str(d.string)[0].isdigit() or str(d.string)[0] == ".":
                        value = float(d.string) 
                    else:
                        value = d.string

                    stats[d.get("data-stat")].append(value)

                    
                #create a dataframe from data, dropping rows where data has been aggregated (i.e. only return raw data)
                if "age" in stats.keys():
                    stats_df = pd.DataFrame.from_dict(stats).dropna(subset=["age"])
                    
                elif "pos" in stats.keys():
                    stats_df = pd.DataFrame.from_dict(stats).dropna(subset=["pos"])
                    
                else:
                    stats_df = pd.DataFrame.from_dict(stats)
                    
                all_dfs[t] = stats_df
            
            #return dataframe
            return all_dfs

In [113]:
player = Data.player("patrick ewing").fetch()
player["all_per_game"]

<status: 200>
['all_per_game', 'all_totals', 'all_per_minute', 'all_per_poss', 'all_advanced', 'all_shooting', 'all_pbp', 'all_year-and-career-highs', 'all_playoffs_per_game', 'all_playoffs_totals', 'all_playoffs_per_minute', 'all_playoffs_per_poss', 'all_playoffs_advanced', 'all_playoffs_shooting', 'all_playoffs_pbp', 'all_year-and-career-highs-po', 'all_all_star', 'all_sim_thru', 'all_sim_career', 'all_all_college_stats', 'all_all_salaries']
{'all_per_game': 0, 'all_totals': 1, 'all_per_minute': 2, 'all_per_poss': 3, 'all_advanced': 4, 'all_shooting': 5, 'all_pbp': 6, 'all_year-and-career-highs': 7, 'all_playoffs_per_game': 8, 'all_playoffs_totals': 9, 'all_playoffs_per_minute': 10, 'all_playoffs_per_poss': 11, 'all_playoffs_advanced': 12, 'all_playoffs_shooting': 13, 'all_playoffs_pbp': 14, 'all_year-and-career-highs-po': 15, 'all_all_star': 16, 'all_sim_thru': 17, 'all_sim_career': 18, 'all_all_college_stats': 19, 'all_all_salaries': 20}


Unnamed: 0,stl_per_g,fg_per_g,ft_pct,team_id,orb_per_g,age,gs,g,fg3a_per_g,mp_per_g,...,pf_per_g,tov_per_g,fg_pct,drb_per_g,trb_per_g,pos,efg_pct,fg2a_per_g,ast_per_g,lg_id
0,1.1,7.7,0.739,NYK,2.5,23.0,50.0,50.0,0.1,35.4,...,3.8,3.4,0.474,6.5,9.0,C,0.474,16.2,2.0,NBA
1,1.4,8.4,0.713,NYK,2.5,24.0,63.0,63.0,0.1,35.0,...,3.9,3.6,0.503,6.3,8.8,PF,0.503,16.6,1.7,NBA
2,1.3,8.0,0.716,NYK,3.0,25.0,82.0,82.0,0.0,31.0,...,4.0,3.5,0.555,5.3,8.2,C,0.555,14.4,1.5,NBA
3,1.5,9.1,0.746,NYK,2.7,26.0,80.0,80.0,0.1,36.2,...,3.9,3.3,0.567,6.6,9.3,C,0.567,16.0,2.4,NBA
4,1.0,11.2,0.775,NYK,2.9,27.0,82.0,82.0,0.0,38.6,...,4.0,3.4,0.551,8.0,10.9,C,0.551,20.4,2.2,NBA
5,1.0,10.4,0.745,NYK,2.4,28.0,81.0,81.0,0.1,38.3,...,3.5,3.6,0.514,8.8,11.2,C,0.514,20.2,3.0,NBA
6,1.1,9.7,0.738,NYK,2.8,29.0,82.0,82.0,0.1,38.4,...,3.4,2.5,0.522,8.5,11.2,C,0.522,18.5,1.9,NBA
7,0.9,9.6,0.719,NYK,2.4,30.0,81.0,81.0,0.1,37.1,...,3.5,3.3,0.503,9.7,12.1,C,0.503,19.0,1.9,NBA
8,1.1,9.4,0.765,NYK,2.8,31.0,79.0,79.0,0.2,37.6,...,3.5,3.3,0.496,8.4,11.2,C,0.497,18.8,2.3,NBA
9,0.9,9.2,0.75,NYK,2.0,32.0,79.0,79.0,0.3,37.0,...,3.4,3.2,0.503,9.0,11.0,C,0.505,18.1,2.7,NBA


In [124]:
table_types = Data.player("").table_types
Stay_Me7o = Data.player("Carmelo Anthony").fetch(table_types=table_types)

<status: 200>


In [130]:
Stay_Me7o["all_per_game"]

Unnamed: 0,stl_per_g,fg_per_g,ft_pct,team_id,orb_per_g,age,gs,g,fg3a_per_g,mp_per_g,...,pf_per_g,tov_per_g,fg_pct,drb_per_g,trb_per_g,pos,efg_pct,fg2a_per_g,ast_per_g,lg_id
0,1.2,7.6,0.777,DEN,2.2,19.0,82.0,82.0,2.6,36.5,...,2.7,3.0,0.426,3.8,6.1,SF,0.449,15.3,2.8,NBA
1,0.9,7.1,0.796,DEN,1.9,20.0,75.0,75.0,2.1,34.8,...,3.1,3.0,0.431,3.8,5.7,SF,0.448,14.3,2.6,NBA
2,1.1,9.5,0.808,DEN,1.5,21.0,80.0,80.0,1.9,36.8,...,2.9,2.7,0.481,3.4,4.9,SF,0.493,17.8,2.7,NBA
3,1.2,10.6,0.808,DEN,2.2,22.0,65.0,65.0,2.3,38.2,...,3.1,3.6,0.476,3.8,6.0,SF,0.489,20.1,3.8,NBA
4,1.3,9.5,0.786,DEN,2.3,23.0,77.0,77.0,2.1,36.4,...,3.3,3.3,0.492,5.1,7.4,SF,0.511,17.1,3.4,NBA
5,1.1,8.1,0.793,DEN,1.6,24.0,66.0,66.0,2.6,34.5,...,3.0,3.0,0.443,5.2,6.8,SF,0.469,15.7,3.4,NBA
6,1.3,10.0,0.83,DEN,2.2,25.0,69.0,69.0,2.7,38.2,...,3.3,3.0,0.458,4.4,6.6,SF,0.478,19.1,3.2,NBA
7,0.9,8.9,0.838,TOT,1.5,26.0,77.0,77.0,3.3,35.7,...,2.9,2.7,0.455,5.8,7.3,SF,0.487,16.3,2.9,NBA
8,0.9,8.7,0.823,DEN,1.5,26.0,50.0,50.0,2.5,35.5,...,2.7,2.8,0.452,6.1,7.6,SF,0.474,16.8,2.8,NBA
9,0.9,9.1,0.872,NYK,1.5,26.0,27.0,27.0,4.6,36.2,...,3.3,2.4,0.461,5.2,6.7,SF,0.51,15.2,3.0,NBA


true
