
# $\color{blue}{\text{Tasks:}}$

$\color{green}{\text{1. Extract Players Data from Website (1991-2021)}}$ 

$\color{green}{\text{2. Extract Teams Data from Website (1991-2021)}}$

$\color{green}{\text{3. Extract Most Valuable Players Data from Website (1991-2021)}}$

# Libraries

In [5]:
import os
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Links

https://www.basketball-reference.com/leagues/NBA_1991_per_game.html

https://www.basketball-reference.com/leagues/NBA_1991_standings.html

https://www.basketball-reference.com/awards/awards_1991.html

In [6]:
# URL templates of the website; the "{}" placeholder is filled with the season year.
# Per-game player statistics:
url_players = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html"
# Team standings:
url_teams = "https://www.basketball-reference.com/leagues/NBA_{}_standings.html"
# Award voting (MVP):
url_mvp = "https://www.basketball-reference.com/awards/awards_{}.html"

In [93]:
# Seasons to extract: 1991 through 2021 inclusive (range() excludes its stop value).
years = [*range(1991, 2022)]

In [130]:
# Create one local folder per page type.
# exist_ok=True keeps this cell re-runnable: without it os.makedirs raises
# FileExistsError as soon as the folders already exist from an earlier run.
for folder in ("players", "teams", "mvp"):
    os.makedirs(folder, exist_ok=True)

In [131]:
def save_url(url, path, year_list=None, timeout=30):
    """Download one HTML page per season and save it as ``{path}/{year}.html``.

    Parameters
    ----------
    url : str
        URL template with a ``{}`` placeholder that is filled with the year.
    path : str
        Folder that receives the files; it must already exist.
    year_list : iterable of int, optional
        Seasons to download. When omitted, the notebook-level ``years`` list
        is used, which is what the original two-argument call did.
    timeout : float, optional
        Seconds to wait for the server before the request is abandoned.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    """
    seasons = years if year_list is None else year_list
    for year in seasons:
        response = requests.get(url.format(year), timeout=timeout)
        # Fail loudly instead of writing an error page to disk: a saved error
        # page would only surface later as a confusing parsing failure.
        response.raise_for_status()
        with open(f"{path}/{year}.html", "w", encoding="utf-8") as f:
            f.write(response.text)

In [132]:
# Apply the download function to each of the three links above,
# storing every page type in its own folder.
for template, folder in (
    (url_players, "players"),
    (url_teams, "teams"),
    (url_mvp, "mvp"),
):
    save_url(template, folder)

In [170]:
# Extract the players data from the saved HTML pages: one DataFrame per season.
players_dfs = []

for year in years:
    # Read the whole page first so the file is closed before parsing starts.
    with open("players/{}.html".format(year), "r", encoding="utf-8") as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")

    # Remove every <tr class="thead"> row before parsing (presumably the
    # header rows repeated inside the table body; left in place they would be
    # read as data rows).
    for header_row in soup.find_all("tr", class_="thead"):
        header_row.decompose()

    players_table = soup.find(id="div_per_game_stats")
    if players_table is None:
        # Without this check the string "None" would be handed to read_html
        # and fail with an error that does not mention the real cause.
        raise ValueError(
            "element 'div_per_game_stats' not found in players/{}.html".format(year)
        )

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas versions, file-like input is always accepted.
    df = pd.read_html(StringIO(str(players_table)))[0]
    df["Year"] = year
    players_dfs.append(df)

In [164]:
# Stack the per-season frames into one table; each season keeps its own row labels.
players_data = pd.concat(objs=players_dfs, axis=0)

In [165]:
# Preview the first five rows of the combined players table.
players_data.head(n=5)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,1,Alaa Abdelnaby,PF,22,POR,43,0,6.7,1.3,2.7,...,0.6,1.4,2.1,0.3,0.1,0.3,0.5,0.9,3.1,1991
1,2,Mahmoud Abdul-Rauf,PG,21,DEN,67,19,22.5,6.2,15.1,...,0.5,1.3,1.8,3.1,0.8,0.1,1.6,2.2,14.1,1991
2,3,Mark Acres,C,28,ORL,68,0,19.3,1.6,3.1,...,2.1,3.2,5.3,0.4,0.4,0.4,0.6,3.2,4.2,1991
3,4,Michael Adams,PG,28,DEN,66,66,35.5,8.5,21.5,...,0.9,3.0,3.9,10.5,2.2,0.1,3.6,2.5,26.5,1991
4,5,Mark Aguirre,SF,31,DET,78,13,25.7,5.4,11.7,...,1.7,3.1,4.8,1.8,0.6,0.3,1.6,2.7,14.2,1991


In [166]:
# Preview the last five rows of the combined players table.
players_data.tail(n=5)

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
700,536,Delon Wright,PG,28,SAC,27,8,25.8,3.9,8.3,...,1.0,2.9,3.9,3.6,1.6,0.4,1.3,1.1,10.0,2021
701,537,Thaddeus Young,PF,32,CHI,68,23,24.3,5.4,9.7,...,2.5,3.8,6.2,4.3,1.1,0.6,2.0,2.2,12.1,2021
702,538,Trae Young,PG,22,ATL,63,63,33.7,7.7,17.7,...,0.6,3.3,3.9,9.4,0.8,0.2,4.1,1.8,25.3,2021
703,539,Cody Zeller,C,28,CHO,48,21,20.9,3.8,6.8,...,2.5,4.4,6.8,1.8,0.6,0.4,1.1,2.5,9.4,2021
704,540,Ivica Zubac,C,23,LAC,72,33,22.3,3.6,5.5,...,2.6,4.6,7.2,1.3,0.3,0.9,1.1,2.6,9.0,2021


In [169]:
# Export the extracted players data to an Excel file, leaving out the row labels.
players_data.to_excel(excel_writer="players.xlsx", index=False)

In [181]:
# Extract the teams data from the saved HTML pages: one DataFrame per season,
# combining the Eastern and Western conference standings.
teams_dfs = []

for year in years:
    # Read the whole page first so the file is closed before parsing starts.
    with open("teams/{}.html".format(year), "r", encoding="utf-8") as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")

    # Remove every <tr class="thead"> row before parsing (presumably the
    # in-table sub-header rows; left in place they would be read as data rows).
    for header_row in soup.find_all("tr", class_="thead"):
        header_row.decompose()

    e_table = soup.find(id="divs_standings_E")
    w_table = soup.find(id="divs_standings_W")
    if e_table is None or w_table is None:
        # Without this check the string "None" would be handed to read_html
        # and fail with an error that does not mention the real cause.
        raise ValueError(
            "conference standings table not found in teams/{}.html".format(year)
        )

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas versions, file-like input is always accepted.
    east_df = pd.read_html(StringIO(str(e_table)))[0]
    # The team name sits in a column named after the conference; copy it to a
    # shared "team" column so both conferences line up after concatenation.
    east_df["team"] = east_df["Eastern Conference"]
    east_df["Year"] = year

    west_df = pd.read_html(StringIO(str(w_table)))[0]
    west_df["team"] = west_df["Western Conference"]
    west_df["Year"] = year

    # After concatenation each conference-named column is only filled for its
    # own rows, so both are dropped in favour of "team".
    ew_df = pd.concat([east_df, west_df]).drop(
        columns=["Eastern Conference", "Western Conference"]
    )
    teams_dfs.append(ew_df)

In [182]:
# Stack the per-season frames into one table; each season keeps its own row labels.
teams_data = pd.concat(objs=teams_dfs, axis=0)

In [186]:
# Preview the first 30 rows of the combined teams table.
teams_data.head(n=30)

Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,team,Year
0,56,26,0.683,—,111.5,105.7,5.22,Boston Celtics*,1991
1,44,38,0.537,12.0,105.4,105.6,-0.39,Philadelphia 76ers*,1991
2,39,43,0.476,17.0,103.1,103.3,-0.43,New York Knicks*,1991
3,30,52,0.366,26.0,101.4,106.4,-4.84,Washington Bullets,1991
4,26,56,0.317,30.0,102.9,107.5,-4.53,New Jersey Nets,1991
5,24,58,0.293,32.0,101.8,107.8,-5.91,Miami Heat,1991
6,61,21,0.744,—,110.0,101.0,8.57,Chicago Bulls*,1991
7,50,32,0.61,11.0,100.1,96.8,3.08,Detroit Pistons*,1991
8,48,34,0.585,13.0,106.4,104.0,2.33,Milwaukee Bucks*,1991
9,43,39,0.524,18.0,109.8,109.0,0.72,Atlanta Hawks*,1991


In [187]:
# Preview the last five rows of the combined teams table.
teams_data.tail(n=5)

Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,team,Year
10,42,30,0.583,—,112.4,110.2,2.26,Dallas Mavericks*,2021
11,38,34,0.528,4.0,113.3,112.3,1.07,Memphis Grizzlies*,2021
12,33,39,0.458,9.0,111.1,112.8,-1.58,San Antonio Spurs,2021
13,31,41,0.431,11.0,114.6,114.9,-0.2,New Orleans Pelicans,2021
14,17,55,0.236,25.0,108.8,116.7,-7.5,Houston Rockets,2021


In [188]:
# Export the extracted teams data to an Excel file, leaving out the row labels.
teams_data.to_excel(excel_writer="teams.xlsx", index=False)

In [189]:
# Extract the MVP voting data from the saved HTML pages: one DataFrame per season.
mvp_dfs = []

for year in years:
    # Read the whole page first so the file is closed before parsing starts.
    with open("mvp/{}.html".format(year), "r", encoding="utf-8") as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")

    mvp_table = soup.find(id="mvp")
    if mvp_table is None:
        # Without this check the string "None" would be handed to read_html
        # and fail with an error that does not mention the real cause.
        raise ValueError("element 'mvp' not found in mvp/{}.html".format(year))

    # Drop the grouping header row of this table so read_html yields a single
    # level of column names. The search is limited to the MVP table and
    # tolerates pages that have no such row.
    over_header = mvp_table.find("tr", class_="over_header")
    if over_header is not None:
        over_header.decompose()

    # Wrap the markup in StringIO: passing literal HTML to read_html is
    # deprecated in recent pandas versions, file-like input is always accepted.
    df = pd.read_html(StringIO(str(mvp_table)))[0]
    df["Year"] = year
    mvp_dfs.append(df)

In [190]:
# Stack the per-season frames into one table; each season keeps its own row labels.
mvp_data = pd.concat(objs=mvp_dfs, axis=0)

In [191]:
# Preview the first five rows of the combined MVP table.
mvp_data.head(n=5)

Unnamed: 0,Rank,Player,Age,Tm,First,Pts Won,Pts Max,Share,G,MP,...,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,Year
0,1,Michael Jordan,27,CHI,77.0,891.0,960,0.928,82,37.0,...,6.0,5.5,2.7,1.0,0.539,0.312,0.851,20.3,0.321,1991
1,2,Magic Johnson,31,LAL,10.0,497.0,960,0.518,79,37.1,...,7.0,12.5,1.3,0.2,0.477,0.32,0.906,15.4,0.251,1991
2,3,David Robinson,25,SAS,6.0,476.0,960,0.496,82,37.7,...,13.0,2.5,1.5,3.9,0.552,0.143,0.762,17.0,0.264,1991
3,4,Charles Barkley,27,PHI,2.0,222.0,960,0.231,67,37.3,...,10.1,4.2,1.6,0.5,0.57,0.284,0.722,13.4,0.258,1991
4,5,Karl Malone,27,UTA,0.0,142.0,960,0.148,82,40.3,...,11.8,3.3,1.1,1.0,0.527,0.286,0.77,15.5,0.225,1991


In [192]:
# Preview the last five rows of the combined MVP table.
mvp_data.tail(n=5)

Unnamed: 0,Rank,Player,Age,Tm,First,Pts Won,Pts Max,Share,G,MP,...,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,Year
10,11,Russell Westbrook,32,WAS,0.0,5.0,1010,0.005,65,36.4,...,11.5,11.7,1.4,0.4,0.439,0.315,0.656,3.7,0.075,2021
11,12,Ben Simmons,24,PHI,0.0,3.0,1010,0.003,58,32.4,...,7.2,6.9,1.6,0.6,0.557,0.3,0.613,6.0,0.153,2021
12,13T,James Harden,31,TOT,0.0,1.0,1010,0.001,44,36.6,...,7.9,10.8,1.2,0.8,0.466,0.362,0.861,7.0,0.208,2021
13,13T,LeBron James,36,LAL,0.0,1.0,1010,0.001,45,33.4,...,7.7,7.8,1.1,0.6,0.513,0.365,0.698,5.6,0.179,2021
14,13T,Kawhi Leonard,29,LAC,0.0,1.0,1010,0.001,52,34.1,...,6.5,5.2,1.6,0.4,0.512,0.398,0.885,8.8,0.238,2021


In [193]:
# Export the extracted MVP data to an Excel file, leaving out the row labels.
mvp_data.to_excel(excel_writer="mvp.xlsx", index=False)

# $\color{darkblue}{\text{Conclusion:}}$

$\color{green}{\text{With this code we extracted the players, teams, and MVP data and saved it to local files for further analysis.}}$