In [235]:
years = list(range(1994, 2025))

### **Fetch MVP data**

In [236]:
mvps_base_url = "https://www.basketball-reference.com/awards/awards_{}.html"

In [237]:
# Get MVP leaderboard from every year in years.

In [238]:
import time
import requests

In [260]:
for year in years:
    url = mvps_base_url.format(year)
    data = requests.get(url) # Response

    # Record html. (So you don't send requests every single time)
    with open("mvps/{}.html".format(year), "w+") as f:
        f.write(data.text)

    time.sleep(5)
        

In [None]:
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO

In [280]:
dataframes = []

# Fetch mvp data from last 30 years.
for year in years:
    with open("mvps/{}.html".format(year), encoding='latin1') as f:
        page = f.read() # Store as string

    soup = BeautifulSoup(page, "html.parser")
    soup.find('tr', class_="over_header").decompose()
    mvp_table = soup.find(id="mvp")
    mvp_table_parsed = pd.read_html(StringIO(str(mvp_table)))[0]
    mvp_table_parsed["Year"] = year
    
    dataframes.append(mvp_table_parsed)

In [281]:
mvps = pd.concat(dataframes)

In [282]:
mvps.to_csv("csv/mvps.csv")

### **Fetch Player data**

In [3]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

# Path to the Brave browser executable on macOS
brave_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"

# Set up ChromeOptions to use Brave
options = Options()
options.binary_location = brave_path

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)


In [8]:
import time

url = "https://www.basketball-reference.com/leagues/NBA_{}_per_game.html"

for year in years:
    driver.get(url.format(year))
    driver.execute_script("window.scrollTo(1,10000)")
    time.sleep(5)
    html = driver.page_source

    with open("players/{}.html".format(year), "w+") as f:
        f.write(html)

In [19]:
dataframes = []

for year in years:
    with open("players/{}.html".format(year)) as f:
        page = f.read()

    soup = BeautifulSoup(page, "html.parser")
    soup.find('tr', class_="thead").decompose()
    players_table = soup.find(id="per_game_stats")
    players_table_parsed = pd.read_html(StringIO(str(players_table)))[0]
    players_table_parsed["Year"] = year

    dataframes.append(players_table_parsed)

In [20]:
players_stats = pd.concat(dataframes)

In [29]:
players_stats.to_csv("csv/player_stats.csv")

### **Fetch Team Data**

In [8]:
team_url = "https://www.basketball-reference.com/leagues/NBA_{}_standings.html"

In [9]:
import time

In [10]:
for year in years:
    url = team_url.format(year)
    data = requests.get(url)
    time.sleep(5)

    with open("teams/{}.html".format(year), "w+" ) as f:
        f.write(data.text)

In [43]:
df = []

for year in years:
    with open("teams/{}.html".format(year)) as f:
        page = f.read()
        
    soup = BeautifulSoup(page, "html.parser")
    soup.find('tr', class_='thead').decompose()
    team_table = soup.find(id="divs_standings_E")
    team = pd.read_html(StringIO(str(team_table)))[0]
    team["Year"] = year
    team["Team"] = team["Eastern Conference"]
    del team["Eastern Conference"]
    
    df.append(team)
    
    soup = BeautifulSoup(page, "html.parser")
    soup.find('tr', class_="thead").decompose()
    team_table = soup.find(id="divs_standings_W")
    team = pd.read_html(StringIO(str(team_table)))[0]
    team["Year"] = year
    team["Team"] = team["Western Conference"]
    del team["Western Conference"]
    
    df.append(team)

In [127]:
teams = pd.concat(df)

In [128]:
teams.to_csv("csv/teams.csv")

### **Data Cleaning**

In [267]:
mvps = pd.read_csv("csv/mvps.csv", encoding="latin1")

In [268]:
mvps.tail(30)

Unnamed: 0.1,Unnamed: 0,Rank,Player,Age,Tm,First,Pts Won,Pts Max,Share,G,...,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,Year
427,4,5,Luka DonÃÂÃÂiÃÂÃÂ,22,DAL,0,146,1000,0.146,65,...,9.1,8.7,1.2,0.6,0.457,0.353,0.744,7.6,0.159,2022
428,5,6,Jayson Tatum,23,BOS,0,43,1000,0.043,76,...,8.0,4.4,1.0,0.6,0.453,0.353,0.853,9.6,0.169,2022
429,6,7,Ja Morant,22,MEM,0,10,1000,0.01,57,...,5.7,6.7,1.2,0.4,0.493,0.344,0.761,6.7,0.171,2022
430,7,8,Stephen Curry,33,GSW,0,4,1000,0.004,64,...,5.2,6.3,1.3,0.4,0.437,0.38,0.923,8.0,0.173,2022
431,8,9,Chris Paul,36,PHO,0,2,1000,0.002,65,...,4.4,10.8,1.9,0.3,0.493,0.317,0.837,9.4,0.21,2022
432,9,10T,DeMar DeRozan,32,CHI,0,1,1000,0.001,76,...,5.2,4.9,0.9,0.3,0.504,0.352,0.877,8.8,0.154,2022
433,10,10T,Kevin Durant,33,BRK,0,1,1000,0.001,55,...,7.4,6.4,0.9,0.9,0.518,0.383,0.91,8.4,0.198,2022
434,11,10T,LeBron James,37,LAL,0,1,1000,0.001,56,...,8.2,6.2,1.3,1.1,0.524,0.359,0.756,7.5,0.172,2022
435,0,1,Joel Embiid,28,PHI,73,915,1000,0.915,66,...,10.2,4.2,1.0,1.7,0.548,0.33,0.857,12.3,0.259,2023
436,1,2,Nikola JokiÃÂÃÂ,27,DEN,15,674,1000,0.674,69,...,11.8,9.8,1.3,0.7,0.632,0.383,0.822,14.9,0.308,2023


In [278]:
mvps.replace({
    'ÃÂÃÂ' : 'a',
    'ÃÂÃÂiÃÂÃÂ' : 'c',
    'ÃÂ': 'č',  # You might need to add more mappings here
    'Ã': 'ć',
    'Ã': 'á',
    'Ã¡': 'á',
}, inplace=True)

In [279]:
mvps.tail(30)

Unnamed: 0.1,Unnamed: 0,Rank,Player,Age,Tm,First,Pts Won,Pts Max,Share,G,...,TRB,AST,STL,BLK,FG%,3P%,FT%,WS,WS/48,Year
427,4,5,Luka DonÃÂÃÂiÃÂÃÂ,22,DAL,0,146,1000,0.146,65,...,9.1,8.7,1.2,0.6,0.457,0.353,0.744,7.6,0.159,2022
428,5,6,Jayson Tatum,23,BOS,0,43,1000,0.043,76,...,8.0,4.4,1.0,0.6,0.453,0.353,0.853,9.6,0.169,2022
429,6,7,Ja Morant,22,MEM,0,10,1000,0.01,57,...,5.7,6.7,1.2,0.4,0.493,0.344,0.761,6.7,0.171,2022
430,7,8,Stephen Curry,33,GSW,0,4,1000,0.004,64,...,5.2,6.3,1.3,0.4,0.437,0.38,0.923,8.0,0.173,2022
431,8,9,Chris Paul,36,PHO,0,2,1000,0.002,65,...,4.4,10.8,1.9,0.3,0.493,0.317,0.837,9.4,0.21,2022
432,9,10T,DeMar DeRozan,32,CHI,0,1,1000,0.001,76,...,5.2,4.9,0.9,0.3,0.504,0.352,0.877,8.8,0.154,2022
433,10,10T,Kevin Durant,33,BRK,0,1,1000,0.001,55,...,7.4,6.4,0.9,0.9,0.518,0.383,0.91,8.4,0.198,2022
434,11,10T,LeBron James,37,LAL,0,1,1000,0.001,56,...,8.2,6.2,1.3,1.1,0.524,0.359,0.756,7.5,0.172,2022
435,0,1,Joel Embiid,28,PHI,73,915,1000,0.915,66,...,10.2,4.2,1.0,1.7,0.548,0.33,0.857,12.3,0.259,2023
436,1,2,Nikola JokiÃÂÃÂ,27,DEN,15,674,1000,0.674,69,...,11.8,9.8,1.3,0.7,0.632,0.383,0.822,14.9,0.308,2023


In [50]:
mvps = mvps[["Player", "Year", "Pts Won", "Pts Max", "Share"]]

In [51]:
mvps

Unnamed: 0,Player,Year,Pts Won,Pts Max,Share
0,Hakeem Olajuwon,1994,889,1010,0.880
1,David Robinson,1994,730,1010,0.723
2,Scottie Pippen,1994,390,1010,0.386
3,Shaquille O'Neal,1994,289,1010,0.286
4,Patrick Ewing,1994,255,1010,0.252
...,...,...,...,...,...
452,Jalen Brunson,2024,142,990,0.143
453,Jayson Tatum,2024,86,990,0.087
454,Anthony Edwards,2024,18,990,0.018
455,Domantas Sabonis,2024,3,990,0.003


In [52]:
players = pd.read_csv("csv/player_stats.csv")

In [53]:
players

Unnamed: 0.1,Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,0,1,Alaa Abdelnaby,PF,25,BOS,13,0,12.2,1.8,...,0.9,2.6,3.5,0.2,0.2,0.2,1.3,1.5,4.9,1994
1,1,2,Mahmoud Abdul-Rauf,PG,24,DEN,80,78,32.7,7.4,...,0.3,1.8,2.1,4.5,1.0,0.1,1.9,1.9,18.0,1994
2,2,3,Michael Adams,PG,31,WSB,70,67,33.4,4.1,...,0.5,2.1,2.6,6.9,1.4,0.1,2.4,2.0,12.1,1994
3,3,4,Mark Aguirre,SF,34,LAC,39,0,22.0,4.2,...,0.7,2.3,3.0,2.7,0.5,0.2,1.8,2.5,10.6,1994
4,4,5,Danny Ainge,SG,34,PHO,68,1,22.9,3.3,...,0.4,1.5,1.9,2.6,0.8,0.1,1.2,2.1,8.9,1994
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18944,757,568,Thaddeus Young,PF,35,PHO,10,0,8.9,1.1,...,1.7,1.1,2.8,0.7,0.5,0.2,0.4,1.1,2.3,2024
18945,758,569,Trae Young,PG,25,ATL,54,54,36.0,8.0,...,0.4,2.3,2.8,10.8,1.3,0.2,4.4,2.0,25.7,2024
18946,759,570,Omer Yurtseven,C,25,UTA,48,12,11.4,2.1,...,1.5,2.8,4.3,0.6,0.2,0.4,0.8,1.1,4.6,2024
18947,760,571,Cody Zeller,C,31,NOP,43,0,7.4,0.6,...,1.1,1.5,2.6,0.9,0.2,0.1,0.4,1.0,1.8,2024


In [54]:
del players["Unnamed: 0"]
del players["Rk"]

In [56]:
players.head()

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,Alaa Abdelnaby,PF,25,BOS,13,0,12.2,1.8,4.2,0.436,...,0.9,2.6,3.5,0.2,0.2,0.2,1.3,1.5,4.9,1994
1,Mahmoud Abdul-Rauf,PG,24,DEN,80,78,32.7,7.4,16.0,0.46,...,0.3,1.8,2.1,4.5,1.0,0.1,1.9,1.9,18.0,1994
2,Michael Adams,PG,31,WSB,70,67,33.4,4.1,10.0,0.408,...,0.5,2.1,2.6,6.9,1.4,0.1,2.4,2.0,12.1,1994
3,Mark Aguirre,SF,34,LAC,39,0,22.0,4.2,8.9,0.468,...,0.7,2.3,3.0,2.7,0.5,0.2,1.8,2.5,10.6,1994
4,Danny Ainge,SG,34,PHO,68,1,22.9,3.3,7.9,0.417,...,0.4,1.5,1.9,2.6,0.8,0.1,1.2,2.1,8.9,1994


In [59]:
players["Player"] = players["Player"].str.replace("*", "", regex=False) # Some players have a * at the end of their name

In [69]:
players.head(10)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
0,Alaa Abdelnaby,PF,25,BOS,13,0,12.2,1.8,4.2,0.436,...,0.9,2.6,3.5,0.2,0.2,0.2,1.3,1.5,4.9,1994
1,Mahmoud Abdul-Rauf,PG,24,DEN,80,78,32.7,7.4,16.0,0.46,...,0.3,1.8,2.1,4.5,1.0,0.1,1.9,1.9,18.0,1994
2,Michael Adams,PG,31,WSB,70,67,33.4,4.1,10.0,0.408,...,0.5,2.1,2.6,6.9,1.4,0.1,2.4,2.0,12.1,1994
3,Mark Aguirre,SF,34,LAC,39,0,22.0,4.2,8.9,0.468,...,0.7,2.3,3.0,2.7,0.5,0.2,1.8,2.5,10.6,1994
4,Danny Ainge,SG,34,PHO,68,1,22.9,3.3,7.9,0.417,...,0.4,1.5,1.9,2.6,0.8,0.1,1.2,2.1,8.9,1994
5,Gary Alexander,PF,24,TOT,11,0,5.0,0.7,1.3,0.571,...,0.6,0.7,1.4,0.2,0.3,0.0,0.7,0.9,1.7,1994
6,Gary Alexander,PF,24,MIA,4,0,3.0,0.3,0.5,0.5,...,0.3,0.5,0.8,0.3,0.0,0.0,0.3,0.8,0.5,1994
7,Gary Alexander,PF,24,CLE,7,0,6.1,1.0,1.7,0.583,...,0.9,0.9,1.7,0.1,0.4,0.0,1.0,1.0,2.4,1994
8,Victor Alexander,C,24,GSW,69,39,19.1,3.9,7.3,0.53,...,1.7,2.8,4.5,1.0,0.4,0.5,1.2,2.4,8.7,1994
9,Eric Anderson,PF,23,NYK,11,0,3.5,0.6,1.5,0.412,...,0.5,1.0,1.5,0.2,0.0,0.1,0.2,0.8,1.9,1994


In [68]:
players.groupby(["Player", "Year"]).get_group(("Gary Alexander", 1994)) 

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
5,Gary Alexander,PF,24,TOT,11,0,5.0,0.7,1.3,0.571,...,0.6,0.7,1.4,0.2,0.3,0.0,0.7,0.9,1.7,1994
6,Gary Alexander,PF,24,MIA,4,0,3.0,0.3,0.5,0.5,...,0.3,0.5,0.8,0.3,0.0,0.0,0.3,0.8,0.5,1994
7,Gary Alexander,PF,24,CLE,7,0,6.1,1.0,1.7,0.583,...,0.9,0.9,1.7,0.1,0.4,0.0,1.0,1.0,2.4,1994


In [88]:
def single_row(df):
    if df.shape[0] == 1:
        return df
    else:
        row = df[df["Tm"] == "TOT"]
        row["Tm"] = df.iloc[-1,:]["Tm"]
        return row

players = players.groupby(["Player", "Year"]).apply(single_row)

  players = players.groupby(["Player", "Year"]).apply(single_row)


In [89]:
players.head(20)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
Player,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
A.C. Green,1994,177,A.C. Green,PF,30,PHO,82,55,34.5,5.7,11.3,0.502,...,3.4,5.8,9.2,1.7,0.9,0.5,1.2,1.7,14.7,1994
A.C. Green,1995,665,A.C. Green,SF,31,PHO,82,52,32.8,3.8,7.5,0.504,...,2.4,5.8,8.2,1.5,0.7,0.4,1.4,1.8,11.2,1995
A.C. Green,1996,1161,A.C. Green,SF,32,PHO,82,36,25.8,2.6,5.4,0.484,...,2.0,4.7,6.8,0.9,0.5,0.3,1.0,1.7,7.5,1996
A.C. Green,1997,1736,A.C. Green,PF,33,DAL,83,73,30.0,2.8,5.8,0.483,...,2.7,5.2,7.9,0.8,0.8,0.2,0.9,1.7,7.2,1997
A.C. Green,1998,2323,A.C. Green,PF,34,DAL,82,68,32.3,3.0,6.5,0.453,...,2.7,5.5,8.1,1.5,1.0,0.3,0.8,1.9,7.3,1998
A.C. Green,1999,2881,A.C. Green,PF,35,DAL,50,35,18.5,2.2,5.1,0.422,...,1.6,2.9,4.6,0.5,0.6,0.2,0.4,1.4,4.9,1999
A.C. Green,2000,3409,A.C. Green,PF,36,LAL,82,82,23.5,2.1,4.7,0.447,...,2.0,4.0,5.9,1.0,0.6,0.2,0.6,1.5,5.0,2000
A.C. Green,2001,3928,A.C. Green,PF,37,MIA,82,1,17.2,1.8,4.0,0.444,...,1.3,2.5,3.8,0.5,0.4,0.1,0.5,1.5,4.5,2001
A.J. Bramlett,2000,3282,A.J. Bramlett,C,23,CLE,8,0,7.6,0.5,2.6,0.19,...,1.5,1.3,2.8,0.0,0.1,0.0,0.4,1.6,1.0,2000
A.J. Green,2023,17713,A.J. Green,SG,23,MIL,35,1,9.9,1.5,3.6,0.424,...,0.2,1.1,1.3,0.6,0.2,0.0,0.3,0.9,4.4,2023


In [90]:
players.index = players.index.droplevel()

In [91]:
players.index = players.index.droplevel()

In [208]:
players.tail(100)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Year
9338,Zach Randolph,PF,28,MEM,81,81,37.7,8.0,16.5,.488,...,4.1,7.7,11.7,1.8,1.0,0.4,2.1,2.8,20.8,2010
9966,Zach Randolph,PF,29,MEM,75,74,36.3,8.0,15.8,.503,...,4.3,7.8,12.2,2.2,0.8,0.3,2.0,2.3,20.1,2011
10545,Zach Randolph,PF,30,MEM,28,8,26.3,4.7,10.2,.463,...,2.8,5.3,8.0,1.7,0.8,0.1,1.4,2.0,11.6,2012
11141,Zach Randolph,PF,31,MEM,76,75,34.3,6.2,13.5,.460,...,4.1,7.2,11.2,1.4,0.8,0.4,2.0,2.4,15.4,2013
11768,Zach Randolph,PF,32,MEM,79,79,34.2,7.1,15.2,.467,...,3.4,6.7,10.1,2.5,0.7,0.3,2.3,2.7,17.4,2014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4693,Željko Rebrača,C,29,DET,74,4,15.9,2.6,5.1,.505,...,1.1,2.8,3.9,0.5,0.4,1.0,1.1,2.6,6.9,2002
5193,Željko Rebrača,C,30,DET,30,12,16.3,2.7,4.8,.552,...,0.9,2.2,3.1,0.3,0.2,0.6,1.0,2.6,6.6,2003
5774,Željko Rebrača,C,31,ATL,24,2,11.4,1.4,3.2,.442,...,1.0,1.5,2.4,0.3,0.2,0.5,0.7,2.2,3.8,2004
6374,Željko Rebrača,C,32,LAC,58,2,16.0,2.3,4.0,.568,...,0.8,2.3,3.2,0.4,0.2,0.7,0.8,2.2,5.8,2005


In [100]:
combined = players.merge(mvps, how="outer", on=["Player", "Year"])

In [209]:
combined.tail(5)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,STL,BLK,TOV,PF,PTS,Year,Pts Won,Pts Max,Share,Team
14659,Željko Rebrača,C,29,DET,74,4,15.9,2.6,5.1,0.505,...,0.4,1.0,1.1,2.6,6.9,2002,0.0,0.0,0.0,Detroit Pistons
14660,Željko Rebrača,C,30,DET,30,12,16.3,2.7,4.8,0.552,...,0.2,0.6,1.0,2.6,6.6,2003,0.0,0.0,0.0,Detroit Pistons
14661,Željko Rebrača,C,31,ATL,24,2,11.4,1.4,3.2,0.442,...,0.2,0.5,0.7,2.2,3.8,2004,0.0,0.0,0.0,Atlanta Hawks
14662,Željko Rebrača,C,32,LAC,58,2,16.0,2.3,4.0,0.568,...,0.2,0.7,0.8,2.2,5.8,2005,0.0,0.0,0.0,Los Angeles Clippers
14663,Željko Rebrača,C,33,LAC,29,2,14.2,1.8,3.3,0.542,...,0.2,0.7,0.8,2.0,4.7,2006,0.0,0.0,0.0,Los Angeles Clippers


In [108]:
combined[["Pts Won", "Pts Max", "Share"]] = combined[["Pts Won", "Pts Max", "Share"]].fillna(0)

In [109]:
combined.head(5)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,AST,STL,BLK,TOV,PF,PTS,Year,Pts Won,Pts Max,Share
0,A.C. Green,PF,30,PHO,82,55,34.5,5.7,11.3,0.502,...,1.7,0.9,0.5,1.2,1.7,14.7,1994,0.0,0.0,0.0
1,A.C. Green,SF,31,PHO,82,52,32.8,3.8,7.5,0.504,...,1.5,0.7,0.4,1.4,1.8,11.2,1995,0.0,0.0,0.0
2,A.C. Green,SF,32,PHO,82,36,25.8,2.6,5.4,0.484,...,0.9,0.5,0.3,1.0,1.7,7.5,1996,0.0,0.0,0.0
3,A.C. Green,PF,33,DAL,83,73,30.0,2.8,5.8,0.483,...,0.8,0.8,0.2,0.9,1.7,7.2,1997,0.0,0.0,0.0
4,A.C. Green,PF,34,DAL,82,68,32.3,3.0,6.5,0.453,...,1.5,1.0,0.3,0.8,1.9,7.3,1998,0.0,0.0,0.0


In [159]:
teams = pd.read_csv("csv/teams.csv")

In [160]:
teams.head(10)

Unnamed: 0.1,Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,Year,Team
0,0,57,25,.695,—,98.5,91.5,6.48,1994,New York Knicks*
1,1,50,32,.610,7.0,105.7,101.8,3.68,1994,Orlando Magic*
2,2,45,37,.549,12.0,103.2,101.0,2.11,1994,New Jersey Nets*
3,3,42,40,.512,15.0,103.4,100.7,2.40,1994,Miami Heat*
4,4,32,50,.390,25.0,100.8,105.1,-4.28,1994,Boston Celtics
5,5,25,57,.305,32.0,98.0,105.6,-7.37,1994,Philadelphia 76ers
6,6,24,58,.293,33.0,100.4,107.7,-7.13,1994,Washington Bullets
7,7,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,Central Division,1994,Central Division
8,8,57,25,.695,—,101.4,96.2,4.94,1994,Atlanta Hawks*
9,9,55,27,.671,2.0,98.0,94.9,2.87,1994,Chicago Bulls*


In [161]:
teams = teams[~teams["W"].str.contains("Division")]

In [162]:
teams.head(10)

Unnamed: 0.1,Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,Year,Team
0,0,57,25,0.695,—,98.5,91.5,6.48,1994,New York Knicks*
1,1,50,32,0.61,7.0,105.7,101.8,3.68,1994,Orlando Magic*
2,2,45,37,0.549,12.0,103.2,101.0,2.11,1994,New Jersey Nets*
3,3,42,40,0.512,15.0,103.4,100.7,2.4,1994,Miami Heat*
4,4,32,50,0.39,25.0,100.8,105.1,-4.28,1994,Boston Celtics
5,5,25,57,0.305,32.0,98.0,105.6,-7.37,1994,Philadelphia 76ers
6,6,24,58,0.293,33.0,100.4,107.7,-7.13,1994,Washington Bullets
8,8,57,25,0.695,—,101.4,96.2,4.94,1994,Atlanta Hawks*
9,9,55,27,0.671,2.0,98.0,94.9,2.87,1994,Chicago Bulls*
10,10,47,35,0.573,10.0,101.0,97.5,3.26,1994,Indiana Pacers*


In [165]:
teams["Team"] = teams["Team"].str.replace("*", "", regex=False)

In [172]:
del teams["Unnamed: 0"]

In [174]:
teams.head(5)

Unnamed: 0,W,L,W/L%,GB,PS/G,PA/G,SRS,Year,Team
0,57,25,0.695,—,98.5,91.5,6.48,1994,New York Knicks
1,50,32,0.61,7.0,105.7,101.8,3.68,1994,Orlando Magic
2,45,37,0.549,12.0,103.2,101.0,2.11,1994,New Jersey Nets
3,42,40,0.512,15.0,103.4,100.7,2.4,1994,Miami Heat
4,32,50,0.39,25.0,100.8,105.1,-4.28,1994,Boston Celtics


In [175]:
teams["Team"].unique()

array(['New York Knicks', 'Orlando Magic', 'New Jersey Nets',
       'Miami Heat', 'Boston Celtics', 'Philadelphia 76ers',
       'Washington Bullets', 'Atlanta Hawks', 'Chicago Bulls',
       'Indiana Pacers', 'Cleveland Cavaliers', 'Charlotte Hornets',
       'Milwaukee Bucks', 'Detroit Pistons', 'Houston Rockets',
       'San Antonio Spurs', 'Utah Jazz', 'Denver Nuggets',
       'Minnesota Timberwolves', 'Dallas Mavericks',
       'Seattle SuperSonics', 'Phoenix Suns', 'Golden State Warriors',
       'Portland Trail Blazers', 'Los Angeles Lakers', 'Sacramento Kings',
       'Los Angeles Clippers', 'Toronto Raptors', 'Vancouver Grizzlies',
       'Washington Wizards', 'Memphis Grizzlies', 'New Orleans Hornets',
       'Charlotte Bobcats', 'New Orleans/Oklahoma City Hornets',
       'Oklahoma City Thunder', 'Brooklyn Nets', 'New Orleans Pelicans'],
      dtype=object)

In [171]:
combined["Tm"].unique()

array(['PHO', 'DAL', 'LAL', 'MIA', 'CLE', 'MIL', 'CHI', 'GSW', 'IND',
       'WAS', 'MIN', 'ATL', 'HOU', 'DEN', 'ORL', 'NOH', 'TOR', 'SAC',
       'CHO', 'PHI', 'POR', 'DET', 'BOS', 'OKC', 'UTA', 'VAN', 'SEA',
       'NJN', 'NOK', 'LAC', 'CHA', 'MEM', 'NYK', 'NOP', 'BRK', 'SAS',
       'CHH', 'WSB', nan], dtype=object)

In [179]:
data = {
    "Abbreviation": ["ATL", "BRK", "BKN", "BOS", "CHA", "CHH", "CHO", "CHI", "CLE", "DAL",
                     "DEN", "DET", "GSW", "HOU", "IND", "LAC", "LAL", "MEM", "MIA", "MIL",
                     "MIN", "NJN", "NOH", "NOP", "NOK", "NYK", "OKC", "ORL", "PHI", "PHX",
                     "PHO", "POR", "SEA", "SAC", "SAS", "TOR", "UTA", "VAN", "WAS", "WSB"],
    "Name": ["Atlanta Hawks", "Brooklyn Nets", "Brooklyn Nets", "Boston Celtics", "Charlotte Bobcats",
             "Charlotte Hornets", "Charlotte Hornets", "Chicago Bulls", "Cleveland Cavaliers",
             "Dallas Mavericks", "Denver Nuggets", "Detroit Pistons", "Golden State Warriors",
             "Houston Rockets", "Indiana Pacers", "Los Angeles Clippers", "Los Angeles Lakers",
             "Memphis Grizzlies", "Miami Heat", "Milwaukee Bucks", "Minnesota Timberwolves",
             "New Jersey Nets", "New Orleans Hornets", "New Orleans Pelicans",
             "New Orleans/Oklahoma City Hornets", "New York Knicks", "Oklahoma City Thunder",
             "Orlando Magic", "Philadelphia 76ers", "Phoenix Suns", "Phoenix Suns",
             "Portland Trail Blazers", "Seattle SuperSonics", "Sacramento Kings",
             "San Antonio Spurs", "Toronto Raptors", "Utah Jazz", "Vancouver Grizzlies",
             "Washington Wizards", "Washington Bullets"]
}

team_names = pd.DataFrame(data)
team_names.to_csv("csv/teamNames.csv", index=False)

In [195]:
nicknames = {}

with open("csv/teamNames.csv") as f:
    lines = f.readlines()
    for line in lines[1:]:
        abb, name = line.replace("\n", "").split(",")
        nicknames[abb] = name

In [197]:
combined["Tm"]

0        PHO
1        PHO
2        PHO
3        DAL
4        DAL
        ... 
14659    DET
14660    DET
14661    ATL
14662    LAC
14663    LAC
Name: Tm, Length: 14664, dtype: object

In [200]:
combined["Team"] = combined["Tm"].map(nicknames)

In [216]:
combined[combined["Player"] == "Nikola Jokić"]

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,STL,BLK,TOV,PF,PTS,Year,Pts Won,Pts Max,Share,Team
10679,Nikola Jokić,C,20,DEN,80,55,21.7,3.8,7.5,0.512,...,1.0,0.6,1.3,2.6,10.0,2016,0.0,0.0,0.0,Denver Nuggets
10680,Nikola Jokić,C,21,DEN,73,59,27.9,6.8,11.7,0.578,...,0.8,0.8,2.3,2.9,16.7,2017,0.0,0.0,0.0,Denver Nuggets
10681,Nikola Jokić,C,22,DEN,75,73,32.6,6.7,13.5,0.499,...,1.2,0.8,2.8,2.8,18.5,2018,0.0,0.0,0.0,Denver Nuggets
10682,Nikola Jokić,C,23,DEN,80,80,31.3,7.7,15.1,0.511,...,1.4,0.7,3.1,2.9,20.1,2019,0.0,0.0,0.0,Denver Nuggets
10683,Nikola Jokić,C,24,DEN,73,73,32.0,7.7,14.7,0.528,...,1.2,0.6,3.1,3.0,19.9,2020,0.0,0.0,0.0,Denver Nuggets
10684,Nikola Jokić,C,25,DEN,72,72,34.6,10.2,18.0,0.566,...,1.3,0.7,3.1,2.7,26.4,2021,0.0,0.0,0.0,Denver Nuggets
10685,Nikola Jokić,C,26,DEN,74,74,33.5,10.3,17.7,0.583,...,1.5,0.9,3.8,2.6,27.1,2022,0.0,0.0,0.0,Denver Nuggets
10686,Nikola Jokić,C,27,DEN,69,69,33.7,9.4,14.8,0.632,...,1.3,0.7,3.6,2.5,24.5,2023,0.0,0.0,0.0,Denver Nuggets
10687,Nikola Jokić,C,28,DEN,79,79,34.6,10.4,17.9,0.583,...,1.4,0.9,3.0,2.5,26.4,2024,0.0,0.0,0.0,Denver Nuggets


In [202]:
stats = combined.merge(teams, how="outer", on=["Team", "Year"])

In [215]:
stats.tail(30)

Unnamed: 0,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,FG%,...,Pts Max,Share,Team,W,L,W/L%,GB,PS/G,PA/G,SRS
14634,Hamidou Diallo,SG,25.0,WAS,2.0,0.0,2.5,0.5,1.0,0.5,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
14635,Jared Butler,SG,23.0,WAS,40.0,0.0,14.2,2.5,5.0,0.488,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
14636,Johnny Davis,SG,21.0,WAS,50.0,6.0,12.3,1.2,3.1,0.403,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
14637,Jordan Poole,SG,24.0,WAS,78.0,66.0,30.1,6.3,15.2,0.413,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
14638,Jules Bernard,SG,24.0,WAS,19.0,0.0,7.8,1.5,3.4,0.453,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
14639,Justin Champagnie,SF,22.0,WAS,15.0,1.0,15.7,2.1,5.2,0.41,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
14640,Kyle Kuzma,PF,28.0,WAS,70.0,70.0,32.6,8.7,18.8,0.463,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
14641,Landry Shamet,SG,26.0,WAS,46.0,5.0,15.8,2.6,6.0,0.431,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
14642,Marvin Bagley III,C,24.0,WAS,50.0,25.0,21.1,4.8,8.2,0.586,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
14643,Patrick Baldwin Jr.,SF,21.0,WAS,38.0,7.0,13.0,1.6,4.1,0.381,...,0.0,0.0,Washington Wizards,15.0,67.0,0.183,32.0,113.7,123.0,-9.29
