## Gather and scale Offensive Rating (ORtg), Defensive Rating (DRtg), and Net Rating (NRtg) for every team for every season 1998-2023

Import necessary packages

In [32]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
from sklearn.preprocessing import MinMaxScaler

Prep URL for 1998-2023 seasons

In [33]:
# Season-ending years to scrape: 1998 through 2023 inclusive (range's stop is exclusive).
YEARS = list(range(1998, 2024))
def make_link(year):
    """Return the Basketball Reference league page URL for the given season.

    Parameters
    ----------
    year : int or str
        Season identifier interpolated into the ``NBA_{year}.html`` page name.

    Returns
    -------
    str
        The full URL of that season's league page.
    """
    season_url = f"https://www.basketball-reference.com/leagues/NBA_{year}.html"
    return season_url

Scrape ORtg, DRtg, and NRtg data for every team in each season from 1998 to 2023

In [34]:
# Positions of the columns of interest within a row of the "advanced-team" table.
TEAM_COL, ORTG_COL, DRTG_COL, NRTG_COL = 1, 10, 11, 12

# Raw (year, team, ORtg, DRtg, NRtg) records. The three ratings are stored as
# the stripped text scraped from the page; nothing is converted to numbers here.
data = []

for year in YEARS:
    # A timeout keeps a single stalled connection from hanging the whole run.
    page = requests.get(make_link(year), timeout=30)
    # Stop on a 4xx/5xx response: an error page contains no ratings table, so
    # carrying on would silently leave this season out of the dataset.
    page.raise_for_status()
    time.sleep(2)  # throttle: wait two seconds after every request

    soup = BeautifulSoup(page.content, "html.parser")
    table = soup.find("table", {"id": "advanced-team"})
    if table is None:
        # Keep going, but make the gap visible instead of skipping silently.
        print(f"No 'advanced-team' table found for {year}; season skipped")
        continue

    # The first <tr> of the table is skipped.
    for row in table.find_all("tr")[1:]:
        cells = row.find_all(["th", "td"])
        if len(cells) <= NRTG_COL:
            continue  # row is too short to contain the rating columns

        team = cells[TEAM_COL].text.strip()
        if team == "Team":
            continue  # column-header row, not a data row

        data.append((
            year,
            team,
            cells[ORTG_COL].text.strip(),
            cells[DRTG_COL].text.strip(),
            cells[NRTG_COL].text.strip(),
        ))




Min-max scale each rating column (ORtg, DRtg, NRtg) to the range 0 to 1, separately within each season

In [35]:
# The three rating columns that get cleaned, cast to float and scaled.
RATING_COLS = ['ORtg', 'DRtg', 'NRtg']

data = pd.DataFrame(data, columns=["Year", "Team", "ORtg", "DRtg", "NRtg"])

# Clean the NRtg text before the numeric cast:
#   * astype(str) leaves freshly scraped strings unchanged and keeps this cell
#     re-runnable once `data` already holds floats (.str fails on floats);
#   * regex=False removes the literal '+' sign explicitly -- '+' is a regex
#     quantifier and the default of `regex` differs between pandas versions;
#   * an empty NRtg value is replaced by '0'.
# NOTE(review): an empty NRtg presumably comes from a non-team summary row
# (e.g. a league-average footer) -- confirm whether such rows should be
# dropped before scaling rather than given a rating of 0.
data['NRtg'] = (
    data['NRtg']
    .astype(str)
    .str.replace('+', '', regex=False)
    .replace('', '0')
)
data[RATING_COLS] = data[RATING_COLS].astype(float)

# Group the DataFrame by the year column
grouped = data.groupby('Year')

# Scale every season on its own. MinMaxScaler rescales each column
# independently, so a single fresh scaler fitted on the three rating columns
# of one season maps each of them onto [0, 1] for that season.
scaled_dfs = []
for year, group in grouped:
    scaled_group = group.copy()
    scaled_group[RATING_COLS] = MinMaxScaler().fit_transform(group[RATING_COLS])
    scaled_dfs.append(scaled_group)

# Concatenate the scaled DataFrames back together
scaled_df = pd.concat(scaled_dfs)

Export data to csv

In [36]:
# Write the scaled ratings to a CSV file, leaving out the DataFrame index.
output_path = "rtg_data_scaled.csv"
scaled_df.to_csv(output_path, index=False)