## Importando as bibliotecas

In [1]:
import pandas as pd
import numpy as np
import requests
import sys
import os
import time
from scipy.stats import zscore

In [2]:
import sys
import os

# Make the project root importable: the root is taken to be the parent of the
# directory the notebook is running from.
notebook_dir = os.getcwd()
root_dir = os.path.split(notebook_dir)[0]

# Register the root only once so re-running the cell does not grow sys.path.
if sys.path.count(root_dir) == 0:
    sys.path.append(root_dir)

%load_ext autoreload
%autoreload 2

## Importando as funções do nba_scraper.py

In [3]:
from src.nba_scraper import find_ID_by_name
from src.nba_scraper import find_abb_by_name
from src.nba_scraper import getting_ID_row_by_name
from src.nba_scraper import get_four_factors
from src.nba_scraper import get_advanced_stats
from src.nba_scraper import get_scoring_stats
from src.nba_scraper import get_bench_point_percent
from src.nba_scraper import get_shot_locations_pct
from src.nba_scraper import get_usage_stars
from src.nba_scraper import get_all_teams_df

-------------------------------------------------------------------------------------------------------------------------------------------------------------

# Fazendo o DataFrame inicial dos últimos 10 campeões
### Primeiramente colocarei a temporada, o 'full_name' e o ID de cada time.

In [58]:
# One row per champion: (season string, team full name, row label).
# Keeping the three values side by side makes the season/team pairing explicit.
champions_table = [
    ("2015-16", "Cleveland Cavaliers", "2016_CLE"),
    ("2016-17", "Golden State Warriors", "2017_GSW"),
    ("2017-18", "Golden State Warriors", "2018_GSW"),
    ("2018-19", "Toronto Raptors", "2019_TOR"),
    ("2019-20", "Los Angeles Lakers", "2020_LAL"),
    ("2020-21", "Milwaukee Bucks", "2021_MIL"),
    ("2021-22", "Golden State Warriors", "2022_GSW"),
    ("2022-23", "Boston Celtics", "2023_BOS"),
    ("2023-24", "Denver Nuggets", "2024_DEN"),
    ("2024-25", "Oklahoma City Thunder", "2025_OKC"),
]

seasons = [row[0] for row in champions_table]
teams_Series = [row[1] for row in champions_table]
indexs = [row[2] for row in champions_table]

# Champions table indexed by the row labels, starting with the team name only.
df_last_10_champions = pd.DataFrame({"TEAM_NAME": teams_Series}, index=indexs)


----------------------------------------------------------------------------------------------------------------------

# Serão feitos os datasets das métricas da liga em cada temporada

In [5]:
def generate_year_regular_df(season):
    """Assemble one season's metrics table from the six scraper tables.

    Each ``nba_scraper`` helper is called once with ``season`` and the six
    results are combined with ``pd.merge`` on ``TEAM_ID`` (default inner join).

    Args:
        season: Season identifier forwarded unchanged to every helper; the
            notebook passes strings such as ``"2015-16"``.

    Returns:
        The merged DataFrame produced by joining all six tables on ``TEAM_ID``.
    """
    # Fetch order is kept as-is in case the helpers have ordering side effects.
    four_factors = get_four_factors(season)
    advanced = get_advanced_stats(season)
    scoring = get_scoring_stats(season)
    bench = get_bench_point_percent(season)
    shot_locations = get_shot_locations_pct(season)
    usage = get_usage_stars(season)

    join_key = "TEAM_ID"

    # Pairwise joins first, then combine; shot/usage stays on the left of the
    # final join so the resulting column order is unchanged.
    shot_and_usage = pd.merge(shot_locations, usage, on=join_key)
    factors_and_advanced = pd.merge(four_factors, advanced, on=join_key)
    scoring_and_bench = pd.merge(scoring, bench, on=join_key)
    remaining = pd.merge(factors_and_advanced, scoring_and_bench, on=join_key)

    return pd.merge(shot_and_usage, remaining, on=join_key)

----------------------------------------------------------------------------------------------------------------------

## Inicializando os DataFrames de cada season:

In [6]:
# Accumulator for the per-season regular-season tables (filled by the season loop).
lista_dfs_reg = []

# Empty placeholder frames for the champions' regular-season and playoff data;
# nothing in this section populates them.
df_champs_reg, df_champs_playoffs = pd.DataFrame(), pd.DataFrame()

In [7]:
# Start from an empty list so re-running this cell does not append the same
# seasons twice (later cells read lista_dfs_reg by fixed position).
lista_dfs_reg = []
failed_seasons = []

for season in seasons:
    try:
        df = generate_year_regular_df(season)
    except Exception as e:
        # Best effort: report the failure and keep going with the next season.
        print(f" Erro crítico na temporada {season}: {e}")
        failed_seasons.append(season)
    else:
        if not df.empty:
            lista_dfs_reg.append(df)
            print(f" Sucesso: {season}")
        else:
            print(f" Aviso: Dados vazios retornados para {season}")
            failed_seasons.append(season)

    # Pause between seasons, including after a failure (presumably to avoid
    # hammering the stats source; the delay value comes from the original code).
    time.sleep(4)

# A skipped season shifts every later position in lista_dfs_reg, which would
# silently pair the wrong season with the positional lookups further down.
if failed_seasons:
    print(f" Aviso: temporadas ausentes em lista_dfs_reg: {failed_seasons}")

 Sucesso: 2015-16
 Sucesso: 2016-17
 Sucesso: 2017-18
 Sucesso: 2018-19
 Sucesso: 2019-20
 Sucesso: 2020-21
 Sucesso: 2021-22
 Sucesso: 2022-23
 Sucesso: 2023-24
 Sucesso: 2024-25


----------------------------------------------------------------------------------------------------------------------

## Criando e processando os datasets 'crus' e de 'zscore' de cada temporada.

## REGULAR SEASON

In [8]:
# Statistic columns that are converted to z-scores in each season cell.
metrics = [
    'RIM_PCT', 'MID_PCT', 'SUM_USAGE', 'EFG_PCT', 'FTA_RATE',
    'OREB_PCT', 'TM_TOV_PCT', 'OFF_RATING', 'DEF_RATING', 'NET_RATING',
    'AST_PCT', 'PACE', 'PCT_FGA_3PT', 'BENCH_PTS_PCT',
]

# Columns copied into the champions table: the team name followed by every metric.
columns = ['TEAM_NAME', *metrics]

In [89]:
# 2015-16
df_2016 = lista_dfs_reg[0]
# Persist the season table before any standardisation.
df_2016.to_csv('../data/raw/df_2016.csv', index=False)

# Take an independent copy: aliasing df_2016 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[0]), so re-running this cell
# would save z-scores into the raw CSV.
df_2016_z = df_2016.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2016_z[metric] = np.round(zscore(df_2016[metric]), decimals=5)
    # Create the champions-table column only if it is missing, so re-running
    # this cell does not zero out rows already filled by the other season cells.
    if metric not in df_last_10_champions.columns:
        df_last_10_champions[metric] = np.zeros(len(df_last_10_champions))

df_2016_z.to_csv('../data/processed/df_2016_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 0 of the champions table.
dados_do_time = df_2016_z[df_2016_z['TEAM_NAME'] == df_last_10_champions.iloc[0]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[0] = dados_do_time[columns]

In [90]:
# 2016-17
df_2017 = lista_dfs_reg[1]
# Persist the season table before any standardisation.
df_2017.to_csv('../data/raw/df_2017.csv', index=False)

# Take an independent copy: aliasing df_2017 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[1]), so re-running this cell
# would save z-scores into the raw CSV.
df_2017_z = df_2017.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2017_z[metric] = np.round(zscore(df_2017[metric]), decimals=5)

df_2017_z.to_csv('../data/processed/df_2017_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 1 of the champions table.
dados_do_time = df_2017_z[df_2017_z['TEAM_NAME'] == df_last_10_champions.iloc[1]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[1] = dados_do_time[columns]

In [91]:
# 2017-18
df_2018 = lista_dfs_reg[2]
# Persist the season table before any standardisation.
df_2018.to_csv('../data/raw/df_2018.csv', index=False)

# Take an independent copy: aliasing df_2018 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[2]), so re-running this cell
# would save z-scores into the raw CSV.
df_2018_z = df_2018.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2018_z[metric] = np.round(zscore(df_2018[metric]), decimals=5)

df_2018_z.to_csv('../data/processed/df_2018_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 2 of the champions table.
dados_do_time = df_2018_z[df_2018_z['TEAM_NAME'] == df_last_10_champions.iloc[2]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[2] = dados_do_time[columns]

In [92]:
# 2018-19
df_2019 = lista_dfs_reg[3]
# Persist the season table before any standardisation.
df_2019.to_csv('../data/raw/df_2019.csv', index=False)

# Take an independent copy: aliasing df_2019 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[3]), so re-running this cell
# would save z-scores into the raw CSV.
df_2019_z = df_2019.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2019_z[metric] = np.round(zscore(df_2019[metric]), decimals=5)

df_2019_z.to_csv('../data/processed/df_2019_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 3 of the champions table.
dados_do_time = df_2019_z[df_2019_z['TEAM_NAME'] == df_last_10_champions.iloc[3]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[3] = dados_do_time[columns]

In [93]:
# 2019-20
df_2020 = lista_dfs_reg[4]
# Persist the season table before any standardisation.
df_2020.to_csv('../data/raw/df_2020.csv', index=False)

# Take an independent copy: aliasing df_2020 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[4]), so re-running this cell
# would save z-scores into the raw CSV.
df_2020_z = df_2020.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2020_z[metric] = np.round(zscore(df_2020[metric]), decimals=5)

df_2020_z.to_csv('../data/processed/df_2020_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 4 of the champions table.
dados_do_time = df_2020_z[df_2020_z['TEAM_NAME'] == df_last_10_champions.iloc[4]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[4] = dados_do_time[columns]

In [94]:
# 2020-21
df_2021 = lista_dfs_reg[5]
# Persist the season table before any standardisation.
df_2021.to_csv('../data/raw/df_2021.csv', index=False)

# Take an independent copy: aliasing df_2021 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[5]), so re-running this cell
# would save z-scores into the raw CSV.
df_2021_z = df_2021.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2021_z[metric] = np.round(zscore(df_2021[metric]), decimals=5)

df_2021_z.to_csv('../data/processed/df_2021_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 5 of the champions table.
dados_do_time = df_2021_z[df_2021_z['TEAM_NAME'] == df_last_10_champions.iloc[5]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[5] = dados_do_time[columns]

In [95]:
# 2021-22
df_2022 = lista_dfs_reg[6]
# Persist the season table before any standardisation.
df_2022.to_csv('../data/raw/df_2022.csv', index=False)

# Take an independent copy: aliasing df_2022 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[6]), so re-running this cell
# would save z-scores into the raw CSV.
df_2022_z = df_2022.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2022_z[metric] = np.round(zscore(df_2022[metric]), decimals=5)

df_2022_z.to_csv('../data/processed/df_2022_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 6 of the champions table.
dados_do_time = df_2022_z[df_2022_z['TEAM_NAME'] == df_last_10_champions.iloc[6]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[6] = dados_do_time[columns]

In [96]:
# 2022-23
df_2023 = lista_dfs_reg[7]
# Persist the season table before any standardisation.
df_2023.to_csv('../data/raw/df_2023.csv', index=False)

# Take an independent copy: aliasing df_2023 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[7]), so re-running this cell
# would save z-scores into the raw CSV.
df_2023_z = df_2023.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2023_z[metric] = np.round(zscore(df_2023[metric]), decimals=5)

df_2023_z.to_csv('../data/processed/df_2023_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 7 of the champions table.
dados_do_time = df_2023_z[df_2023_z['TEAM_NAME'] == df_last_10_champions.iloc[7]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[7] = dados_do_time[columns]

In [97]:
# 2023-24
df_2024 = lista_dfs_reg[8]
# Persist the season table before any standardisation.
df_2024.to_csv('../data/raw/df_2024.csv', index=False)

# Take an independent copy: aliasing df_2024 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[8]), so re-running this cell
# would save z-scores into the raw CSV.
df_2024_z = df_2024.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2024_z[metric] = np.round(zscore(df_2024[metric]), decimals=5)

df_2024_z.to_csv('../data/processed/df_2024_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 8 of the champions table.
dados_do_time = df_2024_z[df_2024_z['TEAM_NAME'] == df_last_10_champions.iloc[8]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[8] = dados_do_time[columns]

In [99]:
# 2024-25
df_2025 = lista_dfs_reg[9]
# Persist the season table before any standardisation.
df_2025.to_csv('../data/raw/df_2025.csv', index=False)

# Take an independent copy: aliasing df_2025 would make the z-scoring below
# overwrite the raw frame (and lista_dfs_reg[9]), so re-running this cell
# would save z-scores into the raw CSV.
df_2025_z = df_2025.copy()

for metric in metrics:
    # Z-score of the metric over all rows of the season table, rounded to 5 decimals.
    df_2025_z[metric] = np.round(zscore(df_2025[metric]), decimals=5)

# Written once, after every metric has been standardised (not once per metric).
df_2025_z.to_csv('../data/processed/df_2025_z.csv', index=False)

# Copy the champion's z-scored row (matched on TEAM_NAME) into row 9 of the champions table.
dados_do_time = df_2025_z[df_2025_z['TEAM_NAME'] == df_last_10_champions.iloc[9]["TEAM_NAME"]].iloc[0]
df_last_10_champions.iloc[9] = dados_do_time[columns]

# NOTE(review): index=False drops the row labels (e.g. "2017_GSW"), so the three
# 'Golden State Warriors' rows cannot be told apart in the saved file; confirm
# whether downstream readers need the labels before changing this.
df_last_10_champions.to_csv('../data/processed/df_last_10_champions.csv', index = False)
df_last_10_champions

Unnamed: 0,TEAM_NAME,RIM_PCT,MID_PCT,SUM_USAGE,EFG_PCT,FTA_RATE,OREB_PCT,TM_TOV_PCT,OFF_RATING,DEF_RATING,NET_RATING,AST_PCT,PACE,PCT_FGA_3PT,BENCH_PTS_PCT
2016_CLE,Cleveland Cavaliers,-0.85772,-0.72888,1.61219,1.15024,-0.62128,0.60834,-0.45683,1.50385,-0.59209,1.22757,0.12594,-1.28737,1.39404,-1.89812
2017_GSW,Golden State Warriors,0.18171,-0.4034,0.22237,2.66694,-0.53513,-0.14418,0.29463,2.10385,-2.04987,2.7092,2.69366,1.47389,0.90557,-1.07841
2018_GSW,Golden State Warriors,-0.90284,1.07602,1.45287,2.86942,-0.47827,-1.24133,0.94755,1.61757,-0.43061,1.34366,2.62856,1.38524,0.04209,-0.8596
2019_TOR,Toronto Raptors,-0.25092,-0.17004,-0.40586,1.25647,-0.46637,-0.35567,-0.10792,1.00291,-1.04807,1.23158,0.13781,-0.07096,0.42612,-0.42377
2020_LAL,Los Angeles Lakers,1.46361,0.44391,0.99262,0.9825,0.79731,0.91312,0.59115,0.64446,-1.36546,1.24106,0.07368,0.1882,-0.62818,0.21524
2021_MIL,Milwaukee Bucks,-0.26711,0.63685,0.45733,1.46229,-0.61854,0.26156,-0.34771,1.27002,-0.43329,1.21153,-0.89691,1.63889,0.2881,-1.06316
2022_GSW,Golden State Warriors,-0.83474,-0.34222,0.59923,1.24718,-0.7746,-0.25118,1.30995,0.23198,-1.79915,1.14782,2.05517,-0.00921,1.5662,0.33642
2023_BOS,Boston Celtics,-0.94511,-1.19166,1.5404,1.22423,-1.17934,-0.99625,-0.72635,1.38919,-1.42744,1.693,0.89449,-0.36972,2.21145,-0.80219
2024_DEN,Denver Nuggets,1.14096,1.22407,0.29715,0.818,-1.06673,0.72986,-0.6464,0.87451,-0.7814,0.99761,1.22111,-0.9862,-1.52131,-1.06214
2025_OKC,Oklahoma City Thunder,-0.51294,0.46469,1.18178,0.86102,-1.52896,-0.45748,-2.43221,1.4822,-2.3518,2.16971,-0.95711,0.68818,-0.06181,-0.43531
