## Importando as bibliotecas

In [1]:
import pandas as pd
import numpy as np
import requests
import sys
import os
import time
from scipy.stats import zscore

In [2]:
import sys
import os

notebook_dir = os.getcwd()

root_dir = os.path.dirname(notebook_dir)

if root_dir not in sys.path:
    sys.path.append(root_dir)

%load_ext autoreload
%autoreload 2

## Importando as funções do nba_scraper.py

In [3]:
from src.nba_scraper import find_ID_by_name
from src.nba_scraper import find_abb_by_name
from src.nba_scraper import getting_ID_row_by_name
from src.nba_scraper import get_four_factors
from src.nba_scraper import get_advanced_stats
from src.nba_scraper import get_scoring_stats
from src.nba_scraper import get_bench_point_percent
from src.nba_scraper import get_shot_locations_pct
from src.nba_scraper import get_usage_stars
from src.nba_scraper import get_all_teams_df

-------------------------------------------------------------------------------------------------------------------------------------------------------------

# Fazendo o Dataframe inicial dos ultimos 10 campeões
### Primeiramente colocarei, além da temporada, o 'full_name' do time, a temporada e o ID de cada time.

In [58]:
seasons = ["2015-16","2016-17","2017-18","2018-19","2019-20","2020-21",
           "2021-22","2022-23","2023-24","2024-25"]

teams_Series = ['Cleveland Cavaliers', 'Golden State Warriors', 'Golden State Warriors', 'Toronto Raptors',
                 'Los Angeles Lakers', 'Milwaukee Bucks', 'Golden State Warriors', 'Boston Celtics', 'Denver Nuggets', 'Oklahoma City Thunder']

indexs = ["2016_CLE", "2017_GSW", "2018_GSW", "2019_TOR", "2020_LAL", "2021_MIL", "2022_GSW", "2023_BOS", "2024_DEN", "2025_OKC"]

df_last_10_champions = pd.DataFrame(index=indexs)
df_last_10_champions["TEAM_NAME"] = teams_Series


----------------------------------------------------------------------------------------------------------------------

# Serão feito os datasets das métricas da liga em cada temporada

In [5]:
def generate_year_regular_df(season):
    df_four_factors = get_four_factors(season)
    df_advanced_stats = get_advanced_stats(season)
    df_scoring_stats = get_scoring_stats(season)
    df_bench_pct = get_bench_point_percent(season)
    df_shot_locations = get_shot_locations_pct(season)
    df_usage = get_usage_stars(season)


    df_1 = pd.merge(df_four_factors, df_advanced_stats, on = "TEAM_ID")
    df_2 = pd.merge(df_scoring_stats, df_bench_pct, on="TEAM_ID")
    df_3 = pd.merge(df_shot_locations, df_usage, on= "TEAM_ID")
    df_4 = pd.merge(df_1, df_2, on="TEAM_ID")
    df_year = pd.merge(df_3, df_4, on="TEAM_ID")
    return df_year

----------------------------------------------------------------------------------------------------------------------

## Inicializando os DataFrames de cada season:

In [6]:
df_champs_reg = pd.DataFrame()
df_champs_playoffs = pd.DataFrame()
lista_dfs_reg = []

In [7]:
for season in seasons:  
    try:
        df = generate_year_regular_df(season)
        
        if not df.empty:
            lista_dfs_reg.append(df)
            print(f" Sucesso: {season}")
        else:
            print(f" Aviso: Dados vazios retornados para {season}")

    except Exception as e:
        print(f" Erro crítico na temporada {season}: {e}")
        continue 
    
    time.sleep(4)

 Sucesso: 2015-16
 Sucesso: 2016-17
 Sucesso: 2017-18
 Sucesso: 2018-19
 Sucesso: 2019-20
 Sucesso: 2020-21
 Sucesso: 2021-22
 Sucesso: 2022-23
 Sucesso: 2023-24
 Sucesso: 2024-25


----------------------------------------------------------------------------------------------------------------------

## Criando e processando os datasets 'crus' e de 'zscore' de cada temporada.

## REGULAR SEASON

In [8]:
metrics = ['RIM_PCT', 'MID_PCT', 'SUM_USAGE', 'EFG_PCT', 'FTA_RATE', 'OREB_PCT', 'TM_TOV_PCT', 'OFF_RATING', 
           'DEF_RATING', 'NET_RATING', 'AST_PCT', 'PACE', 'PCT_FGA_3PT', 'BENCH_PTS_PCT'] 
columns = ['TEAM_NAME', 'RIM_PCT', 'MID_PCT', 'SUM_USAGE', 'EFG_PCT', 'FTA_RATE', 'OREB_PCT', 'TM_TOV_PCT', 'OFF_RATING', 
           'DEF_RATING', 'NET_RATING', 'AST_PCT', 'PACE', 'PCT_FGA_3PT', 'BENCH_PTS_PCT'] 
# Associando à 'metrics' as colunas de estatísticas.

In [78]:
# 2015-16
df_2016 = lista_dfs_reg[0]
df_2016_z = df_2016
df_2016.to_csv('../data/raw/df_2016.csv', index=False)

for metric in metrics:
    df_2016_z[metric] = np.round(zscore(df_2016[metric]), decimals=5) # Gera o DataFrame de zscore  
    df_last_10_champions[metric] = np.zeros(10)

df_2016_z.to_csv('../data/processed/df_2016_z.csv', index=False)

In [79]:
# 2016-17
df_2017 = lista_dfs_reg[1]
df_2017_z = df_2017
df_2017.to_csv('../data/raw/df_2017.csv', index=False)

for metric in metrics: 
    df_2017_z[metric] = np.round(zscore(df_2017[metric]), decimals=5)

df_2017_z.to_csv('../data/processed/df_2017_z.csv', index=False)

In [80]:
# 2017-18
df_2018 = lista_dfs_reg[2]
df_2018_z = df_2018
df_2018.to_csv('../data/raw/df_2018.csv', index=False)

for metric in metrics:
    df_2018_z[metric] = np.round(zscore(df_2018[metric]), decimals=5)

df_2018_z.to_csv('../data/processed/df_2018_z.csv', index=False)


In [81]:
# 2019-20
df_2019 = lista_dfs_reg[3]
df_2019_z = df_2019
df_2019.to_csv('../data/raw/df_2019.csv', index=False)

for metric in metrics:
    df_2019_z[metric] = np.round(zscore(df_2019[metric]), decimals=5)

df_2019_z.to_csv('../data/processed/df_2019_z.csv', index=False)


In [82]:
# 2019-20
df_2020 = lista_dfs_reg[4]
df_2020_z = df_2020
df_2020.to_csv('../data/raw/df_2020.csv', index=False)

for metric in metrics:
    df_2020_z[metric] = np.round(zscore(df_2020[metric]), decimals=5)

df_2020_z.to_csv('../data/processed/df_2020_z.csv', index=False)

In [83]:
# 2020-21
df_2021 = lista_dfs_reg[5]
df_2021_z = df_2021
df_2021.to_csv('../data/raw/df_2021.csv', index=False)

for metric in metrics:
    df_2021_z[metric] = np.round(zscore(df_2021[metric]), decimals=5)

df_2021_z.to_csv('../data/processed/df_2021_z.csv', index=False)

In [84]:
# 2021-22
df_2022 = lista_dfs_reg[6]
df_2022_z = df_2022
df_2022.to_csv('../data/raw/df_2022.csv', index=False)

for metric in metrics:
    df_2022_z[metric] = np.round(zscore(df_2022[metric]), decimals=5)

df_2022_z.to_csv('../data/processed/df_2022_z.csv', index=False)

In [85]:
# 2022-23
df_2023 = lista_dfs_reg[7]
df_2023_z = df_2023
df_2023.to_csv('../data/raw/df_2023.csv', index=False)

for metric in metrics:
    df_2023_z[metric] = np.round(zscore(df_2023[metric]), decimals=5)

df_2023_z.to_csv('../data/processed/df_2023_z.csv', index=False)

In [86]:
# 2023-24
df_2024 = lista_dfs_reg[8]
df_2024_z = df_2024
df_2024.to_csv('../data/raw/df_2024.csv', index=False)

for metric in metrics:
    df_2024_z[metric] = np.round(zscore(df_2024[metric]), decimals=5)

df_2024_z.to_csv('../data/processed/df_2024_z.csv', index=False)

In [88]:
# 2024-25
df_2025 = lista_dfs_reg[9]
df_2025_z = df_2025
df_2025.to_csv('../data/raw/df_2025.csv', index=False)

for metric in metrics:
    df_2025_z[metric] = np.round(zscore(df_2025[metric]), decimals=5)
    df_2025_z.to_csv('../data/processed/df_2025_z.csv', index=False)

for i in range(len(df_last_10_champions)):
    data = df_2025_z[df_2025_z['TEAM_NAME'] == df_last_10_champions.iloc[i]["TEAM_NAME"]].iloc[0]
    df_last_10_champions.iloc[i] = data[columns]

df_last_10_champions.to_csv('../data/processed/df_last_10_champions.csv', index = False)
df_last_10_champions

Unnamed: 0,TEAM_NAME,RIM_PCT,MID_PCT,SUM_USAGE,EFG_PCT,FTA_RATE,OREB_PCT,TM_TOV_PCT,OFF_RATING,DEF_RATING,NET_RATING,AST_PCT,PACE,PCT_FGA_3PT,BENCH_PTS_PCT
2016_CLE,Cleveland Cavaliers,-0.01655,-0.80638,0.28502,1.77448,-0.13093,0.12737,-1.0692,1.96553,-0.6253,1.57176,-0.15645,0.38066,1.00583,0.27443
2017_GSW,Golden State Warriors,-1.00934,-0.51936,0.15989,-0.35693,0.06879,0.82919,-0.16053,0.13963,-0.89092,0.5467,2.1351,-0.10928,1.34298,1.4303
2018_GSW,Golden State Warriors,-1.00934,-0.51936,0.15989,-0.35693,0.06879,0.82919,-0.16053,0.13963,-0.89092,0.5467,2.1351,-0.10928,1.34298,1.4303
2019_TOR,Toronto Raptors,1.56572,-0.47836,-0.13208,-1.06739,-0.73009,0.71222,0.839,-1.09554,-0.02767,-0.70046,1.33444,0.54224,-1.35422,0.33527
2020_LAL,Los Angeles Lakers,-0.10962,0.13667,1.4529,0.70878,1.86627,-0.7694,-0.06966,0.35444,0.03874,0.20501,-0.04602,-0.64614,0.10676,-1.65201
2021_MIL,Milwaukee Bucks,-0.0786,2.06377,1.36948,1.267,1.66655,-2.25103,-0.88747,0.38129,-0.32649,0.41002,-0.81907,0.17739,0.07867,0.01082
2022_GSW,Golden State Warriors,-1.00934,-0.51936,0.15989,-0.35693,0.06879,0.82919,-0.16053,0.13963,-0.89092,0.5467,2.1351,-0.10928,1.34298,1.4303
2023_BOS,Boston Celtics,-1.53676,-0.43736,0.70212,0.91177,-2.06155,-0.06758,-1.887,1.56276,-1.18974,1.60593,-0.23928,-1.55827,3.2254,-1.02338
2024_DEN,Denver Nuggets,1.7829,0.46469,-0.61175,1.52074,1.06739,0.71222,-0.2514,1.40165,0.47036,0.6492,1.27922,0.5683,-1.83185,-1.91562
2025_OKC,Oklahoma City Thunder,-0.51294,0.46469,1.18178,0.86102,-1.52896,-0.45748,-2.43221,1.4822,-2.3518,2.16971,-0.95711,0.68818,-0.06181,-0.43531
