## Importando as bibliotecas

In [26]:
import pandas as pd
import numpy as np
import requests
import sys
import os
import time
from scipy.stats import zscore

In [2]:
import sys
import os

# 1. Pega o caminho absoluto da pasta onde este notebook está
notebook_dir = os.getcwd()

# 2. Pega a pasta "pai" (ou seja, a raiz do seu projeto)
root_dir = os.path.dirname(notebook_dir)

# 3. Adiciona a raiz do projeto ao "path" do sistema para o Python enxergar a pasta 'src'
if root_dir not in sys.path:
    sys.path.append(root_dir)

# --- Comandos Mágicos para facilitar a vida ---
# Isso faz com que, se você alterar algo no arquivo .py, o notebook atualize 
# automaticamente sem você precisar reiniciar o kernel.
%load_ext autoreload
%autoreload 2

## Importando as funções do nba_scraper.py

In [3]:
from src.nba_scraper import find_ID_by_name
from src.nba_scraper import find_abb_by_name
from src.nba_scraper import getting_ID_row_by_name
from src.nba_scraper import get_four_factors
from src.nba_scraper import get_advanced_stats
from src.nba_scraper import get_scoring_stats
from src.nba_scraper import get_bench_point_percent
from src.nba_scraper import get_shot_locations_pct
from src.nba_scraper import get_usage_stars
from src.nba_scraper import get_all_teams_df

-------------------------------------------------------------------------------------------------------------------------------------------------------------

# Fazendo o Dataframe inicial dos ultimos 10 campeões
### Primeiramente colocarei, além da temporada, o 'full_name' do time, a temporada e o ID de cada time.

In [4]:
seasons = ["2015-16","2016-17","2017-18","2018-19","2019-20","2020-21",
           "2021-22","2022-23","2023-24","2024-25"]

teams_Series = ["CLEVELAND CAVALIERS", "GOLDEN STATE WARRIORS",
                      "GOLDEN STATE WARRIORS", "TORONTO RAPTORS","LOS ANGELES LAKERS",
                      "MILWAUKEE BUCKS", "GOLDEN STATE WARRIORS", "BOSTON CELTICS",
                        "DENVER NUGGETS", "OKLAHOMA CITY THUNDER"]

indexs = ["2016_CLE", "2017_GSW", "2018_GSW", "2019_TOR", "2020_LAL", "2021_MIL", "2022_GSW", "2023_BOS", "2024_DEN", "2025_OKC"]

df_last_10_champions = pd.DataFrame(index=indexs)
df_last_10_champions["TEAMS"] = teams_Series
df_last_10_champions["SEASON"] = seasons
df_last_10_champions

Unnamed: 0,TEAMS,SEASON
2016_CLE,CLEVELAND CAVALIERS,2015-16
2017_GSW,GOLDEN STATE WARRIORS,2016-17
2018_GSW,GOLDEN STATE WARRIORS,2017-18
2019_TOR,TORONTO RAPTORS,2018-19
2020_LAL,LOS ANGELES LAKERS,2019-20
2021_MIL,MILWAUKEE BUCKS,2020-21
2022_GSW,GOLDEN STATE WARRIORS,2021-22
2023_BOS,BOSTON CELTICS,2022-23
2024_DEN,DENVER NUGGETS,2023-24
2025_OKC,OKLAHOMA CITY THUNDER,2024-25


----------------------------------------------------------------------------------------------------------------------

# Será feito os datasets das métricas da liga em cada temporada

----------------------------------------------------------------------------------------------------------------------

## Inicializando os DataFrames de cada season:

In [5]:
def generate_year_regular_df(season):
    df_four_factors = get_four_factors(season)
    df_advanced_stats = get_advanced_stats(season)
    df_scoring_stats = get_scoring_stats(season)
    df_bench_pct = get_bench_point_percent(season)
    df_shot_locations = get_shot_locations_pct(season)
    df_usage = get_usage_stars(season)


    df_1 = pd.merge(df_four_factors, df_advanced_stats, on = "TEAM_ID")
    df_2 = pd.merge(df_scoring_stats, df_bench_pct, on="TEAM_ID")
    df_3 = pd.merge(df_shot_locations, df_usage, on= "TEAM_ID")
    df_4 = pd.merge(df_1, df_2, on="TEAM_ID")
    df_year = pd.merge(df_3, df_4, on="TEAM_ID")
    return df_year

In [6]:
df_champs_reg = pd.DataFrame()
df_champs_playoffs = pd.DataFrame()
lista_dfs_reg = []

In [7]:
for season in seasons:  
    try:
        df = generate_year_regular_df(season)
        
        if not df.empty:
            lista_dfs_reg.append(df)
            print(f" Sucesso: {season}")
        else:
            print(f" Aviso: Dados vazios retornados para {season}")

    except Exception as e:
        print(f" Erro crítico na temporada {season}: {e}")
        continue 
    
    time.sleep(4)

 Sucesso: 2015-16
 Sucesso: 2016-17
 Sucesso: 2017-18
 Sucesso: 2018-19
 Sucesso: 2019-20
 Sucesso: 2020-21
 Sucesso: 2021-22
 Sucesso: 2022-23
 Sucesso: 2023-24
 Sucesso: 2024-25


In [None]:
metrics = ['RIM_PCT', 'MID_PCT', 'SUM_USAGE', 'EFG_PCT', 'FTA_RATE', 'OREB_PCT', 'TM_TOV_PCT', 'OFF_RATING', 
           'DEF_RATING', 'NET_RATING', 'AST_PCT', 'PACE', 'PCT_FGA_3PT', 'BENCH_PTS_PCT']

In [46]:
df_2016 = lista_dfs_reg[0]
df_2016_z = df_2016
df_2016.to_csv('../data/raw/df_2016.csv', index=False)

for metric in metrics:
    df_2016_z[metric] = np.round(zscore(df_2016[metric]), decimals=5)

df_2016_z.to_csv('../data/processed/df_2016_z.csv', index=False)

In [47]:
df_2017 = lista_dfs_reg[1]
df_2017_z = df_2017
df_2017.to_csv('../data/raw/df_2017.csv', index=False)

for metric in metrics:
    df_2017_z[metric] = np.round(zscore(df_2017[metric]), decimals=5)
    df_2017_z.to_csv('../data/processed/df_2017_z.csv', index=False)

In [48]:
df_2018 = lista_dfs_reg[2]
df_2018_z = df_2018
df_2018.to_csv('../data/raw/df_2018.csv', index=False)

for metric in metrics:
    df_2018_z[metric] = np.round(zscore(df_2018[metric]), decimals=5)
    df_2018_z.to_csv('../data/processed/df_2018_z.csv', index=False)

In [49]:
df_2019 = lista_dfs_reg[3]
df_2019_z = df_2019
df_2019.to_csv('../data/raw/df_2019.csv', index=False)

for metric in metrics:
    df_2019_z[metric] = np.round(zscore(df_2019[metric]), decimals=5)
    df_2019_z.to_csv('../data/processed/df_2019_z.csv', index=False)

In [50]:
df_2020 = lista_dfs_reg[4]
df_2020_z = df_2020
df_2020.to_csv('../data/raw/df_2020.csv', index=False)

for metric in metrics:
    df_2020_z[metric] = np.round(zscore(df_2020[metric]), decimals=5)
    df_2020_z.to_csv('../data/processed/df_2020_z.csv', index=False)

In [51]:
df_2021 = lista_dfs_reg[5]
df_2021_z = df_2021
df_2021.to_csv('../data/raw/df_2021.csv', index=False)

for metric in metrics:
    df_2021_z[metric] = np.round(zscore(df_2021[metric]), decimals=5)
    df_2021_z.to_csv('../data/processed/df_2021_z.csv', index=False)

In [52]:
df_2022 = lista_dfs_reg[6]
df_2022_z = df_2022
df_2022.to_csv('../data/raw/df_2022.csv', index=False)

for metric in metrics:
    df_2022_z[metric] = np.round(zscore(df_2022[metric]), decimals=5)
    df_2022_z.to_csv('../data/processed/df_2022_z.csv', index=False)

In [53]:
df_2023 = lista_dfs_reg[7]
df_2023_z = df_2023
df_2023.to_csv('../data/raw/df_2023.csv', index=False)

for metric in metrics:
    df_2023_z[metric] = np.round(zscore(df_2023[metric]), decimals=5)
    df_2023_z.to_csv('../data/processed/df_2023_z.csv', index=False)

In [54]:
df_2024 = lista_dfs_reg[8]
df_2024_z = df_2024
df_2024.to_csv('../data/raw/df_2024.csv', index=False)

for metric in metrics:
    df_2024_z[metric] = np.round(zscore(df_2024[metric]), decimals=5)
    df_2024_z.to_csv('../data/processed/df_2024_z.csv', index=False)

In [55]:
df_2025 = lista_dfs_reg[9]
df_2025_z = df_2025
df_2025.to_csv('../data/raw/df_2025.csv', index=False)

for metric in metrics:
    df_2025_z[metric] = np.round(zscore(df_2025[metric]), decimals=5)
    df_2025_z.to_csv('../data/processed/df_2025_z.csv', index=False)