In [2]:
import pandas as pd
from bs4 import BeautifulSoup
import json
import time
from io import StringIO
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def buildrank(type):
    """Return the first ten rows of the stats table for one ranking category.

    Looks up the category in the module-level ``rankings`` mapping, clicks the
    matching column header in the page held by the module-level ``driver``,
    then reads the table back and keeps four columns.

    Args:
        type: Key into ``rankings`` (for example ``'points'``).

    Returns:
        A list of dicts, one per row, with the keys ``pos``, ``player``,
        ``team`` and ``total``.

    Raises:
        KeyError: If ``type`` is not in ``rankings``, or if an expected
            column is missing from the parsed table.
    """
    table_xpath = "//div[@class='Crom_container__C45Ti crom-container']/table"

    spec = rankings[type]
    field, label = spec['field'], spec['label']

    # Click the header cell for this statistic.
    # NOTE(review): presumably this re-sorts the table by that column; the
    # resulting sort direction is decided by the page and is not checked here.
    header = driver.find_element(By.XPATH, f"{table_xpath}/thead/tr/th[@field='{field}']")
    header.click()

    # Grab the table markup as currently rendered by the browser.
    table_html = driver.find_element(By.XPATH, table_xpath).get_attribute('outerHTML')

    # Parse the markup with BeautifulSoup and isolate the <table> node.
    table = BeautifulSoup(table_html, 'html.parser').find(name='table')

    # Let pandas turn the table into a DataFrame and keep the first ten rows.
    top_rows = pd.read_html(StringIO(str(table)))[0].head(10)

    # 'Unnamed: 0' is the name pandas gives the first, header-less column
    # (presumably the row position on the page).
    ranking = top_rows[['Unnamed: 0', 'Player', 'Team', label]]
    ranking.columns = ['pos', 'player', 'team', 'total']

    # One plain dict per row, ready for JSON serialisation.
    return ranking.to_dict('records')

# Page to scrape: season totals per player, NBA 2019-20 regular season.
url = (
    "https://stats.nba.com/players/traditional/"
    "?PerMode=Totals"
    "&Season=2019-20"
    "&SeasonType=Regular%20Season"
    "&sort=PLAYER_NAME"
    "&dir=-1"
)

# Filled in by the scraping loop: ranking name -> list of row dicts.
top10ranking = {}

# For each ranking: 'field' is the value of the header cell's `field`
# attribute (used to locate the column to click) and 'label' is the column
# title as it appears in the parsed table.
rankings = {
    name: {'field': field, 'label': label}
    for name, field, label in (
        ('3points', 'FG3M', '3PM'),
        ('points', 'PTS', 'PTS'),
        ('assistants', 'AST', 'AST'),
        ('rebounds', 'REB', 'REB'),
        ('steals', 'STL', 'STL'),
        ('blocks', 'BLK', 'BLK'),
    )
}

# Scrape every ranking with a headless Chrome session and save the result.
#
# `driver` is bound before the try block so the cleanup below can tell
# whether a browser was actually started; otherwise a failure while creating
# the driver would raise NameError inside the error handling itself.
driver = None
try:
    options = Options()
    options.add_argument("--headless")
    options.add_argument("--disable-infobars")
    options.add_argument("--disable-notifications")

    driver = webdriver.Chrome(options=options)

    driver.get(url)
    driver.implicitly_wait(10)

    # Best effort: dismiss the cookie-consent banner if it shows up.
    # Failing to do so is reported but does not abort the scrape.
    try:
        element_cookies = (By.XPATH, "//*[@id='onetrust-accept-btn-handler']")
        accept_button = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable(element_cookies)
        )
        accept_button.click()
        print("Cookies aceitos.")
    except Exception:
        print("Erro ao aceitar cookies")

    time.sleep(1)

    for k in rankings:
        top10ranking[k] = buildrank(k)

    # Dump and save to a JSON file. json's default ensure_ascii=True is kept
    # on purpose: the file stays pure ASCII, so it can be read back under any
    # default text encoding.
    with open('ranking.json', 'w', encoding='utf-8') as jp:
        json.dump(top10ranking, jp, indent=4)

    print('Sucesso!')
except Exception as error:
    # Report what went wrong instead of swallowing it; KeyboardInterrupt and
    # SystemExit are intentionally not caught here.
    print('Erro!')
    print(f'Detalhes: {error!r}')
finally:
    # Single cleanup point: close the browser exactly once, and only if it
    # was started.
    if driver is not None:
        driver.quit()

Erro ao aceitar cookies
Erro!


In [38]:
import json
import pandas as pd
import plotly.express as px

# Load the scraped rankings from the JSON file.
# The encoding is named explicitly so that reading does not depend on the
# platform's default text encoding (the file is written as UTF-8).
with open('ranking.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Build one DataFrame per ranking category.
df_3points = pd.DataFrame(data['3points'])
df_points = pd.DataFrame(data['points'])
df_assistants = pd.DataFrame(data['assistants'])
df_rebounds = pd.DataFrame(data['rebounds'])
df_steals = pd.DataFrame(data['steals'])
df_blocks = pd.DataFrame(data['blocks'])

# Helper that draws one interactive bar chart per ranking.
def plot_interactive_graph(df, title, y_label, x_col='player', y_col='total', top_n=10):
    """Show an interactive Plotly bar chart of the highest ``top_n`` rows.

    Args:
        df: DataFrame containing at least the ``x_col`` and ``y_col`` columns.
        title: Chart title.
        y_label: Label used for the y axis and for ``y_col`` in hover text.
        x_col: Column plotted on the x axis.
        y_col: Column plotted on the y axis; rows are ranked by it, descending.
        top_n: Number of rows to keep after sorting.

    Returns:
        None. The figure is displayed via ``fig.show()``.
    """
    leaders = df.sort_values(by=y_col, ascending=False).head(top_n)
    axis_labels = {x_col: 'Jogador', y_col: y_label}

    fig = px.bar(
        leaders,
        x=x_col,
        y=y_col,
        title=title,
        labels=axis_labels,
        text=y_col,
    )

    # Print each bar's value above it, formatted with two decimals.
    fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')

    # Tilt the x tick labels so they do not overlap.
    fig.update_layout(
        yaxis_title=y_label,
        xaxis_title='Jogador',
        xaxis_tickangle=-45,
    )

    fig.show()

# Render one interactive chart per ranking, in a fixed order.
for frame, chart_title, axis_label in (
    (df_points, 'Top 10 jogadores por pontos', 'Total de Pontos'),
    (df_3points, 'Top 10 jogadores por 3 pontos', 'Total de 3 Pontos'),
    (df_assistants, 'Top 10 jogadores por assistências', 'Total de Assistências'),
    (df_rebounds, 'Top 10 jogadores por rebotes', 'Total de Rebotes'),
    (df_steals, 'Top 10 jogadores por roubos de bola', 'Total de Roubos'),
    (df_blocks, 'Top 10 jogadores por tocos', 'Total de Tocos'),
):
    plot_interactive_graph(frame, chart_title, axis_label)
