#

## Scraping.ipynb

---

Este notebook tem como objetivo extrair dados de partidas do site https://football.esportsbattle.com/en/ e salvá-los em um arquivo .csv.

In [52]:
import pandas as pd
from datetime import datetime, timedelta
import requests


##### gerar_url()

Função que recebe uma data inicial e, opcionalmente, uma data final (padrão: data atual) e gera uma lista de URLs, uma por dia do intervalo, usadas para buscar os torneios.

In [53]:
def gerar_url(data_inicial, data_final=None):
    """Build one tournament-listing URL per day in ``[data_inicial, data_final]``.

    Each URL asks the API for page 1 of the tournaments whose window starts at
    11:00 of the given day and ends at 02:59 of the following calendar day.

    Args:
        data_inicial (datetime): First day of the range.
        data_final (datetime | None): Last day of the range (inclusive). When
            omitted, ``datetime.now()`` is evaluated at call time.

    Returns:
        list[str]: One URL per day, in chronological order. Empty when
        ``data_final`` is earlier than ``data_inicial``.
    """
    # Resolve the default here: a `datetime.now()` default in the signature is
    # evaluated only once, when the function is defined, and then goes stale.
    if data_final is None:
        data_final = datetime.now()

    pag = 1  # the listing always starts at the first page
    total_dias = (data_final - data_inicial).days + 1

    datas_url = []
    for n in range(total_dias):
        dia_atual = data_inicial + timedelta(days=n)
        # Let timedelta handle month/year rollover and leap years instead of
        # special-casing month lengths by hand.
        dia_seguinte = dia_atual + timedelta(days=1)
        datas_url.append(
            "https://football.esportsbattle.com/api/tournaments"
            f"?page={pag}"
            f"&dateFrom={dia_atual.year}%2F{dia_atual.month}%2F{dia_atual.day}+11%3A00"
            f"&dateTo={dia_seguinte.year}%2F{dia_seguinte.month}%2F{dia_seguinte.day}+02%3A59"
        )

    return datas_url

In [54]:
def _url_da_pagina(url, pagina):
    """Return ``url`` with its ``page`` query parameter set to ``pagina``."""
    base, _, query = url.partition('?')
    # Keep every other parameter verbatim so the already percent-encoded date
    # filters are not re-encoded.
    outros = [p for p in query.split('&') if p and not p.startswith('page=')]
    parametros = [f"page={pagina}"] + outros
    return base + "?" + "&".join(parametros)


def pega_torneios(datas_url, timeout=30):
    """Download every page of tournaments for each listing URL.

    The first response of each URL supplies ``totalPages``; the remaining pages
    are then requested one by one, so each tournament is collected once from
    the page it actually belongs to.

    Args:
        datas_url (list[str]): Listing URLs (first page), e.g. from ``gerar_url``.
        timeout (float): Seconds to wait for each HTTP request.

    Returns:
        list[list]: ``[page_number, tournament_dict]`` pairs, with 1-based
        page numbers. URLs that fail are reported on stdout and skipped;
        tournaments gathered before the failure are kept.
    """
    lista_torneios = []  # every tournament found, tagged with its page number

    for url in datas_url:
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # raise on HTTP error statuses
            json_torneios = response.json()

            paginas = json_torneios.get('totalPages', 0)

            for pag in range(1, paginas + 1):
                if pag > 1:
                    # Page 1 is already in hand; fetch the following pages.
                    response = requests.get(_url_da_pagina(url, pag), timeout=timeout)
                    response.raise_for_status()
                    json_torneios = response.json()

                for torneio in json_torneios.get('tournaments', []):
                    lista_torneios.append([pag, torneio])

        except Exception as e:
            # Deliberate best-effort: one bad day must not abort the whole scrape.
            print(f"[AVISO] Erro ao processar URL: {url}")
            print(f"Motivo: {e}\n")
            continue

    return lista_torneios


##### Função que gera a URL de partidas de cada torneio

In [55]:
def pega_partidas(lista_torneios):
    """Build the matches-endpoint URL for every tournament entry.

    Args:
        lista_torneios (list): Entries whose second element is a tournament
            dict with an ``'id'`` key (the first element is not used).

    Returns:
        list[str]: One URL per entry, in the same order as the input.
    """
    return [
        f'https://football.esportsbattle.com/api/tournaments/{entrada[1]["id"]}/matches'
        for entrada in lista_torneios
    ]

In [56]:
def pega_resultados(lista_partidas, timeout=30):
    """Download the matches of each URL and flatten them into one DataFrame.

    Args:
        lista_partidas (list[str]): Matches-endpoint URLs, e.g. from
            ``pega_partidas``.
        timeout (float): Seconds to wait for each HTTP request.

    Returns:
        pandas.DataFrame: All downloaded matches, nested JSON keys flattened
        into dotted column names, with a fresh index. Empty when nothing could
        be downloaded. URLs that fail or answer with a non-200 status are
        reported on stdout and skipped.
    """
    dados = []
    for url in lista_partidas:
        try:
            response = requests.get(url, timeout=timeout)
            if response.status_code != 200:
                print(f"[AVISO] Status {response.status_code} ao buscar partidas: {url}")
                continue

            # Flatten the nested JSON into columns.
            df_json = pd.json_normalize(response.json())
        except Exception as e:
            # Same best-effort policy as pega_torneios: a single failing
            # tournament must not discard everything collected so far.
            print(f"[AVISO] Erro ao processar URL: {url}")
            print(f"Motivo: {e}\n")
            continue

        dados.append(df_json)

    if not dados:
        return pd.DataFrame()

    # Stack every per-tournament frame into a single one.
    return pd.concat(dados, ignore_index=True)

In [57]:
# urls = gerar_url(datetime(2025,1,1), datetime(2025,1,1))
# torneios = pega_torneios(urls)
# partidas = pega_partidas(torneios)
# jogos = pega_resultados(partidas)

In [59]:
if __name__ == "__main__":
    # Build one tournament-listing URL per day in the range
    # (here a single day: 2025-01-01).
    urls = gerar_url(datetime(2025,1,1), datetime(2025,1,1))
    # Download the tournament listings for those URLs.
    torneios = pega_torneios(urls)
    # Build the matches-endpoint URL of every tournament found.
    partidas = pega_partidas(torneios)
    # Download the matches and flatten them into a single DataFrame;
    # `jogos` is read by the cells below.
    jogos = pega_resultados(partidas)


In [60]:
# Show the scraped matches as the cell output.
jogos

Unnamed: 0,id,date,status_id,tournament.id,tournament.token,tournament.token_international,tournament.status_id,console.id,console.token,console.token_international,...,participant1.prevPeriodsScores,participant2.id,participant2.nickname,participant2.score,participant2.photo,participant2.team.logo,participant2.team.id,participant2.team.token,participant2.team.token_international,participant2.prevPeriodsScores
0,1404256,2025-01-01T11:50:00Z,3,181773,Лига чемпионов А 2х6 2025-01-01,Champions League A 2x6 2025-01-01,4,136,Anfield-1,Anfield-1,...,[1],603265,Kinshiki,3,L2dhbWVycy81LzU1LzU1OS8xNzA1NTk5NDkyLXBob3RvLm...,L3RlYW1zLzYvNi82L2xvZ28ucG5n,6,Манчестер Сити,Manchester City,[2]
1,1404257,2025-01-01T11:50:00Z,3,181773,Лига чемпионов А 2х6 2025-01-01,Champions League A 2x6 2025-01-01,4,137,Anfield-2,Anfield-2,...,[1],603264,Decade,4,L2dhbWVycy81LzU2LzU2Ni8xNzA5NTcwMTcxLXBob3RvLm...,L3RlYW1zLzIvMi8yLzE2OTE1ODQ4NjMtbG9nby5wbmc=,2,Ливерпуль,Liverpool,[2]
2,1404258,2025-01-01T12:08:00Z,3,181773,Лига чемпионов А 2х6 2025-01-01,Champions League A 2x6 2025-01-01,4,136,Anfield-1,Anfield-1,...,[2],603262,flamez,3,L2dhbWVycy81LzUzLzUzOS8xNzAwNTk0MDE4LXBob3RvLm...,L3RlYW1zLzIvMjEvMjEvbG9nby5wbmc=,21,Пари Сен Жермен,Paris Saint-Germain F.C.,[1]
3,1404259,2025-01-01T12:08:00Z,3,181773,Лига чемпионов А 2х6 2025-01-01,Champions League A 2x6 2025-01-01,4,137,Anfield-2,Anfield-2,...,[4],603266,SpeciAL,6,L2dhbWVycy81LzU0LzU0OC8xNzAwNTkzODg0LXBob3RvLm...,L3RlYW1zLzkvOS85L2xvZ28ucG5n,9,Бавария,Bayern Munich,[3]
4,1404260,2025-01-01T12:26:00Z,3,181773,Лига чемпионов А 2х6 2025-01-01,Champions League A 2x6 2025-01-01,4,136,Anfield-1,Anfield-1,...,[1],603264,Decade,3,L2dhbWVycy81LzU2LzU2Ni8xNzA5NTcwMTcxLXBob3RvLm...,L3RlYW1zLzIvMi8yLzE2OTE1ODQ4NjMtbG9nby5wbmc=,2,Ливерпуль,Liverpool,[1]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755,1404571,2025-01-01T21:33:00Z,3,181764,Вольта Лига Чемпионов А 2025-01-01,Volta Champions League A 2025-01-01,4,140,Hillsborough-2,Hillsborough-2,...,[1],603342,fantazer,3,L2dhbWVycy80LzQ2LzQ2Ny8xNzA4NTEzNTAxLXBob3RvLm...,L3RlYW1zLzIvMjEvMjEvbG9nby5wbmc=,21,Пари Сен Жермен,Paris Saint-Germain F.C.,[2]
756,1404572,2025-01-01T21:42:00Z,3,181764,Вольта Лига Чемпионов А 2025-01-01,Volta Champions League A 2025-01-01,4,139,Hillsborough-1,Hillsborough-1,...,[1],603343,Smetana,3,L2dhbWVycy82LzY2LzY2MS8xNzM3NjQ0NjUzLXBob3RvLm...,L3RlYW1zLzkvOS85L2xvZ28ucG5n,9,Бавария,Bayern Munich,[1]
757,1404573,2025-01-01T21:42:00Z,3,181764,Вольта Лига Чемпионов А 2025-01-01,Volta Champions League A 2025-01-01,4,140,Hillsborough-2,Hillsborough-2,...,[2],603344,Profik,3,L2dhbWVycy82LzYwLzYwMy8xNzIwMTA4OTc4LXBob3RvLm...,L3RlYW1zLzEvMTkvMTkvbG9nby5wbmc=,19,Барселона,Barcelona,[2]
758,1404574,2025-01-01T21:51:00Z,3,181764,Вольта Лига Чемпионов А 2025-01-01,Volta Champions League A 2025-01-01,4,139,Hillsborough-1,Hillsborough-1,...,[0],603345,Gala,4,L2dhbWVycy80LzQ2LzQ2OS8xNzA4NTEzNDQ4LXBob3RvLm...,L3RlYW1zLzIvMjAvMjAvbG9nby5wbmc=,20,Реал Мадрид,Real Madrid,[2]


In [None]:
# Save the raw matches as a UTF-8 CSV, leaving out the DataFrame index column.
jogos.to_csv("../../data/raw/base_partidas_raw.csv", index=False, encoding='utf-8')