In [None]:
import io
import logging
import pickle
import tomllib
from pathlib import Path

# import pendulum
import pandas as pd
from selectolax.parser import HTMLParser
from splinter import Browser
from splinter.driver.webdriver.chrome import WebDriver
from webdriver_manager.chrome import ChromeDriverManager

from classes import Gamer
from utils import browser_action_with_element_presence_check

## Initialize

### Set Up Logging

In [None]:
# Configure the root logger with "{"-style formatting: the level name padded
# to 8 characters, then the emitting module, the function name and the message.
logging.basicConfig(
    style="{",
    format="{levelname:8s} - {module} - {funcName}: {message}",
)

# Notebook-wide logger, named after the current module.
logger_main = logging.getLogger(name=__name__)
# Uncomment to see debug-level output from this notebook's logger.
# logger_main.setLevel(logging.DEBUG)

### Create Input Data

In [None]:
# secrets.toml is read from the directory one level above the current working
# directory; tomllib requires the file to be opened in binary mode.
with (Path().resolve().parents[0] / "secrets.toml").open("rb") as f:
    secrets = tomllib.load(f)

# Build one Gamer per (key, value) entry of the "gamer_ids" table, passing the
# key and value positionally in that order.
gamers: list[Gamer] = [
    Gamer(*entry) for entry in secrets["gamer_ids"].items()
]

#### Create Browser Instance

In [None]:
# Set up Splinter (prep the automated browser).
# The value returned by ChromeDriverManager().install() is forwarded to
# Splinter as the `executable_path` keyword argument.
executable_path = dict(executable_path=ChromeDriverManager().install())
# headless=False asks for a visible Chrome window rather than a headless one.
browser: WebDriver = Browser("chrome", headless=False, **executable_path)

## Extract

### Main Scraping Function

In [None]:
# def scrape_game_history():
#     # 3 loops.
#     for gamer in gamers:
#         visit(url)

#         game_history = retrieve_game_history(browser)
#         if not game_history:
#             print("No game history")

#         # m games per n pages up until the crawler runs into an already seen game.
#         for game in game_history:
#             if is_cpu_game():
#                 continue
#             else:
#                 collect()

# def visit(url):
#     browser_action(browser.visit, url=url)

# def retrieve_game_history(browser):
#     parsed_html = BeautifulSoup(browser.html, "lxml")
#     return parsed_html.find_all("div", attrs={"class": "mlb22-games-box"})

### Retrieve Game History

#### Visit Game History URL

In [None]:
# Visit the first gamer's URL through the shared helper. Only the first of the
# helper's two return values is kept; it is used below as the flag for whether
# game-history boxes ("div.mlb22-games-box") are available to parse.
# NOTE(review): the helper's exact semantics are inferred from its name and
# usage here — confirm against utils.
is_game_history, _ = browser_action_with_element_presence_check(
    browser.visit,
    browser=browser,
    css_selector="div.mlb22-games-box",
    url=gamers[0].url,
)
# Bare expression so the notebook displays the flag.
is_game_history

#### Retrieve Individual Game Nodes

In [None]:
if is_game_history:
    # Parse the browser's current HTML once and collect every node matching
    # the game-box selector.
    game_history_parser = HTMLParser(browser.html)
    game_nodes = game_history_parser.css("div.mlb22-games-box")
else:
    # The presence check was falsy, so there is nothing to parse.
    game_nodes = []

### Individual Game Processing

#### Create Parser for Individual Game on Game History Page

In [None]:
# `game_nodes` is an empty list when no game history was found. Fail with a
# descriptive message instead of the bare "list index out of range" that
# indexing an empty list would produce (same exception type as before).
if not game_nodes:
    raise IndexError(
        "No game nodes were found on the game history page; "
        "there is no individual game to parse."
    )

# Re-parse only the first game's HTML so the selectors in the following cells
# run against that single game rather than the whole history page.
game_parser = HTMLParser(game_nodes[0].html)

#### Game Date

In [None]:
# Take the first element matching "p:nth-child(2)" in the game's markup; its
# text is treated as the game date.
# NOTE(review): css_first presumably returns None when nothing matches, which
# would make the .text() call fail — confirm against selectolax.
date_p_tag = game_parser.css_first("p:nth-child(2)")
game_date = date_p_tag.text()
# Bare expression so the notebook displays the value.
game_date

#### View Game href

In [None]:
# Collect the <a> tags in the game's markup; the last one is taken to be the
# "View Game" link (per the variable names) — TODO confirm against the page.
view_game_a_tag = game_parser.tags("a")
view_game_href = view_game_a_tag[-1].attributes["href"]
# Bare expression so the notebook displays the href.
view_game_href

#### Click the View Game Link

In [None]:
# Click the link whose href was extracted above, again going through the
# shared helper. Only the first of its two return values is kept, as the flag
# for the game-stats page ("div.section-block").
# NOTE(review): the helper's exact semantics are inferred from its name and
# usage here — confirm against utils.
is_game_stats, _ = browser_action_with_element_presence_check(
    browser.links.find_by_href(view_game_href).click,
    browser=browser,
    css_selector="div.section-block",
)
# Bare expression so the notebook displays the flag.
is_game_stats

#### Create Parser for Individual Game Stats Page and Retrieve Stats Tables

In [None]:
# Parse the browser's current HTML (the game stats page after the click above).
game_stats_parser = HTMLParser(browser.html)
# Retrieve game stats DataFrames, one per HTML table found by pandas.
# The markup is wrapped in StringIO because passing a literal HTML string to
# pd.read_html is deprecated (pandas 2.1+); file-like input works on all versions.
game_boxscores = pd.read_html(io.StringIO(game_stats_parser.html))
# Bare expression so the notebook displays how many tables were found.
len(game_boxscores)

In [None]:
# File-name stems for the extracted tables. They are paired positionally with
# the DataFrames in `game_boxscores`, so the order here matters.
boxscore_names = [
    "linescore",
    "away_hitting", "away_pitching",
    "home_hitting", "home_pitching",
]

### Write to CSV and Pickle Formats

In [None]:
base_path = Path("../data").resolve()
# Create the output directory up front: to_csv() and open() both fail when the
# target directory does not exist (e.g. on a fresh checkout).
base_path.mkdir(parents=True, exist_ok=True)

# zip() stops at the shorter input, so a mismatch between the number of tables
# and the number of names would silently drop or mislabel tables. Surface it
# in the log rather than hiding it; the write still proceeds as before.
if len(game_boxscores) != len(boxscore_names):
    logger_main.warning(
        "Expected %d boxscore tables but found %d; only the first %d are written.",
        len(boxscore_names),
        len(game_boxscores),
        min(len(boxscore_names), len(game_boxscores)),
    )

# One CSV per table, named after its entry in `boxscore_names`.
for name, boxscore in zip(boxscore_names, game_boxscores):
    path = base_path / f"{name}.csv"
    boxscore.to_csv(path, index=False)

# Persist the stats page markup, the table names and the gamers together in a
# single pickle file next to the CSVs.
extract_vars = (game_stats_parser.html, boxscore_names, gamers)
with open(base_path / "extract_vars.pickle", "wb") as f:
    pickle.dump(extract_vars, f)

### Read from CSV

In [None]:
# Read each table back from the directory it was written to (`base_path`).
# A bare "<name>.csv" would be resolved against the current working directory,
# which is not where the write cell saved the files.
game_boxscores = [
    pd.read_csv(base_path / f"{name}.csv") for name in boxscore_names
]