# STATIZ 크롤링 봇

본 프로그램은 STATIZ에서 연도별, 월별 경기 기록을 크롤링하기 위한 크롤링 봇이다. 

## 1. 기본적인 로직

크롤링 봇을 튜닝해야 할 일이 생긴다면 이 섹션의 코드들을 참조하여 적절히 수정할 수 있다.

### 필요한 라이브러리 설치

크롤링을 위해 `requests`, `beautifulsoup4` 라이브러리를 설치한다.

In [None]:
%pip install requests beautifulsoup4

### 데이터프레임 정의

- `individual_bat_datum_df`: 경기별로 선수 개인의 타격 기록을 저장한다.
- `individual_pitch_datum_df`: 경기별로 선수 개인의 투구 기록을 저장한다.
- `team_aggregated_bat_datum_df`: 경기별로 팀 합계 타격 기록을 저장한다.
- `team_aggregated_pitch_datum_df`: 경기별로 팀 합계 투구 기록을 저장한다.

In [88]:
import pandas as pd

# 1. Build the two-level column headers.
#    Level 0 is the group ('game', 'bat' or 'pitch'); level 1 is the field name.
_game_fields = ['game_id', 'date', 'team_type', 'team_name', 'score']
_bat_stats = [
    'PA', 'AB', 'R', 'H', 'HR', 'RBI', 'BB', 'HBP', 'SO', 'GO',
    'FO', 'NP', 'GDP', 'LOB', 'AVG', 'OPS', 'LI', 'WPA', 'RE24',
]
_pitch_stats = [
    'IP', 'TBF', 'H', 'R', 'ER', 'BB', 'HBP', 'K', 'HR', 'GO-FO',
    'NP-S', 'IR-IS', 'GSC', 'ERA', 'WHIP', 'LI', 'WPA', 'RE24',
]

# Every table starts with the same five 'game' columns.
_game_header = [('game', field) for field in _game_fields]

# Per-player tables carry identifying columns in front of the stats.
individual_bat_columns = pd.MultiIndex.from_tuples(
    _game_header + [('bat', field) for field in ['player_name', 'position'] + _bat_stats]
)
individual_pitch_columns = pd.MultiIndex.from_tuples(
    _game_header + [('pitch', field) for field in ['player_name'] + _pitch_stats]
)

# Team-total tables hold the stats only.
team_bat_columns = pd.MultiIndex.from_tuples(
    _game_header + [('bat', field) for field in _bat_stats]
)
team_pitch_columns = pd.MultiIndex.from_tuples(
    _game_header + [('pitch', field) for field in _pitch_stats]
)

# 2. Create the four (initially empty) result tables:
#    per-player batting/pitching and per-team batting/pitching totals.
individual_bat_datum_df = pd.DataFrame(columns=individual_bat_columns)
individual_pitch_datum_df = pd.DataFrame(columns=individual_pitch_columns)

team_aggregated_bat_datum_df = pd.DataFrame(columns=team_bat_columns)
team_aggregated_pitch_datum_df = pd.DataFrame(columns=team_pitch_columns)

In [89]:
individual_bat_datum_df

Unnamed: 0_level_0,game,game,game,game,game,bat,bat,bat,bat,bat,bat,bat,bat,bat,bat,bat,bat,bat,bat,bat,bat
Unnamed: 0_level_1,game_id,date,team_type,team_name,score,player_name,position,PA,AB,R,...,GO,FO,NP,GDP,LOB,AVG,OPS,LI,WPA,RE24


### 크롤링 준비

페이지 데이터를 크롤링한다.

In [90]:
import requests
from bs4 import BeautifulSoup
import re

# 1. define URL and request header
year = "2024"
game_number = "1464"
game_id = year + game_number  # value of the s_no query parameter: year followed by game number
url = f"https://statiz.sporki.com/schedule/?m=boxscore&s_no={game_id}"

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

# 2. get HTML data
tolerance_count = 5  # try to get HTML data at most 5 times
response = None
while tolerance_count > 0:
    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        # Network-level failure: count it as a failed attempt and retry.
        response = None
    if response is not None and response.status_code == 200:
        break
    tolerance_count -= 1

# Stop here instead of parsing an error page when every attempt failed.
if response is None or response.status_code != 200:
    raise RuntimeError(f"Failed to fetch box score page after 5 attempts: {url}")

# 3. parse HTML data
soup = BeautifulSoup(response.text, "html.parser")

### 데이터 추출

크롤링해온 페이지 데이터(`soup`)의 HTML 태그를 기반으로 경기 데이터를 추출한다.

In [91]:
# 4. extract game information

# 4-1. find scores and date
div_schedule = soup.find("div", class_="game_schedule result")
div_score = div_schedule.find("div", class_="score") if div_schedule is not None else None
if div_score is None:
    # Without the score header nothing below can be extracted.
    raise ValueError(f"Score header not found on the box score page of game {game_id}.")

div_spans = div_score.find_all("span")
div_txt = div_score.find("div", class_="txt")

# The notebook treats a header without score spans as a cancelled game.
# Both scores are indexed below, so stop with a clear message instead of
# running into an IndexError.
if len(div_spans) < 2:
    raise ValueError("취소된 경기입니다. 점수 정보가 없어 기록을 추출할 수 없습니다.")

# The header text is split on ", " and the second part is used as the date,
# prefixed with the season year.
game_date = div_txt.text.strip().split(", ")[1]
game_date = f"{year}-{game_date}"

away_score = div_spans[0].text.strip()              # score information of the away team
home_score = div_spans[1].text.strip()              # score information of the home team


In [97]:
# 4-2. find away/home teams' name and bat/pitch information
div_tables = soup.find_all("div", class_="box_type_boared")

# The first four boxes are taken, in this order:
# away batting, home batting, away pitching, home pitching.
div_away_bat, div_home_bat, div_away_pitch, div_home_pitch = (
    div_tables[position] for position in range(4)
)

# Pull the team name out of the header of a batting box.
def extractTeamName(div_team_bat):
    """Return the text inside the first pair of parentheses of the box header.

    The header is the ``div`` with class ``box_head`` found under
    ``div_team_bat``; its stripped text is searched for ``( ... )``.

    Raises:
        AttributeError: the header element or the parentheses are missing.
    """
    header_text = div_team_bat.find("div", class_="box_head").text.strip()
    return re.search(r"\((.*?)\)", header_text).group(1)

# extract teams' name (away box first, then home box)
away_team_name, home_team_name = (
    extractTeamName(div_bat) for div_bat in (div_away_bat, div_home_bat)
)

# Turn one batting table into per-player rows plus the team total row.
def extractBatDatum(div_bat, game_id, game_date, team_type, team_name, score):
    """Split a batting table into player rows and the team total row.

    Every produced row starts with ``[game_id, game_date, team_type,
    team_name, score]`` followed by the stripped text of the table row's
    ``td`` cells, with the first cell left out.

    Args:
        div_bat: Element whose ``tr`` descendants form the batting table.
        game_id, game_date, team_type, team_name, score: Values copied to the
            front of every row.

    Returns:
        ``(individual_bat_rows, team_bat_row)``: one list per table row between
        the first and the last ``tr``, and a single list built from the last
        ``tr``.
    """
    game_prefix = [game_id, game_date, team_type, team_name, score]
    table_rows = div_bat.find_all("tr")

    def cells_after_first(tr):
        # The first <td> of every row is skipped.
        return [td.text.strip() for td in tr.find_all("td")[1:]]

    # First <tr> is skipped and the last one is handled separately below.
    individual_bat_rows = [game_prefix + cells_after_first(tr) for tr in table_rows[1:-1]]
    team_bat_row = game_prefix + cells_after_first(table_rows[-1])

    return individual_bat_rows, team_bat_row

# Batting rows for both sides: the away table first, then the home table.
(away_bat_rows, away_team_bat_row), (home_bat_rows, home_team_bat_row) = (
    extractBatDatum(div_bat, game_id, game_date, side, name, side_score)
    for div_bat, side, name, side_score in (
        (div_away_bat, "away", away_team_name, away_score),
        (div_home_bat, "home", home_team_name, home_score),
    )
)

# Turn one pitching table into per-player rows plus the team total row.
def extractPitchDatum(div_pitch, game_id, game_date, team_type, team_name, score):
    """Split a pitching table into player rows and the team total row.

    Every produced row starts with ``[game_id, game_date, team_type,
    team_name, score]`` followed by stripped ``td`` texts.  Player rows keep
    all cells; the team total row leaves out its first cell.

    Args:
        div_pitch: Element whose ``tr`` descendants form the pitching table.
        game_id, game_date, team_type, team_name, score: Values copied to the
            front of every row.

    Returns:
        ``(individual_pitch_rows, team_pitch_row)``: one list per table row
        between the first and the last ``tr``, and a single list built from
        the last ``tr``.
    """
    game_prefix = [game_id, game_date, team_type, team_name, score]
    table_rows = div_pitch.find_all("tr")

    # First <tr> is skipped and the last one is handled separately below.
    individual_pitch_rows = [
        game_prefix + [td.text.strip() for td in tr.find_all("td")]
        for tr in table_rows[1:-1]
    ]

    # Unlike the player rows, the total row drops its first <td>.
    total_cells = table_rows[-1].find_all("td")[1:]
    team_pitch_row = game_prefix + [td.text.strip() for td in total_cells]

    return individual_pitch_rows, team_pitch_row

# Pitching rows for both sides: the away table first, then the home table.
(away_pitch_rows, away_team_pitch_row), (home_pitch_rows, home_team_pitch_row) = (
    extractPitchDatum(div_pitch, game_id, game_date, side, name, side_score)
    for div_pitch, side, name, side_score in (
        (div_away_pitch, "away", away_team_name, away_score),
        (div_home_pitch, "home", home_team_name, home_score),
    )
)

In [98]:
# 4-3. concatenate each teams individual information to the dataframe

def concatenateDataframe(df, columns, crawled_rows):
    """Return a new DataFrame with ``crawled_rows`` stacked under ``df``.

    ``crawled_rows`` is first wrapped in a DataFrame labelled with
    ``columns``; the combined result is re-indexed from 0.
    """
    appended = pd.DataFrame(data=crawled_rows, columns=columns)
    frames = [df, appended]
    return pd.concat(frames, ignore_index=True)

# Per-player tables: append the away rows first, then the home rows.
individual_bat_datum_df = concatenateDataframe(
    concatenateDataframe(individual_bat_datum_df, individual_bat_columns, away_bat_rows),
    individual_bat_columns,
    home_bat_rows,
)

individual_pitch_datum_df = concatenateDataframe(
    concatenateDataframe(individual_pitch_datum_df, individual_pitch_columns, away_pitch_rows),
    individual_pitch_columns,
    home_pitch_rows,
)

# Team tables: one total row per side, added in a single call.
team_aggregated_bat_datum_df = concatenateDataframe(
    team_aggregated_bat_datum_df,
    team_bat_columns,
    [away_team_bat_row, home_team_bat_row],
)
team_aggregated_pitch_datum_df = concatenateDataframe(
    team_aggregated_pitch_datum_df,
    team_pitch_columns,
    [away_team_pitch_row, home_team_pitch_row],
)

# 4-4. skip the codes for saving the dataframe content to the file

## 2. 크롤링 봇

이 섹션의 코드를 실행하면 시작 연도부터 종료 연도까지의 경기 데이터를 추출할 수 있다.

In [None]:
# 크롤링을 위해 필요한 라이브러리를 설치한다.
%pip install requests beautifulsoup4

In [34]:
# Every KBO regular season from 2014 through 2024 ended in October, so only
# the day of the month is recorded.  The end dates are hard-coded per year.
# key: year -> value: day (in October) on which that regular season ended
season_end_day = {
    2014: 17,
    2015: 6,
    2016: 9,
    2017: 3,
    2018: 14,
    2019: 1,
    2020: 31,
    2021: 31,
    2022: 11,
    2023: 17,
    2024: 1,
}


# 연도별로 게임 ID를 수집하는 함수를 정의한다.
import requests
from bs4 import BeautifulSoup
import re

# Collect the game IDs of every season from the start year through the end year.
def crawlGameIds(start_year, end_year):
    """Return the game IDs of all seasons in ``start_year..end_year`` (inclusive).

    Calls ``crawlAnnualGameIds`` once per year, in ascending order, and joins
    the results into one flat list.  An empty list is returned when
    ``start_year`` is greater than ``end_year``.
    """
    return [
        season_game_id
        for season in range(start_year, end_year + 1)
        for season_game_id in crawlAnnualGameIds(season)
    ]

# Collect the game IDs of a single season.
def crawlAnnualGameIds(year):
    """Return the game IDs found on the STATIZ schedule pages of ``year``.

    The monthly schedule pages for March through October are fetched and the
    ``s_no`` query value of every game link is collected.  Links that contain
    a ``weather rain`` span (rained-out games) are left out.

    Day cells are selected as follows:
    * March-September: cells that have both a day number and a
      ``game_schedule_m`` block.
    * October: cells whose day number is not later than
      ``season_end_day[year]``, because the regular season ends in October.

    Args:
        year: Season year; must be a key of ``season_end_day``.

    Returns:
        List of game ID strings in page order.

    Raises:
        KeyError: ``season_end_day`` has no entry for ``year``.
        RuntimeError: a schedule page could not be fetched after 5 attempts
            or does not contain the calendar table.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    # Looked up first so an unsupported year fails before any request is sent.
    last_october_day = season_end_day[year]
    game_ids = []

    for month in range(3, 10 + 1):
        # 1. define URL
        url = f"https://statiz.sporki.com/schedule/?year={year}&month={month}"

        # 2. get HTML data (at most 5 attempts)
        response = None
        for _ in range(5):
            try:
                # A timeout keeps one stalled connection from hanging the crawl.
                response = requests.get(url, headers=headers, timeout=10)
            except requests.RequestException:
                response = None
            if response is not None and response.status_code == 200:
                break
            print("Failed to get html data")
        else:
            # Every attempt failed: do not go on to parse an error page.
            raise RuntimeError(f"Failed to fetch schedule page after 5 attempts: {url}")

        # 3. parse HTML data
        soup = BeautifulSoup(response.text, "html.parser")

        # 4. extract calendar area
        div_calendar = soup.find("div", class_="calendar_area")
        tbody_calendar = div_calendar.find("tbody") if div_calendar is not None else None
        if tbody_calendar is None:
            raise RuntimeError(f"Calendar table not found on schedule page: {url}")

        for tr in tbody_calendar.find_all("tr"):
            for td in tr.find_all("td"):
                span_day = td.find("span", class_="day")
                if span_day is None:
                    continue

                if month == 10:
                    # October: keep only days up to the end of the regular season.
                    if int(span_day.text) > last_october_day:
                        continue
                elif td.find("div", class_="game_schedule_m") is None:
                    # Other months: keep only days that carry a game block.
                    continue

                for a in td.find_all("a"):
                    if a.find("span", class_="weather rain") is not None:
                        continue  # rained-out game
                    # Links without an s_no value are not game links; skip them.
                    found = re.search(r"s_no=(\d+)", a.get("href") or "")
                    if found:
                        game_ids.append(found.group(1))

    return game_ids

# Fetch one game's box score and append its records to the four result tables.
def crawlGameDatum(game_id, individual_bat_datum_df, individual_pitch_datum_df, team_aggregated_bat_datum_df, team_aggregated_pitch_datum_df):
    """Crawl the box score of ``game_id`` and return the extended DataFrames.

    The column layouts are read from the module-level
    ``individual_bat_columns``, ``individual_pitch_columns``,
    ``team_bat_columns`` and ``team_pitch_columns``.

    Args:
        game_id: Game ID string; its first four characters are used as the
            year of the game date.
        individual_bat_datum_df: Per-player batting table to extend.
        individual_pitch_datum_df: Per-player pitching table to extend.
        team_aggregated_bat_datum_df: Team batting totals table to extend.
        team_aggregated_pitch_datum_df: Team pitching totals table to extend.

    Returns:
        The four tables, in the same order as the parameters, each with this
        game's rows appended (away rows before home rows).

    Raises:
        RuntimeError: the page could not be fetched after 5 attempts.
        ValueError: the page has no score header, fewer than two scores, or
            fewer than four stat tables.
        Other lookups (date text, team name, table rows) may still raise
        IndexError/AttributeError when the page layout differs.
    """

    # function for extracting team name from the div bat
    def extractTeamName(div_team_bat):
        header_text = div_team_bat.find("div", class_="box_head").text.strip()
        return re.search(r"\((.*?)\)", header_text).group(1)

    # Shared row extraction: first <tr> is skipped, the last <tr> is the team
    # total.  ``player_skip`` leading cells are dropped from player rows; the
    # total row always drops its first cell.
    def extractRows(div_table, game_info, player_skip):
        table_rows = div_table.find_all("tr")
        if not table_rows:
            raise ValueError(f"Empty stat table on box score page of game {game_id}")
        individual_rows = [
            game_info + [td.text.strip() for td in tr.find_all("td")[player_skip:]]
            for tr in table_rows[1:-1]
        ]
        team_row = game_info + [td.text.strip() for td in table_rows[-1].find_all("td")[1:]]
        return individual_rows, team_row

    # function for extracting bat information from the div bat
    def extractBatDatum(div_bat, game_id, game_date, team_type, team_name, score):
        # Player rows drop the first cell.
        return extractRows(div_bat, [game_id, game_date, team_type, team_name, score], 1)

    # function for extracting pitch information from the div pitch
    def extractPitchDatum(div_pitch, game_id, game_date, team_type, team_name, score):
        # Player rows keep every cell.
        return extractRows(div_pitch, [game_id, game_date, team_type, team_name, score], 0)

    # function for extending dataframes
    def concatenateDataframe(df, columns, crawled_rows):
        new_data_df = pd.DataFrame(crawled_rows, columns=columns)
        return pd.concat([df, new_data_df], ignore_index=True)

    # 1. Fetch the HTML (at most 5 attempts).
    url = f"https://statiz.sporki.com/schedule/?m=boxscore&s_no={game_id}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    response = None
    for _ in range(5):
        try:
            # A timeout keeps one stalled connection from hanging the crawl.
            response = requests.get(url, headers=headers, timeout=10)
        except requests.RequestException:
            response = None
        if response is not None and response.status_code == 200:
            break
    else:
        # Every attempt failed: do not go on to parse an error page.
        raise RuntimeError(f"Failed to fetch box score page after 5 attempts: {url}")

    soup = BeautifulSoup(response.text, "html.parser")

    # 2. Extract the data.
    div_schedule = soup.find("div", class_="game_schedule result")
    div_score = div_schedule.find("div", class_="score") if div_schedule is not None else None
    if div_score is None:
        raise ValueError(f"Score header not found on box score page of game {game_id}")

    div_spans = div_score.find_all("span")
    div_txt = div_score.find("div", class_="txt")
    if len(div_spans) < 2 or div_txt is None:
        raise ValueError(f"Score or date missing on box score page of game {game_id}")

    # The header text is split on ", "; the second part is the date without year.
    game_date = div_txt.text.strip().split(", ")[1]
    game_date = f"{game_id[:4]}-{game_date}"

    away_score = div_spans[0].text.strip()              # score information of the away team
    home_score = div_spans[1].text.strip()              # score information of the home team

    div_tables = soup.find_all("div", class_="box_type_boared")
    if len(div_tables) < 4:
        raise ValueError(f"Expected 4 stat tables on box score page of game {game_id}, found {len(div_tables)}")

    # Order used by the page: away batting, home batting, away pitching, home pitching.
    div_away_bat, div_home_bat, div_away_pitch, div_home_pitch = div_tables[:4]

    # extract teams' name
    away_team_name = extractTeamName(div_away_bat)
    home_team_name = extractTeamName(div_home_bat)

    # extract bat information
    away_bat_rows, away_team_bat_row = extractBatDatum(div_away_bat, game_id, game_date, "away", away_team_name, away_score)
    home_bat_rows, home_team_bat_row = extractBatDatum(div_home_bat, game_id, game_date, "home", home_team_name, home_score)

    # extract pitch information
    away_pitch_rows, away_team_pitch_row = extractPitchDatum(div_away_pitch, game_id, game_date, "away", away_team_name, away_score)
    home_pitch_rows, home_team_pitch_row = extractPitchDatum(div_home_pitch, game_id, game_date, "home", home_team_name, home_score)

    # 3. Extend the DataFrames.
    individual_bat_datum_df = concatenateDataframe(individual_bat_datum_df, individual_bat_columns, away_bat_rows)
    individual_bat_datum_df = concatenateDataframe(individual_bat_datum_df, individual_bat_columns, home_bat_rows)

    individual_pitch_datum_df = concatenateDataframe(individual_pitch_datum_df, individual_pitch_columns, away_pitch_rows)
    individual_pitch_datum_df = concatenateDataframe(individual_pitch_datum_df, individual_pitch_columns, home_pitch_rows)

    team_aggregated_bat_datum_df = concatenateDataframe(team_aggregated_bat_datum_df, team_bat_columns, [away_team_bat_row, home_team_bat_row])
    team_aggregated_pitch_datum_df = concatenateDataframe(team_aggregated_pitch_datum_df, team_pitch_columns, [away_team_pitch_row, home_team_pitch_row])

    return individual_bat_datum_df, individual_pitch_datum_df, team_aggregated_bat_datum_df, team_aggregated_pitch_datum_df

In [38]:
import pandas as pd
from datetime import datetime
import os

# Build the two-level column headers.
# Level 0 is the group ('game', 'bat' or 'pitch'); level 1 is the field name.
_game_fields = ['game_id', 'date', 'team_type', 'team_name', 'score']
_bat_stats = [
    'PA', 'AB', 'R', 'H', 'HR', 'RBI', 'BB', 'HBP', 'SO', 'GO',
    'FO', 'NP', 'GDP', 'LOB', 'AVG', 'OPS', 'LI', 'WPA', 'RE24',
]
_pitch_stats = [
    'IP', 'TBF', 'H', 'R', 'ER', 'BB', 'HBP', 'K', 'HR', 'GO-FO',
    'NP-S', 'IR-IS', 'GSC', 'ERA', 'WHIP', 'LI', 'WPA', 'RE24',
]

# Every table starts with the same five 'game' columns.
_game_header = [('game', field) for field in _game_fields]

# Per-player tables carry identifying columns in front of the stats.
individual_bat_columns = pd.MultiIndex.from_tuples(
    _game_header + [('bat', field) for field in ['player_name', 'position'] + _bat_stats]
)
individual_pitch_columns = pd.MultiIndex.from_tuples(
    _game_header + [('pitch', field) for field in ['player_name'] + _pitch_stats]
)

# Team-total tables hold the stats only.
team_bat_columns = pd.MultiIndex.from_tuples(
    _game_header + [('bat', field) for field in _bat_stats]
)
team_pitch_columns = pd.MultiIndex.from_tuples(
    _game_header + [('pitch', field) for field in _pitch_stats]
)

# Create the four (initially empty) result tables:
# per-player batting/pitching and per-team batting/pitching totals.
individual_bat_datum_df = pd.DataFrame(columns=individual_bat_columns)
individual_pitch_datum_df = pd.DataFrame(columns=individual_pitch_columns)

team_aggregated_bat_datum_df = pd.DataFrame(columns=team_bat_columns)
team_aggregated_pitch_datum_df = pd.DataFrame(columns=team_pitch_columns)

# ----------------------------------------------------------

def print_progress_bar(curr, target):
    """Redraw a one-line text progress bar on standard output.

    Args:
        curr: Number of items processed so far.
        target: Total number of items.  A non-positive target is shown as 0%
            instead of raising ZeroDivisionError.

    The bar itself is clamped to 0-100%; the printed percentage is not.
    """
    bar_length = 50  # width of the bar in characters
    ratio = curr / target if target > 0 else 0.0
    # Clamp so the bar never grows past bar_length or goes negative.
    filled_length = int(bar_length * min(max(ratio, 0.0), 1.0))
    bar = '█' * filled_length + '-' * (bar_length - filled_length)
    # The leading '\r' rewinds to the line start before each redraw.  No
    # trailing '\r': leaving the cursor at column 0 lets the next message
    # overwrite the bar.  flush=True because the line has no newline.
    print(f'\r|{bar}| {(ratio * 100):.2f}% 완료 ({curr} / {target})', end='', flush=True)

# Ask for the (inclusive) range of seasons to crawl.
start_year = int(input("데이터 수집을 시작할 연도를 입력하세요(2014 ~ 2024): "))
end_year = int(input("데이터 수집을 종료할 연도를 입력하세요(2014 ~ 2024): "))

game_ids = crawlGameIds(start_year, end_year)
count = 0
for game_id in game_ids:
    try:
        (
            individual_bat_datum_df,
            individual_pitch_datum_df,
            team_aggregated_bat_datum_df,
            team_aggregated_pitch_datum_df,
        ) = crawlGameDatum(
            game_id,
            individual_bat_datum_df,
            individual_pitch_datum_df,
            team_aggregated_bat_datum_df,
            team_aggregated_pitch_datum_df,
        )
    except Exception as error:
        # Best effort: report the game and keep crawling.  `Exception` (not a
        # bare except) so Ctrl+C / kernel interrupt still stops the loop; the
        # actual error is appended to help diagnosis.
        print(f"\nID: {game_id} 데이터 수집 중 결측치 발생. 박스 스코어 페이지가 정의되어 있지 않습니다. ({type(error).__name__}: {error})\n")

    count += 1
    if count % 10 == 0:
        # Redraw the progress bar every 10 games.
        os.system('cls' if os.name == 'nt' else 'clear')
        print_progress_bar(count, len(game_ids))

# Integrity check of the crawled data: compares the number of team rows with
# 720 games * 2 teams per season.
# NOTE(review): 720 games per season is assumed for every year — confirm it
# holds for the whole 2014-2024 range.
season_count = end_year - start_year + 1
expected_points = 1440 * season_count
print(f"\n팀 합계 데이터 포인트 개수(720 경기 * {season_count}년 * home/away team = {expected_points} points): {len(team_aggregated_bat_datum_df)}/{expected_points}")

# Save the four tables as sheets of one timestamped Excel file.
filename = datetime.now().strftime("%Y%m%d-%H%M-KBO-data")
print(f"수집한 데이터를 현재 디렉터리에 파일명: {filename}.xlsx 로 저장합니다.")
with pd.ExcelWriter(f"{filename}.xlsx") as writer:
    individual_bat_datum_df.to_excel(writer, sheet_name="개인 타격 기록")
    individual_pitch_datum_df.to_excel(writer, sheet_name='개인 투구 기록')
    team_aggregated_bat_datum_df.to_excel(writer, sheet_name="팀 합계 타격 기록")
    team_aggregated_pitch_datum_df.to_excel(writer, sheet_name="팀 합계 투구 기록")

|██████████████████████████████--------------------| 61.11% 완료 (1320 / 2160)
ID: 20230872 데이터 수집 중 결측치 발생. 박스 스코어 페이지가 정의되어 있지 않습니다.

팀 합계 데이터 포인트 개수(720 경기 * 3년 * home/away team = 4320 points): 4318/4320/ 2160)
수집한 데이터를 현재 디렉터리에 파일명: 20241001-2231-KBO-data.xlsx 로 저장합니다.
