In [4]:
import os
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
import time
import pandas as pd

In [9]:
def requestWithHandlingHttperr(url):
    RETRY_COUNT = 12                # 기본 반복 12회
    RETRY_DELAY_SEC = 10            # 대기 10초
    ERRNO_10054 = 10054
    ERRNO_500 = 500
    ERRNO_503 = 503
    ERRNO_504 = 504

    REQUEST_INTERVAL_SEC = 0.1

    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36'}  # 서버에 내 신분을 속이기 위한 유저에이전트... 자세히는 잘 모릅니다

    time.sleep(REQUEST_INTERVAL_SEC)    # 먼저 0.1초 쉬고

    for i in range(RETRY_COUNT):
        try:
            result = requests.get(url, headers = headers)       # API에 request 요청
            result.raise_for_status()                           # http에러가 나오면 예외를 발생시킴 -> except로 점프
            return result
        except requests.exceptions.ConnectionError as e:
            if isinstance(e.args[0], ConnectionResetError) and e.args[0].winerror == ERRNO_10054:
                print(f"Attempt {i + 1} failed with error 10054. Retrying in {RETRY_DELAY_SEC} seconds...")
                time.sleep(RETRY_DELAY_SEC)
            else:                           # 다른 http 에러면
                raise
        except requests.exceptions.HTTPError as e:
            if e.response.status_code == ERRNO_500:
                print(f"Attempt {i + 1} failed with 500 Internal Server Error. Retrying in {RETRY_DELAY_SEC} seconds...")
                time.sleep(RETRY_DELAY_SEC)
            elif e.response.status_code == ERRNO_503:
                print(f"Attempt {i + 1} failed with 503 Service Unavailable. Retrying in {RETRY_DELAY_SEC} seconds...")
                time.sleep(RETRY_DELAY_SEC)
            elif e.response.status_code == ERRNO_504:
                print(f"Attempt {i + 1} failed with 504 Gateway Timeout. Retrying in {RETRY_DELAY_SEC} seconds...")
                time.sleep(RETRY_DELAY_SEC)
            else:  # 다른 HTTPError 예외 처리
                raise
    base = url[:url.rfind("/")]
    tournament_name = base[base.rfind("/")+1:]
    print(f"Failed to fetch data from tournament : {tournament_name} after {RETRY_COUNT} attempts.")
    raise Exception(f"Failed to fetch data from tournament : {tournament_name} after {RETRY_COUNT} attempts")

In [11]:
base_url = "https://lol.fandom.com/wiki/LPL/2023_Season/Spring_Season"

match_history = base_url + "/Match_History"
match_history

'https://lol.fandom.com/wiki/LPL/2023_Season/Spring_Season/Match_History'

In [12]:
response = requestWithHandlingHttperr(match_history)
soup = BeautifulSoup(response.text, "html.parser")
soup

<!DOCTYPE html>

<html class="client-nojs sse-control" dir="ltr" lang="en">
<head>
<meta charset="utf-8"/>
<title>LPL 2023 Spring - Match History - Leaguepedia | League of Legends Esports Wiki</title>
<script>document.documentElement.className="client-js sse-control";RLCONF={"wgBreakFrames":false,"wgSeparatorTransformTable":["",""],"wgDigitTransformTable":["",""],"wgDefaultDateFormat":"dmy","wgMonthNames":["","January","February","March","April","May","June","July","August","September","October","November","December"],"wgRequestId":"4845607acce866e5d243abd64f4242a1","wgCSPNonce":false,"wgCanonicalNamespace":"","wgCanonicalSpecialPageName":false,"wgNamespaceNumber":0,"wgPageName":"LPL/2023_Season/Spring_Season/Match_History","wgTitle":"LPL/2023 Season/Spring Season/Match History","wgCurRevisionId":3418188,"wgRevisionId":3418188,"wgArticleId":763604,"wgIsArticle":true,"wgIsRedirect":false,"wgAction":"view","wgUserName":null,"wgUserGroups":["*"],"wgCategories":["Tournament Match Histories

In [39]:
NUMBER_OF_PLAYERS_OF_A_TEAM = 5
result = []

for match in soup.find_all("tr", class_=["mhgame-red multirow-highlighter", "mhgame-blue multirow-highlighter"]):
    data = match.find_all("td")
    href = data[1].find("a").attrs["href"]
    patch = href[href.rfind("_")+1:]
    blueteam = data[2].find("img").attrs["alt"][:data[2].find("img").attrs["alt"].find("logo std")]
    redteam = data[3].find("img").attrs["alt"][:data[3].find("img").attrs["alt"].find("logo std")]
    winner = data[4].find("img").attrs["alt"][:data[4].find("img").attrs["alt"].find("logo std")]
    if winner == blueteam: winner_side = "Blue"
    elif winner == redteam: winner_side = "Red"
    row = {
        "date" : data[0].text,
        "patch" : patch,
        "blueteam" : blueteam,
        "redteam" : redteam,
        "winner_side" : winner_side
    }
    for idx, ban in enumerate(data[5].find_all("span")):
        row[f"ban_{idx}"] = ban.attrs["title"]
    for idx, ban in enumerate(data[6].find_all("span")):
        row[f"ban_{idx + NUMBER_OF_PLAYERS_OF_A_TEAM}"] = ban.attrs["title"]
    for idx, pick in enumerate(data[7].find_all("span")):
        row[f"pick_{idx}"] = pick.attrs["title"]
    for idx, pick in enumerate(data[7].find_all("span")):
        row[f"pick_{idx + NUMBER_OF_PLAYERS_OF_A_TEAM}"] = pick.attrs["title"]
    result.append(row)

df = pd.DataFrame(result)
df.to_excel("test.xlsx", index = False)

In [28]:
period = pd.period_range(start='2022-01-13 00:00:00',end='2022-01-13 02:30:00',freq='30T')
type(period)

  period = pd.period_range(start='2022-01-13 00:00:00',end='2022-01-13 02:30:00',freq='30T')


pandas.core.indexes.period.PeriodIndex