In [3]:
import json
from tqdm import tqdm
import requests
from bs4 import BeautifulSoup
import os
import requests

# Download All Tournament Games

In [181]:
loaded_tournaments = {}
for file in os.listdir("../Data/Games"):
    tournament = "_".join(file.split("_")[0].split("-")[:-1])
    if tournament not in loaded_tournaments:
        loaded_tournaments[tournament] = 0
    loaded_tournaments[tournament] += 1

In [4]:
def downloadTournament(link, tournament):
    skipped = 0
    print("\033[32mDownloading {} games\033[0m".format(tournament))
    
    page = 1
    all_games = set()

    while True:
        if page > 1000:
            break
        paginated_link = f"{link}?&players={page}"
        ht = requests.get(paginated_link)
        soup = BeautifulSoup(ht.content, 'html.parser')
        hrefs = soup.find_all('a', href=True)
        games = [x["href"] for x in hrefs if "game" in x['href']]
        games = set(games)
        if len(games) == 0:
            break
        
        # Check if all games are already in all_games
        if all([x in all_games for x in games]):
            break

        # Add to all_games
        all_games = all_games.union(games)
        page += 1
        print(f"\033[32mPage {page} ({len(games)}) loaded.\033[0m", end = "")

    if len(all_games) == 0:
        print(f"\033[31mNo games found for {tournament}\033[0m")
        return

    for game in tqdm(all_games, desc=f"Games in {tournament}", leave=False):
        id = game.split("/")[-1]
        if os.path.exists(f"../Data/Games/{tournament}_{id}.json"):
            skipped += 1
            continue

        first_game = "https://www.chess.com/callback/live/game/" + id
        ht = requests.get(first_game)
        game_data = ht.json()
        with open(f"../Data/Games/{tournament}_{id}.json", "w") as f:
            json.dump(game_data, f)

    print("\033[31mDownloaded {} games and skipped {}\033[0m".format(len(all_games), skipped))


In [5]:
tournaments = []

tournaments_link = f"https://www.chess.com/tournament/live/titled-tuesdays?&page="
for i in range(1,7):
    ht = requests.get(tournaments_link + str(i))
    soup = BeautifulSoup(ht.content, 'html.parser')
    hrefs = soup.find_all('a', href=True)
    links = [x["href"] for x in hrefs if "tournament" in x['href'] and "returnUrl" not in x["href"]]

    for url in set(links):
        if 'chess.com/tournament/live/' in url and 'titled-tuesday' in url:
            parts = url.split('/')
            name = parts[-1]
            date = '-'.join(name.split('-')[-4:-1])
            tournaments.append({'url': url, 'name': name, 'date': date})

In [10]:
for tournament in tournaments:
    downloadTournament(tournament['url'], tournament['name'])

[32mDownloading early-titled-tuesday-blitz-august-01-2023-4198579 games[0m
[32mPage 2 (239) loaded.[0m[32mPage 3 (272) loaded.[0m[32mPage 4 (280) loaded.[0m[32mPage 5 (282) loaded.[0m[32mPage 6 (288) loaded.[0m[32mPage 7 (290) loaded.[0m[32mPage 8 (280) loaded.[0m[32mPage 9 (292) loaded.[0m[32mPage 10 (289) loaded.[0m[32mPage 11 (271) loaded.[0m[32mPage 12 (280) loaded.[0m[32mPage 13 (271) loaded.[0m[32mPage 14 (275) loaded.[0m[32mPage 15 (215) loaded.[0m[32mPage 16 (134) loaded.[0m[32mPage 17 (227) loaded.[0m[32mPage 18 (204) loaded.[0m[32mPage 19 (172) loaded.[0m[32mPage 20 (190) loaded.[0m[32mPage 21 (155) loaded.[0m[32mPage 22 (106) loaded.[0m[32mPage 23 (109) loaded.[0m

                                                                                                    

[31mDownloaded 2439 games and skipped 2439[0m
[32mDownloading late-titled-tuesday-blitz-august-15-2023-4225474 games[0m
[32mPage 2 (240) loaded.[0m[32mPage 3 (271) loaded.[0m[32mPage 4 (273) loaded.[0m[32mPage 5 (281) loaded.[0m[32mPage 6 (277) loaded.[0m[32mPage 7 (284) loaded.[0m[32mPage 8 (273) loaded.[0m[32mPage 9 (266) loaded.[0m[32mPage 10 (227) loaded.[0m[32mPage 11 (159) loaded.[0m[32mPage 12 (210) loaded.[0m[32mPage 13 (183) loaded.[0m[32mPage 14 (174) loaded.[0m[32mPage 15 (120) loaded.[0m[32mPage 16 (96) loaded.[0m

                                                                                                   

[31mDownloaded 1625 games and skipped 1625[0m
[32mDownloading early-titled-tuesday-blitz-september-05-2023-4265728 games[0m
[32mPage 2 (245) loaded.[0m[32mPage 3 (275) loaded.[0m[32mPage 4 (281) loaded.[0m[32mPage 5 (290) loaded.[0m[32mPage 6 (290) loaded.[0m[32mPage 7 (289) loaded.[0m[32mPage 8 (281) loaded.[0m[32mPage 9 (280) loaded.[0m[32mPage 10 (289) loaded.[0m[32mPage 11 (277) loaded.[0m[32mPage 12 (272) loaded.[0m[32mPage 13 (263) loaded.[0m[32mPage 14 (273) loaded.[0m[32mPage 15 (219) loaded.[0m[32mPage 16 (148) loaded.[0m[32mPage 17 (173) loaded.[0m[32mPage 18 (233) loaded.[0m[32mPage 19 (198) loaded.[0m[32mPage 20 (182) loaded.[0m[32mPage 21 (167) loaded.[0m[32mPage 22 (139) loaded.[0m[32mPage 23 (116) loaded.[0m[32mPage 24 (117) loaded.[0m[32mPage 25 (66) loaded.[0m

                                                                                                       

[31mDownloaded 2539 games and skipped 2539[0m
[32mDownloading early-titled-tuesday-blitz-august-08-2023-4212115 games[0m
[32mPage 2 (245) loaded.[0m[32mPage 3 (271) loaded.[0m[32mPage 4 (276) loaded.[0m[32mPage 5 (284) loaded.[0m[32mPage 6 (292) loaded.[0m[32mPage 7 (291) loaded.[0m[32mPage 8 (286) loaded.[0m[32mPage 9 (281) loaded.[0m[32mPage 10 (284) loaded.[0m[32mPage 11 (278) loaded.[0m[32mPage 12 (283) loaded.[0m[32mPage 13 (290) loaded.[0m[32mPage 14 (252) loaded.[0m[32mPage 15 (231) loaded.[0m[32mPage 16 (190) loaded.[0m[32mPage 17 (110) loaded.[0m[32mPage 18 (225) loaded.[0m[32mPage 19 (210) loaded.[0m[32mPage 20 (197) loaded.[0m[32mPage 21 (162) loaded.[0m