In [11]:
import requests
import json
import os
import pandas as pd
import time
import glob
import datetime

# Take a single timezone-aware UTC snapshot so the year and month always describe the same
# instant (two separate clock reads could straddle a month/year boundary), and avoid
# datetime.utcnow(), which is deprecated.
_utc_now = datetime.datetime.now(datetime.timezone.utc)
current_year = _utc_now.year
current_month = _utc_now.month

# Base folder under which the per-month archives and the merged per-user files are written.
CHESSCOM_JSON_FOLDER = "/data/chess_prj/games/"

# Headers sent with every chess.com API request.
headers = {"User-Agent":"bob@bobert.io"}

# Table of data sources; the distinct values of its gm_username column are the
# chess.com accounts whose games get downloaded.
data_sources_df = pd.read_csv("/data/chess_prj/data_sources.csv")
usernames = data_sources_df["gm_username"].unique()

# Gather the archives. Avoid re-downloading archives as much as possible, because the copies
# we already hold are the valuable ones: people change usernames and we then lose the
# ability to find their games again.
ARCHIVE_FOLDER = os.path.join(CHESSCOM_JSON_FOLDER, "archives")
REQUEST_TIMEOUT_SECONDS = 30  # without a timeout a stalled connection blocks the cell forever


def _fetch_ok(url):
    """GET ``url`` with the shared headers; raise RuntimeError unless the reply is HTTP 200."""
    resp = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    if resp.status_code != 200:
        raise RuntimeError(f"GET {url} returned HTTP {resp.status_code}")
    return resp


def _remove_if_exists(path):
    """Delete ``path``; a file that is already missing is not an error."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def _write_text_atomically(path, text):
    """Write ``text`` to ``path`` through a temp file so ``path`` is never left half-written."""
    tmp_path = path + ".tmp"
    with open(tmp_path, "wt", encoding='utf8') as fp:
        fp.write(text)
    os.replace(tmp_path, path)


os.makedirs(ARCHIVE_FOLDER, exist_ok=True)

for username in usernames:
    print(username)
    resp = _fetch_ok(f"https://api.chess.com/pub/player/{username}/games/archives")
    archive_urls = json.loads(resp.text)['archives']
    for archive_url in archive_urls:
        print(archive_url)
        # Archive URLs end in ".../games/<YYYY>/<MM>"; read the date from the end so the
        # parse does not depend on how many path segments come before it.
        year_text, month_text = archive_url.rstrip('/').split('/')[-2:]
        archive_year = int(year_text)
        archive_month = int(month_text)

        archive_stem = f"{username}_{archive_year:04}{archive_month:02}"
        PART_fname = os.path.join(ARCHIVE_FOLDER, f"{archive_stem}.PART.json")
        DONE_fname = os.path.join(ARCHIVE_FOLDER, f"{archive_stem}.DONE.json")

        if os.path.exists(DONE_fname):
            # Already final. Drop a PART copy of the same month if an interrupted run left
            # one behind, so the merge step cannot pick the month up twice.
            _remove_if_exists(PART_fname)
            continue
        resp = _fetch_ok(archive_url)

        # A month strictly before the current UTC month can no longer gain games.
        is_past_month = (archive_year, archive_month) < (current_year, current_month)
        if is_past_month:
            # The archive is certainly in the past: save as DONE.
            output_fname = DONE_fname
        else:
            # The archive is not necessarily final: save as PART so it is fetched again later.
            output_fname = PART_fname

        _write_text_atomically(output_fname, resp.text)
        if is_past_month:
            # Only discard the partial copy once the final one is safely on disk.
            _remove_if_exists(PART_fname)


# Now make the final outputs: one JSON file per username holding the concatenated
# 'games' lists of all of that user's archive files.
for username in usernames:
    print(username)
    # Match only "<username>_<YYYYMM>.<suffix>.json". A bare "<username>*" prefix would also
    # pick up the archives of any other player whose name merely starts with this username.
    # glob.escape keeps glob metacharacters in the path or username from acting as wildcards.
    archive_pattern = (
        glob.escape(os.path.join(CHESSCOM_JSON_FOLDER, "archives", username))
        + "_[0-9][0-9][0-9][0-9][0-9][0-9].*.json"
    )
    # glob returns files in arbitrary order; the zero-padded YYYYMM in the name makes the
    # sorted order chronological, so the merged output is stable between runs.
    archive_fnames = sorted(glob.glob(archive_pattern))
    games = []
    for archive_fname in archive_fnames:
        with open(archive_fname, "rt", encoding='utf8') as fp:
            data = json.load(fp)
        games.extend(data['games'])
    with open(os.path.join(CHESSCOM_JSON_FOLDER, f"{username}.json"), "wt", encoding='utf8') as fp:
        json.dump(games, fp)

VBKN
https://api.chess.com/pub/player/vbkn/games/2021/01
https://api.chess.com/pub/player/vbkn/games/2021/02
https://api.chess.com/pub/player/vbkn/games/2021/03
https://api.chess.com/pub/player/vbkn/games/2021/04
https://api.chess.com/pub/player/vbkn/games/2021/06
https://api.chess.com/pub/player/vbkn/games/2023/01
https://api.chess.com/pub/player/vbkn/games/2023/02
https://api.chess.com/pub/player/vbkn/games/2023/03
https://api.chess.com/pub/player/vbkn/games/2023/04
https://api.chess.com/pub/player/vbkn/games/2023/05
sterkurstrakur
https://api.chess.com/pub/player/sterkurstrakur/games/2023/01
PoojaFan69
https://api.chess.com/pub/player/poojafan69/games/2023/05
https://api.chess.com/pub/player/poojafan69/games/2023/06
https://api.chess.com/pub/player/poojafan69/games/2023/07
slowbrah
https://api.chess.com/pub/player/slowbrah/games/2023/05
https://api.chess.com/pub/player/slowbrah/games/2023/06
https://api.chess.com/pub/player/slowbrah/games/2023/07
https://api.chess.com/pub/player/slo

VBKN
sterkurstrakur
PoojaFan69
slowbrah
Hippopotamus
systemplayer
speedonly
redwinereduction
systematic
ebenetzebeth
iattackuresign
tungjatjeta
wonestall


In [13]:
# Inspect the merged games. `games` is reassigned for every username in the merge loop,
# so at this point it holds only the games of the last username processed.
# NOTE(review): this displays the entire list; consider a slice such as games[:3].
games

[{'url': 'https://www.chess.com/game/live/71453068009',
  'pgn': '[Event "Live Chess"]\n[Site "Chess.com"]\n[Date "2023.03.01"]\n[Round "-"]\n[White "wonestall"]\n[Black "Nate614"]\n[Result "1-0"]\n[CurrentPosition "1r2R3/p1p2p1k/3p1B1p/5Qp1/8/7P/PPP2PP1/6K1 b - -"]\n[Timezone "UTC"]\n[ECO "C40"]\n[ECOUrl "https://www.chess.com/openings/Kings-Pawn-Opening-Kings-Knight-Variation"]\n[UTCDate "2023.03.01"]\n[UTCTime "23:42:12"]\n[WhiteElo "548"]\n[BlackElo "332"]\n[TimeControl "180"]\n[Termination "wonestall won by checkmate"]\n[StartTime "23:42:12"]\n[EndDate "2023.03.01"]\n[EndTime "23:44:00"]\n[Link "https://www.chess.com/game/live/71453068009"]\n\n1. e4 {[%clk 0:03:00]} 1... e5 {[%clk 0:02:59.1]} 2. Nf3 {[%clk 0:02:59.9]} 2... Bd6 {[%clk 0:02:57.1]} 3. Nc3 {[%clk 0:02:59.8]} 3... Nf6 {[%clk 0:02:54.9]} 4. Bc4 {[%clk 0:02:59.7]} 4... Nc6 {[%clk 0:02:53.6]} 5. d3 {[%clk 0:02:59.6]} 5... O-O {[%clk 0:02:48.7]} 6. h3 {[%clk 0:02:59.5]} 6... b5 {[%clk 0:02:46]} 7. O-O {[%clk 0:02:59.4]} 7.

In [9]:
# Show the first lines of the file that archive_fname points to.
# NOTE(review): archive_fname is a loop variable left over from the merge loop, so this
# cell depends on kernel state and on which file that loop visited last.
!head {archive_fname}

[]

In [10]:
# Display which archive file the merge loop visited last (loop variable left in the kernel).
archive_fname

'/data/chess_prj/games/archives/VBKN_202106.DONE.json'