In [8]:
from datetime import datetime
from multiprocess import Pool
import pandas
import psutil
import requests
from tqdm.notebook import tqdm

# Browser-style User-Agent header value used for the HTTP requests below.
# The previous value began with a stray "ArithmeticError" token (most likely an
# editor auto-completion accident), which made the header a malformed UA string.
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:127.0) Gecko/20100101 Firefox/127.0"

# Input list of players: "titled.csv" must contain a "username" column.
titled_players_df = pandas.read_csv("titled.csv")
usernames: list[str] = list(titled_players_df["username"])


def chunks(xs, n):
    """Split ``xs`` into consecutive slices of at most ``n`` items each.

    A chunk size below 1 is raised to 1. The result is a generator of
    slices of ``xs``; the last slice is shorter when ``len(xs)`` is not a
    multiple of the chunk size, and an empty ``xs`` yields nothing.
    """
    size = max(1, n)
    offsets = range(0, len(xs), size)
    return (xs[offset : offset + size] for offset in offsets)

# Time controls exported; each one is read from the "chess_<control>" section
# of the stats payload.
_STATS_TIME_CONTROLS = ("rapid", "blitz", "bullet")

# Fields exported per time control, in output order:
# (output key suffix, payload section, field inside that section).
# The "rating_draw" / "rating_loss" suffixes hold the record's draw/loss
# counts; the names are kept unchanged because they are the emitted keys.
_STATS_FIELDS = (
    ("rating_last", "last", "rating"),
    ("rating_deviation", "last", "rd"),
    ("rating_best", "best", "rating"),
    ("record_win", "record", "win"),
    ("rating_draw", "record", "draw"),
    ("rating_loss", "record", "loss"),
)

# Upper bound (seconds) for the stats request so a stalled connection cannot
# block the caller indefinitely.
_STATS_TIMEOUT_SECONDS = 30


def _flatten_stats(data: dict) -> dict[str, int]:
    """Flatten a stats payload into ``<control>_<field>`` keys, 0 when missing."""
    flat = {}
    for control in _STATS_TIME_CONTROLS:
        section = data.get(f"chess_{control}", {})
        for suffix, group, field in _STATS_FIELDS:
            flat[f"{control}_{suffix}"] = section.get(group, {}).get(field, 0)
    return flat


def get_stats(username):
    """Fetch a player's rapid/blitz/bullet statistics as one flat dict.

    Args:
        username: Player name interpolated into the stats endpoint URL.

    Returns:
        A dict with 18 keys (six per time control). Any value missing from
        the response is 0. On any failure (network error, timeout, invalid
        JSON, unexpected payload shape) the exception is printed and every
        value is 0. The key order is the same on success and on failure, so
        a table built from these dicts gets a stable column order.
    """
    try:
        url = f"https://api.chess.com/pub/player/{username}/stats"
        response = requests.get(
            url,
            headers={"User-Agent": user_agent},
            timeout=_STATS_TIMEOUT_SECONDS,
        )
        return _flatten_stats(response.json())
    except Exception as exception:
        # Best effort: report the problem and fall back to an all-zero record.
        print(exception)
        return _flatten_stats({})


# strftime pattern used for the "joined_date" / "last_online_date" fields.
_PLAYER_DATE_FORMAT = "%Y-%m-%d"

# Upper bound (seconds) for each HTTP request made while building a player.
_PLAYER_TIMEOUT_SECONDS = 30


def _get_country(country_url: str) -> tuple[str, str]:
    """Return ``(name, code)`` from a country endpoint, or ``("", "")``."""
    if not country_url:
        # No country URL in the profile: requesting "" would only raise.
        return "", ""
    try:
        response = requests.get(
            country_url,
            headers={"User-Agent": user_agent},
            timeout=_PLAYER_TIMEOUT_SECONDS,
        )
        country = response.json()
        return country.get("name", ""), country.get("code", "")
    except Exception as exception:
        # A failed country lookup should not discard the rest of the profile.
        print(exception)
        return "", ""


def _player_record(
    username: str,
    data: dict,
    stats: dict,
    country_name: str,
    country_code: str,
) -> dict:
    """Build the flat player record; fields absent from ``data`` get defaults."""
    joined: int = data.get("joined", 0)
    last_online: int = data.get("last_online", 0)
    return {
        "id": data.get("player_id", 0),
        "username": username,
        "name": data.get("name", ""),
        "title": data.get("title", ""),
        "country": country_name,
        "country_code": country_code,
        "verified": data.get("verified", False),
        "status": data.get("status", ""),
        "league": data.get("league", ""),
        "avatar": data.get("avatar", ""),
        **stats,
        "is_streamer": data.get("is_streamer", False),
        "twitch_url": data.get("twitch_url", ""),
        "joined": joined,
        # fromtimestamp() without a tz argument renders in local time.
        "joined_date": datetime.fromtimestamp(joined).strftime(_PLAYER_DATE_FORMAT),
        "last_online": last_online,
        "last_online_date": datetime.fromtimestamp(last_online).strftime(
            _PLAYER_DATE_FORMAT
        ),
    }


def get_player(username: str):
    """Fetch a player's profile, country and stats as one flat dict.

    Args:
        username: Player name interpolated into the profile endpoint URL.

    Returns:
        A dict with the profile fields, the entries returned by
        ``get_stats(username)``, and the join / last-online timestamps both
        raw and formatted as ``YYYY-MM-DD``. If the profile cannot be fetched
        or parsed, the exception is printed and a record with default values
        (0, "", False) is returned; it still carries the username and the
        stats entries, so every record has the same keys.
    """
    # Fetched before the try block so the fallback record below can always
    # include the stats keys. Previously the except handler read `stats` and
    # `format_string`, which were unbound when the profile request itself
    # failed, so the handler raised instead of returning a default record.
    stats = get_stats(username)
    try:
        url = f"https://api.chess.com/pub/player/{username}"
        response = requests.get(
            url,
            headers={"User-Agent": user_agent},
            timeout=_PLAYER_TIMEOUT_SECONDS,
        )
        data: dict = response.json()
        country_name, country_code = _get_country(data.get("country", ""))
        return _player_record(username, data, stats, country_name, country_code)
    except Exception as exception:
        # Best effort: report the problem and emit an all-defaults record.
        print(exception)
        return _player_record(username, {}, stats, "", "")


def get_players(usernames: list[str], processes: int = 3):
    """Fetch several players in parallel worker processes.

    Args:
        usernames: Player names; each one is passed to ``get_player``.
        processes: Number of worker processes in the pool (default 3, the
            value that was previously hard-coded).

    Returns:
        A list with one ``get_player`` result per username, in input order.
    """
    # The context manager shuts the pool down when the block exits. Without
    # it, every call left its worker processes behind.
    with Pool(processes=processes) as pool:
        return pool.map(get_player, usernames)

# psutil.cpu_count() may return None when the count cannot be determined;
# fall back to safe values so the arithmetic and chunking below cannot fail.
logical_cpu_count = psutil.cpu_count() or 1
physical_core_count = psutil.cpu_count(logical=False) or logical_cpu_count

# Logical CPUs per physical core.
threads_count = logical_cpu_count // physical_core_count

print(len(usernames))
print(logical_cpu_count)
print(threads_count)


players = []
chunk_usernames = list(chunks(usernames, logical_cpu_count))
# The loop variable has its own name so the module-level `usernames` list is
# not overwritten by the last chunk.
for usernames_chunk in tqdm(chunk_usernames):
    # extend() appends in place instead of copying the whole list each time.
    players.extend(get_players(usernames_chunk))
    # The CSV is rewritten after every chunk, so the file on disk always
    # contains every player fetched so far.
    players_df = pandas.DataFrame(players)
    players_df.to_csv("titled-players.csv", index=False)

12699
8
1


  0%|          | 0/1588 [00:00<?, ?it/s]