In [25]:
# Chess.com title codes; each one is substituted into the /pub/titled/{title} URL.
TITLES: list[str] = [
    "GM", "IM", "FM", "CM", "NM",
    "WGM", "WIM", "WFM", "WCM", "WNM",
]
# Value of the User-Agent header sent with the api.chess.com requests.
# NOTE(review): the leading "ArithmeticError" looks like an accidental paste
# in front of a regular Firefox UA string — confirm before changing it.
USER_AGENT = "ArithmeticErrorMozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:127.0) Gecko/20100101 Firefox/127.0"

In [26]:
import requests
import json

# Download the usernames for every title in TITLES (one request per title) and
# store each list as ./json/titles/<TITLE>.json.
titled_players: list[dict] = []  # not used in this cell; kept in case another cell reads it
request_headers = {"User-Agent": USER_AGENT}
for title in TITLES:
    url = f"https://api.chess.com/pub/titled/{title}"
    # A timeout keeps a stalled connection from hanging the cell forever.
    response = requests.get(url, headers=request_headers, timeout=30)
    # Without this check an HTTP error body (which has no "players" key) would
    # be written to disk as an empty list.
    response.raise_for_status()
    data: dict = response.json()
    usernames: list[str] = data.get("players", [])
    json_file_path = f"./json/titles/{title}.json"
    # The context manager closes the file, which also flushes buffered data.
    with open(json_file_path, "w") as json_file:
        json.dump(usernames, json_file, indent=4)

KeyboardInterrupt: 

In [12]:
import json

# Concatenate the per-title username lists (in TITLES order) into one list
# and write it to ./json/titles/all.json.
usernames: list[str] = []
for title in TITLES:
    json_file_path = f"./json/titles/{title}.json"
    with open(json_file_path, "r") as json_file:
        usernames_per_title: list[str] = json.load(json_file)
    # extend() appends in place instead of rebuilding the whole list each pass.
    usernames.extend(usernames_per_title)

all_json_file_path = "./json/titles/all.json"
# Closing the file via the context manager guarantees the JSON is fully
# flushed to disk before any later cell reads it back.
with open(all_json_file_path, "w") as all_json_file:
    json.dump(usernames, all_json_file, indent=4)

In [None]:
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
import requests
import os
from tqdm.notebook import tqdm

# User-Agent header value used by get_player below.
USER_AGENT = "ArithmeticErrorMozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:127.0) Gecko/20100101 Firefox/127.0"
json_file_path = "./json/titles/all.json"
# Read the combined username list; the context manager closes the handle.
with open(json_file_path, "r") as json_file:
    usernames: list[str] = json.load(json_file)
# Reverse in place so the list is processed from its last entry to its first.
usernames.reverse()

def get_player(username: str, timeout: float = 30.0) -> None:
    """Download one player's profile and stats and save them as one JSON file.

    Requests ``/pub/player/{username}`` and ``/pub/player/{username}/stats``
    from api.chess.com, merges the two JSON objects (stats keys win when both
    contain the same key) and writes the result to
    ``./json/players/{player_id}.json``. ``player_id`` is taken from the
    profile and falls back to ``0`` when the profile has no such key.

    Args:
        username: Chess.com username to fetch.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.HTTPError: If either endpoint answers with an error status.
            Previously the error body was saved as if it were player data,
            and since it carries no ``player_id`` it was written to ``0.json``.
        requests.Timeout: If a request does not complete within ``timeout``.
    """
    headers = {"User-Agent": USER_AGENT}
    player_url = f"https://api.chess.com/pub/player/{username}"
    player_stats_url = f"https://api.chess.com/pub/player/{username}/stats"

    player_response = requests.get(player_url, headers=headers, timeout=timeout)
    player_response.raise_for_status()
    player: dict = player_response.json()

    player_stats_response = requests.get(player_stats_url, headers=headers, timeout=timeout)
    player_stats_response.raise_for_status()
    player_stats: dict = player_stats_response.json()

    player_id = player.get("player_id", 0)
    player_json_file_path = f"./json/players/{player_id}.json"
    # This function runs once per username on worker threads; the context
    # manager closes each output file instead of leaking its handle.
    with open(player_json_file_path, "w") as player_json_file:
        json.dump({
            **player,
            **player_stats
        }, player_json_file, indent=4)

if __name__ == "__main__":
    # os.cpu_count() returns None when the core count cannot be determined;
    # fall back to 1 so the multiplication below cannot raise TypeError.
    cpu_cores: int = os.cpu_count() or 1
    max_workers: int = cpu_cores * 2
    print(cpu_cores, max_workers)

    # username -> exception for every task that raised.
    failed_downloads: dict[str, Exception] = {}
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(get_player, username): username for username in usernames}

        # Use tqdm to display a progress bar
        for future in tqdm(as_completed(futures), total=len(futures), desc="Processing tasks"):
            try:
                future.result()
            except Exception as error:
                # Record the failure and keep going so one bad username does
                # not stop the progress loop for all the others.
                failed_downloads[futures[future]] = error

    if failed_downloads:
        print(f"{len(failed_downloads)} of {len(futures)} downloads failed; first failures:")
        for failed_username, error in list(failed_downloads.items())[:10]:
            print(f"  {failed_username}: {error!r}")

JSONDecodeError: Expecting ',' delimiter: line 13207 column 19 (char 246014)

In [None]:
import os
import pandas as pd
import json
from tqdm.notebook import tqdm

# Flatten every ./json/players/*.json file into one row-per-file DataFrame and
# save the result as ./csv/players.csv.
folder_path = "./json/players"
data_frames = []

# os.listdir returns entries in arbitrary order; sorting makes the CSV row
# order the same on every run.
for file_name in tqdm(sorted(os.listdir(folder_path))):
    if file_name.endswith(".json"):
        json_file_path = os.path.join(folder_path, file_name)
        # Close each file as soon as it has been parsed.
        with open(json_file_path, "r") as json_file:
            data = json.load(json_file)
        # json_normalize flattens nested objects into dotted column names.
        df = pd.json_normalize(data)
        data_frames.append(df)

if not data_frames:
    # pd.concat raises on an empty list; say what is actually wrong instead.
    raise ValueError(f"no .json files found in {folder_path}")

combined_df = pd.concat(data_frames, ignore_index=True)
combined_df.to_csv("./csv/players.csv", index=False)

In [29]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Fit a logistic regression that predicts a player's title (as an index into
# TITLES) from a single feature: the last blitz rating.
chess_rating = "chess_blitz.last.rating"
TITLES : list[str] = ["GM", "IM", "FM", "CM", "NM", "WGM", "WIM", "WFM", "WCM", "WNM"]

players_df = pd.read_csv("./csv/players.csv", usecols=['title', chess_rating], low_memory=False)

# Keep rows that have both a title and a positive rating. The previous
# `sk_df.fillna(0)` discarded its result, so missing values were never filled;
# dropping them explicitly is what the `> 0` filter was already doing for the
# rating column. `.copy()` makes sk_df an independent frame so the column
# assignments below do not trigger SettingWithCopyWarning.
sk_df = players_df[["title", chess_rating]].dropna()
sk_df = sk_df[sk_df[chess_rating] > 0].copy()

# Encode the title as its position in TITLES (raises ValueError for a title
# that is not listed there).
sk_df["title_index"] = sk_df["title"].apply(TITLES.index)
# Median rating of each title group, broadcast back onto every row in one
# grouped pass instead of re-filtering the whole frame for each row.
sk_df["median"] = sk_df.groupby("title_index")[chess_rating].transform("median")
# Keep only players rated above the median of their own title group.
sk_df = sk_df[sk_df[chess_rating] > sk_df["median"]]
print(sk_df[["title_index", "median"]])

# Separate feature and target
X = sk_df[[chess_rating]]  # Note the double brackets to keep X as a DataFrame
y = sk_df["title_index"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only, then apply it to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
print(X_train_scaled)
X_test_scaled = scaler.transform(X_test)
print(X_test_scaled)

# Initialize the model (e.g., Logistic Regression)
model = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=42)

# Train the model
model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = model.predict(X_test_scaled)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

      title_index  median
1               1  2570.0
3               0  2758.0
8               2  2456.0
9               0  2758.0
12              3  2323.0
...           ...     ...
9369            2  2456.0
9370            3  2323.0
9374            0  2758.0
9375            0  2758.0
9376            2  2456.0

[4317 rows x 2 columns]
[[ 0.25559731]
 [-0.37555039]
 [-0.61344452]
 ...
 [ 0.40124678]
 [ 1.39651815]
 [-0.68141427]]
[[-1.27946289e-01]
 [-5.11489890e-01]
 [-1.95916041e-01]
 [-4.82359997e-01]
 [ 1.70723702e+00]
 [-1.14263759e+00]
 [ 1.77031805e-02]
 [ 7.36240561e-01]
 [ 6.48850880e-01]
 [-1.03671377e-01]
 [ 4.49796605e-01]
 [ 6.73125791e-01]
 [-7.44529041e-01]
 [ 1.27999858e+00]
 [ 2.75017242e-01]
 [ 6.00301057e-01]
 [ 1.15376904e+00]
 [-6.18299501e-01]
 [ 8.91599995e-01]
 [-1.57473101e+00]
 [ 2.00339094e+00]
 [ 1.86745144e+00]
 [-1.02611801e+00]
 [ 1.49847278e+00]
 [-1.37656253e-01]
 [ 3.22681274e-02]
 [ 1.09550925e+00]
 [ 8.96454977e-01]
 [ 2.75017242e-01]
 [-8.22208758e-0