In [5]:
import os
import json
import requests
import pandas as pd

from serpapi import GoogleSearch

from dotenv import load_dotenv

from tqdm.notebook import tqdm
tqdm.pandas()

ModuleNotFoundError: No module named 'serpapi'

## Top Players ##
For the purposes of organization and simplicity, we create a dictionary which holds the top ten best chess players. Here is the structure and reasoning of the dictionary.

- 'name': Stores the players name in format convenient to naming files. This helps create consistency among the json files in /data
- 'fide_number': Holds the player's FIDE Number. This is necessary in order to utilize the FIDE Webscraper API 
- 'chess_come_username': Holds the player's Chess.com username. This is necessary in order to utilize the Chess.com published api

When these rankings ultimately change, this dictionary will need to be updated.


In [53]:
# Top Chess Players in the world as of December 16, 2024
# Ordering within the dictionary does not matter
top_players =[
    {'name': 'Magnus_Carlsen', 'fide_number': '1503014', 'chess_com_username': 'magnuscarlsen','country':'NO'},
    {'name': 'Fabiano_Caruana', 'fide_number': '2020009', 'chess_com_username': 'fabianocaruana','country':'US'},
    {'name': 'Hikaru_Nakamura', 'fide_number': '2016192', 'chess_com_username': 'hikaru','country':'US'},
    {'name': 'Arjun_Erigaisi', 'fide_number': '35009192', 'chess_com_username': 'ghandeevam2003','country':'IN'},
    {'name': 'Gukesh_Dommaraju', 'fide_number': '46616543', 'chess_com_username': 'gukeshdommaraju','country':'IN'},
    {'name': 'Nodirbek_Abdusattorov', 'fide_number': '14204118', 'chess_com_username': 'chesswarrior7197','country':'UZ'},
    {'name': 'Alireza-Firouzja', 'fide_number': '12573981', 'chess_com_username': 'firouzja2003','country':'FR'},
    {'name': 'Ian_Nepomniachtchi', 'fide_number': '4168119', 'chess_com_username': 'lachesisq','country':'RU'},
    {'name': 'Yi_Wei', 'fide_number': '8603405', 'chess_com_username': 'wei-yi','country':'CN'},
    {'name': 'Viswanathan_Anand', 'fide_number': '5000017', 'chess_com_username': 'thevish'.'country':'IN'}
]


## SERPAPI Setup

This API pulls Google Trends data for a provided keyword search and country (via code). It will be our proxy for understanding the popularity/social effect of the given chess players inside their home countries.

This API requires a user to sign up in order to be granted authentication. 

Set up can be found in the following URL: https://serpapi.com/

Once you have been given SERPAPI credentials, places them inside your .env

In [9]:
# Pulls key from .env 
SERPAPI_KEY = os.getenv("serpapi_key")

The following function is very similar to how Google Trends works.

It receives a country code, a desired keyword to search, and the SERPAPI Key which was imported in the previous step.

It returns the timeseries data for the popularity of the given keyword inside the specified country in a json file.

In [10]:
# Fetches Google Trends data since 2004 for a given keyword and country code - saves it to a JSON file.
def fetch_google_trends(country_code, keyword, SERPAPI_KEY):    
    
    # Keeps the file names uniform - changing artist_name from here on
    keyword_filename = keyword

    # Replace underscores with spaces in the artist's name
    keyword = keyword_filename.replace("_", " ")
    
    # Parameters for the API call
    params = {
        "engine": "google_trends",
        "q": keyword,
        "data_type": "TIMESERIES",
        "date": "all",  # Specify the time range
        "api_key": SERPAPI_KEY , # Replace with your SerpAPI key
        "geo": country_code , 
    }
    
    # Fetch data using SerpAPI
    search = GoogleSearch(params)
    results = search.get_dict()
    
    # Extract 'interest_over_time' section
    interest_over_time = results.get("interest_over_time", {})
    
    # Define the output file path
    output_path = f"../data/GTrends/{keyword}_{country_code}_GTrends.json"

    # Save the data as a JSON file
    with open(output_path, "w") as file:
        json.dump(interest_over_time, file, indent=4)
    
    return print(f"Google Trends data successfully saved to {output_path}")

Similar function but adjusted for player name search

In [None]:
def fetch_google_trends(country_code, keyword, SERPAPI_KEY):    
    
    # Keeps the file names uniform - changing artist_name from here on
    keyword_filename = keyword

    # Replace underscores with spaces in the artist's name
    keyword = keyword_filename.replace("_", " ")
    
    # Parameters for the API call
    params = {
        "engine": "google_trends",
        "q": keyword,
        "data_type": "TIMESERIES",
        "date": "all",  # Specify the time range
        "api_key": SERPAPI_KEY , # Replace with your SerpAPI key
        "geo": country_code , 
    }
    
    # Fetch data using SerpAPI
    search = GoogleSearch(params)
    results = search.get_dict()
    
    # Extract 'interest_over_time' section
    interest_over_time = results.get("interest_over_time", {})
    
    # Define the output file path
    output_path = f"../data/GTrends/{keyword}_GTrends.json"

    # Save the data as a JSON file
    with open(output_path, "w") as file:
        json.dump(interest_over_time, file, indent=4)
    
    return print(f"Google Trends data successfully saved to {output_path}")

Here are the following countries the top ten players hail from as well as the players themselves - we will use this list in order to retrieve the trend data for only the desired player's countries and the player's search rates itself in their country.

In [1]:
## we can remove these lists and dictonary and work with top_players if the the following 2 gtrend calls work

player_countries = [
    "FR", # France - Alireza
    "IN", # India - Arjun, Gukesh, Viswanathan
    "US", # United States - Fabiano, Hikaru
    "RU", # Russia - Ian
    "NO", # Norway - Magnus
    "UZ", # Uzbekistan - Nodirbek
    "CN", # China - Yi
]
players_countries = [
    {'name':"Magnus Carlsen",'country':"NO"},
    {'name':"Fabio Caruana",'country':"US"},
    {'name':"Hikaru Nakamura",'country':"US"},
    {'name':"Arjun Erigaisi",'country':'IN'},
    {'name':"Gukesh Dommaraju",'country':"IN"},
    {'name':"Nodirbek Abdusattorov",'country':"UZ"},
    {'name':"Ian Nepomniachtchi",'country':'RU'},
    {'name':"Yi Wei",'country':"CN"},
    {'name':"Viswanathan Anand",'country':"IN"}
]

We will now retrieve the google trends data for the keyword search of "Chess" inside each of the top ten players country. 

The following loop will utilize the player_countries list and the fetch_google_trends() function to iterate through all players

In [18]:
for country in player_countries:
    print(f"Retrieving Google Trends data for country {country}")
    fetch_google_trends(country, "Chess", SERPAPI_KEY)
print("All Google Trends data for countries have been processed.")


#Uncomment this part if the retrieval of player data works
#for player in top_players:
#    print(f"Retrieving Google Trends data for country {player['country'}")
#    fetch_google_trends(player['country'], "Chess", SERPAPI_KEY)
#print("All Google Trends data for countries have been processed.")

Retrieving Google Trends data for country FR


NameError: name 'GoogleSearch' is not defined

In [1]:
#run this code once 
for player in top_players:
    print(f"Retreving Google Trends data for player : {player['name']}")
    fetch_google_trends(player['country'],f"{player['name']}", SERPAPI_KEY)
    print("All Google Trends data for players have been processed")
   

NameError: name 'players_countries' is not defined

## FIDE Webscraper Setup
Here, we are setting up the FIDE webscraper to get a list of the current top 10 chess players.

Follow the install instructions on the project README.md in order to set up the correct environment.

The function below receives a fide_number and returns a json file containing FIDE information and historical ELO progression.

In [56]:
# Takes a player's FIDE number and returns their information and ELO history
def fide_data(fide_number):
    
    # Find the player's name based on their FIDE number
    player = next((p for p in top_players if p["fide_number"] == fide_number), None)

    if not player:
        raise ValueError(f"Player with FIDE number {fide_number} not found.")

    # Replace spaces with underscores for the file name
    player_name = player["name"].replace(" ", "_") 

    # Build the API URL
    url = f"http://localhost:3000/player/{fide_number}/info?include_history=true"

    # Make a GET request
    response = requests.get(url)

    # Convert the HTTP response's JSON content into a Python object
    data = response.json()

    # Create the name and destination of the JSON file
    output_path = f"../data/Fide/{player_name}_fide.json"

    # Save the data as a JSON file
    os.makedirs(os.path.dirname(output_path), exist_ok=True)  # Ensure the directory exists
    with open(output_path, "w") as file:
        json.dump(data, file, indent=4)

    print(f"FIDE data successfully saved to {output_path}")

The loop below utilizes the function we created above and the top_players() dictionary to iterate through the top ten players. 

In [60]:
for player in top_players:
    username = player["fide_number"]
    print(f"Processing Chess.com data for {player['name']} (Username: {username})...")
    fide_data(username)

print("All Chess.com data has been processed.")

Processing Chess.com data for Magnus_Carlsen (Username: 1503014)...
FIDE data successfully saved to ../data/Fide/Magnus_Carlsen_fide.json
Processing Chess.com data for Fabiano_Caruana (Username: 2020009)...
FIDE data successfully saved to ../data/Fide/Fabiano_Caruana_fide.json
Processing Chess.com data for Hikaru_Nakamura (Username: 2016192)...
FIDE data successfully saved to ../data/Fide/Hikaru_Nakamura_fide.json
Processing Chess.com data for Arjun_Erigaisi (Username: 35009192)...
FIDE data successfully saved to ../data/Fide/Arjun_Erigaisi_fide.json
Processing Chess.com data for Gukesh_Dommaraju (Username: 46616543)...
FIDE data successfully saved to ../data/Fide/Gukesh_Dommaraju_fide.json
Processing Chess.com data for Nodirbek_Abdusattorov (Username: 14204118)...
FIDE data successfully saved to ../data/Fide/Nodirbek_Abdusattorov_fide.json
Processing Chess.com data for Alireza-Firouzja (Username: 12573981)...
FIDE data successfully saved to ../data/Fide/Alireza-Firouzja_fide.json
Proc

##  Chess.com API Set Up

We use the Chess.com Published Data API to return the top players Chess.com stats.

The function receives a player's Chess.com username and outputs their publicly available Chess.com stats

In [58]:
# Takes a player's Chess.com username and returns player's game stats
def chess_com_data(username):
    # Look up the player's name in the top_players dictionary
    player = next((p for p in top_players if p["chess_com_username"] == username), None)

    if not player:
        print(f"Error: Player with username '{username}' not found in top_players.")
        return

    # Get the player's name
    player_name = player["name"].replace(" ", "_")

    # Builds URL for desired player's game stats
    url = f"https://api.chess.com/pub/player/{username}/stats"

    # Define headers
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # Make a GET request with headers
    response = requests.get(url, headers=headers)

    # Check for HTTP errors
    if response.status_code != 200:
        print(f"Error: Received status code {response.status_code} for user {username}")
        return

    # Ensure the response contains valid JSON
    if not response.text:
        print(f"Error: Empty response for user {username}")
        return

    try:
        # Parse the JSON content
        data = response.json()
    except json.JSONDecodeError:
        print(f"Error: Failed to decode JSON response for user {username}")
        return

    # Creates the name and destination of the JSON file
    output_path = f"../data/Chess_com/{player_name}_stats.json"

    # Save the data as a JSON file
    with open(output_path, "w") as file:
        json.dump(data, file, indent=4)

    print(f"Chess.com stats successfully saved to {output_path}")

Similar to the previous loop, this loop utilizes the function we created above and the top_players() dictionary to iterate through the top ten players. 

In [59]:

for player in top_players:
    username = player["chess_com_username"]
    print(f"Processing Chess.com data for {player['name']} (Username: {username})...")
    chess_com_data(username)

print("All Chess.com data has been processed.")


Processing Chess.com data for Magnus_Carlsen (Username: magnuscarlsen)...
Chess.com stats successfully saved to ../data/Chess_com/Magnus_Carlsen_stats.json
Processing Chess.com data for Fabiano_Caruana (Username: fabianocaruana)...
Chess.com stats successfully saved to ../data/Chess_com/Fabiano_Caruana_stats.json
Processing Chess.com data for Hikaru_Nakamura (Username: hikaru)...
Chess.com stats successfully saved to ../data/Chess_com/Hikaru_Nakamura_stats.json
Processing Chess.com data for Arjun_Erigaisi (Username: ghandeevam2003)...
Chess.com stats successfully saved to ../data/Chess_com/Arjun_Erigaisi_stats.json
Processing Chess.com data for Gukesh_Dommaraju (Username: gukeshdommaraju)...
Chess.com stats successfully saved to ../data/Chess_com/Gukesh_Dommaraju_stats.json
Processing Chess.com data for Nodirbek_Abdusattorov (Username: chesswarrior7197)...
Chess.com stats successfully saved to ../data/Chess_com/Nodirbek_Abdusattorov_stats.json
Processing Chess.com data for Alireza-Firo