In [3]:
import os
import json
import requests
import pandas as pd

from serpapi import GoogleSearch

from dotenv import load_dotenv

from tqdm.notebook import tqdm
tqdm.pandas()

In [16]:
# FIDE ID numbers of the top chess players, stored as strings.
# The order here is mirrored by the top_player_usernames list.
top_players = [
    "1503014",   # Magnus Carlsen
    "2020009",   # Fabiano Caruana
    "2016192",   # Hikaru Nakamura
    "35009192",  # Arjun Erigaisi
    "46616543",  # Gukesh Dommaraju
    "14204118",  # Nodirbek Abdusattorov
    "12573981",  # Alireza Firouzja
    "4168119",   # Ian Nepomniachtchi
    "8603405",   # Yi Wei
    "5000017",   # Viswanathan Anand
]

In [20]:
# Chess.com usernames of the top chess players.
# Entries are positionally aligned with top_players (same player at the same index).

top_player_usernames = [
    "magnuscarlsen",     # Magnus Carlsen
    "fabianocaruana",    # Fabiano Caruana
    "hikaru",            # Hikaru Nakamura
    "ghandeevam2003",    # Arjun Erigaisi
    "gukeshdommaraju",   # Gukesh Dommaraju
    "chesswarrior7197",  # Nodirbek Abdusattorov
    "firouzja2003",      # Alireza Firouzja
    "lachesisq",         # Ian Nepomniachtchi
    "wei-yi",            # Yi Wei
    "thevish",           # Viswanathan Anand
]

### SERPAPI Setup
Set up the SerpApi client used to pull Google Trends data.

In [2]:
# Load variables from a local .env file into the process environment first
# (variables that are already set are left untouched); without this call
# os.getenv only sees the key if it was exported in the shell.
load_dotenv()

# Pulls key from .env
SERPAPI_KEY = os.getenv("serpapi_key")

In [3]:
# Fetches Google Trends data since 2004 for a given keyword and country code - saves it to a JSON file.
def fetch_google_trends(country_code, keyword, SERPAPI_KEY):
    """Query Google Trends through SerpApi and save the time series as JSON.

    Args:
        country_code: Value sent as the ``geo`` parameter of the request (e.g. "GB").
        keyword: Search term. Underscores are replaced with spaces before the
            term is sent as the query; the same spaced form is used in the
            output file name.
        SERPAPI_KEY: SerpApi API key sent with the request.

    Returns:
        None. The ``interest_over_time`` section of the response is written to
        ``../data/GTrends/{keyword}_{country_code}_GTrends.json`` and a
        confirmation message is printed.

    Raises:
        RuntimeError: If the response carries an ``error`` entry or contains no
            ``interest_over_time`` data, so a failed pull is never reported
            as a successful save.
    """
    # Underscores are treated as word separators in the incoming keyword
    search_term = keyword.replace("_", " ")

    # Parameters for the API call
    params = {
        "engine": "google_trends",
        "q": search_term,
        "data_type": "TIMESERIES",
        "date": "all",  # Full available time range
        "api_key": SERPAPI_KEY,
        "geo": country_code,
    }

    # Fetch data using SerpAPI
    results = GoogleSearch(params).get_dict()

    # Fail loudly instead of writing an empty file and claiming success
    api_error = results.get("error")
    if api_error:
        raise RuntimeError(
            f"SerpApi request failed for {search_term!r} ({country_code}): {api_error}"
        )

    # Extract 'interest_over_time' section
    interest_over_time = results.get("interest_over_time", {})
    if not interest_over_time:
        raise RuntimeError(
            f"No 'interest_over_time' data returned for {search_term!r} ({country_code})"
        )

    # Define the output file path (unchanged naming: keyword with spaces)
    output_path = f"../data/GTrends/{search_term}_{country_code}_GTrends.json"

    # Make sure the target folder exists so a fresh checkout does not fail on open()
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Save the data as a JSON file
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(interest_over_time, file, indent=4)

    print(f"Google Trends data successfully saved to {output_path}")

In [4]:
# Example GB
# fetch_google_trends("GB", "Chess", SERPAPI_KEY)

### FIDE Webscraper Setup
Here, we set up the FIDE web scraper used to pull profile information and rating history for each of the current top 10 chess players.

Follow the install instructions in the project's README.md to set up the correct environment. The code below expects the scraper service to be running locally at `http://localhost:3000`.

In [13]:
# Takes a player's fide number and saves the player's information and ELO history to a JSON file
def fide_data(fide_number, timeout=30):
    """Fetch a player's info and rating history from the local scraper and save it as JSON.

    Args:
        fide_number: FIDE ID of the player, interpolated into the request URL
            and the output file name.
        timeout: Seconds to wait for the scraper before giving up. Defaults
            to 30 so a stalled service cannot hang the notebook indefinitely.

    Returns:
        None. The decoded JSON response is written to
        ``../data/Fide/{fide_number}_fide.json`` and a confirmation message
        is printed.

    Raises:
        requests.HTTPError: If the scraper answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    # Local scraper endpoint; include_history=true is passed as a query flag
    url = f"http://localhost:3000/player/{fide_number}/info?include_history=true"

    # Make a GET request (bounded by the timeout)
    response = requests.get(url, timeout=timeout)

    # Stop here on an error status rather than saving an error payload as data
    response.raise_for_status()

    # Converts the HTTP response's JSON content into a Python object that can be serialized.
    data = response.json()

    # Creates the name and destination of the json file
    output_path = f"../data/Fide/{fide_number}_fide.json"

    # Make sure the target folder exists so a fresh checkout does not fail on open()
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Save the data as a JSON file
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(data, file, indent=4)

    print(f"FIDE data successfully saved to {output_path}")

In [15]:
# Fetch and save the FIDE data for every player in top_players (one JSON file per FIDE ID)
for player_id in top_players:
    fide_data(player_id)

FIDE data successfully saved to ../data/Fide/1503014_fide.json
FIDE data successfully saved to ../data/Fide/2020009_fide.json
FIDE data successfully saved to ../data/Fide/2016192_fide.json
FIDE data successfully saved to ../data/Fide/35009192_fide.json
FIDE data successfully saved to ../data/Fide/46616543_fide.json
FIDE data successfully saved to ../data/Fide/14204118_fide.json
FIDE data successfully saved to ../data/Fide/12573981_fide.json
FIDE data successfully saved to ../data/Fide/4168119_fide.json
FIDE data successfully saved to ../data/Fide/8603405_fide.json
FIDE data successfully saved to ../data/Fide/5000017_fide.json


### Chess.com API Setup

We use the Chess.com Published Data API to retrieve data (e.g. player stats) for the top players listed above.

In [19]:
# Smoke-test the Chess.com player stats endpoint with cURL.
# -v (verbose) also prints the connection/TLS handshake trace seen in the output.

! curl -v https://api.chess.com/pub/player/magnuscarlsen/stats

*   Trying 104.18.140.67:443...
* Connected to api.chess.com (104.18.140.67) port 443 (#0)
* ALPN, offering h2
* ALPN, offering http/1.1
*  CAfile: /etc/ssl/certs/ca-certificates.crt
*  CApath: /etc/ssl/certs
* TLSv1.0 (OUT), TLS header, Certificate Status (22):
* TLSv1.3 (OUT), TLS handshake, Client hello (1):
* TLSv1.2 (IN), TLS header, Certificate Status (22):
* TLSv1.3 (IN), TLS handshake, Server hello (2):
* TLSv1.2 (IN), TLS header, Finished (20):
* TLSv1.2 (IN), TLS header, Supplemental data (23):
* TLSv1.3 (IN), TLS handshake, Encrypted Extensions (8):
* TLSv1.3 (IN), TLS handshake, Certificate (11):
* TLSv1.3 (IN), TLS handshake, CERT verify (15):
* TLSv1.3 (IN), TLS handshake, Finished (20):
* TLSv1.2 (OUT), TLS header, Finished (20):
* TLSv1.3 (OUT), TLS change cipher, Change cipher spec (1):
* TLSv1.2 (OUT), TLS header, Supplemental data (23):
* TLSv1.3 (OUT), TLS handshake, Finished (20):
* SSL connection using TLSv1.3 / TLS_AES_256_GCM_SHA384
* ALPN, server accepted to us