In [3]:
import os
import json
import requests
import pandas as pd

from serpapi import GoogleSearch

from dotenv import load_dotenv

from tqdm.notebook import tqdm
tqdm.pandas()

### SERPAPI Setup
Setting up the SerpApi pull for Google Trends data.

In [2]:
# Pull the SerpApi key from .env.
# load_dotenv() copies the variables in a local .env file into the process
# environment; without it os.getenv() only sees variables that were already
# exported before the kernel started. Variables that are already set are left
# untouched by default.
load_dotenv()
SERPAPI_KEY = os.getenv("serpapi_key")

In [3]:
# Fetches Google Trends data since 2004 for a given keyword and country code - saves it to a JSON file.

# TODO: not finished - "data_type" and the country code ("geo") are now set in the params below, but this was
# noted as not yet pulling the correct information. Verify the saved output before relying on it.

def fetch_google_trends(country_code, keyword, SERPAPI_KEY):
    """Fetch Google Trends interest-over-time data via SerpApi and save it as JSON.

    Args:
        country_code: Value sent as the ``geo`` parameter of the request
            (e.g. ``"GB"``); also used in the output file name.
        keyword: Search term. Underscores are replaced with spaces for the
            query, while the form passed in is kept for the output file name.
        SERPAPI_KEY: SerpApi API key sent as ``api_key``.

    Returns:
        None. On success the ``interest_over_time`` section of the response is
        written to ``../data/GTrends/<keyword>_<country_code>_GTrends.json`` and
        the path is printed. If the response has no ``interest_over_time``
        data, an error is printed and no file is written.
    """
    # Keep the form the caller passed in (with underscores) for the file name
    # so file names stay uniform; only the search query uses spaces.
    keyword_filename = keyword
    query = keyword_filename.replace("_", " ")

    # Parameters for the API call
    params = {
        "engine": "google_trends",
        "q": query,
        "data_type": "TIMESERIES",
        "date": "all",  # Full available time range
        "api_key": SERPAPI_KEY,
        "geo": country_code,
    }

    # Fetch data using SerpAPI
    results = GoogleSearch(params).get_dict()

    # Extract the 'interest_over_time' section. If it is missing or empty,
    # report it instead of saving an empty file and claiming success.
    interest_over_time = results.get("interest_over_time")
    if not interest_over_time:
        print(
            "Error:",
            results.get("error", "no 'interest_over_time' data in the SerpApi response"),
        )
        return None

    # Define the output file path and make sure its directory exists
    output_path = f"../data/GTrends/{keyword_filename}_{country_code}_GTrends.json"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Save the data as a JSON file
    with open(output_path, "w") as file:
        json.dump(interest_over_time, file, indent=4)

    print(f"Google Trends data successfully saved to {output_path}")
    return None

In [4]:
# Example GB
# fetch_google_trends("GB", "Chess", SERPAPI_KEY)

In [5]:
# Example US
# fetch_google_trends("US", "Chess", SERPAPI_KEY)

### FIDE Webscraper Setup
Here, we set up calls to the FIDE web scraper (served locally at `http://localhost:3000`) to download the profile and rating history of each player in a hard-coded list of 10 top chess players.

Follow the install instructions in the project README.md to set up the correct environment.

In [7]:
def fide_data(fide_number):
    """Fetch one player's info from the local FIDE scraper and save it as JSON.

    Sends a GET request to
    ``http://localhost:3000/player/<fide_number>/info?include_history=true``.

    Args:
        fide_number: FIDE ID of the player, interpolated into the URL and the
            output file name.

    Returns:
        None. On a 200 response the parsed JSON is printed and written to
        ``../data/Fide/<fide_number>_fide.json``. On any other status the
        status code and body are printed and no file is written.
    """
    url = f"http://localhost:3000/player/{fide_number}/info?include_history=true"

    # A timeout keeps the cell from hanging forever if the scraper is not responding.
    response = requests.get(url, timeout=60)

    # Stop on a failed request: the body is not player data, so it must not be
    # parsed and saved as if it were.
    if response.status_code != 200:
        print("Error:", response.status_code, response.text)
        return None

    # Parse the body once and reuse it for both the printout and the file.
    player_info = response.json()
    print("Player Info:", player_info)

    output_path = f"../data/Fide/{fide_number}_fide.json"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Save the data as a JSON file
    with open(output_path, "w") as file:
        json.dump(player_info, file, indent=4)

    print(f"FIDE data successfully saved to {output_path}")
    return None

In [10]:
# FIDE numbers of the top chess players, in the order they are fetched.
# Each entry pairs the FIDE number with the player's name for readability;
# only the numbers end up in the list.
top_players = [
    fide_number
    for fide_number, _player_name in (
        ("1503014", "Magnus Carlsen"),
        ("2020009", "Fabiano Caruana"),
        ("2016192", "Hikaru Nakamura"),
        ("35009192", "Arjun Erigaisi"),
        ("46616543", "Gukesh Dommaraju"),
        ("14204118", "Nodirbek Abdusattorov"),
        ("12573981", "Alireza Firouzja"),
        ("4168119", "Ian Nepomniachtchi"),
        ("8603405", "Yi Wei"),
        ("5000017", "Viswanathan Anand"),
    )
]

In [12]:
# Fetch and save the FIDE data for every ID in top_players, one fide_data call per player.
for player_id in top_players:
    fide_data(player_id)

Player Info: {'name': 'Carlsen, Magnus ', 'federation': 'Norway', 'birth_year': 1990, 'sex': 'Male', 'title': 'Grandmaster', 'standard_elo': '', 'rapid_elo': '', 'blitz_elo': '', 'world_rank_all_players': 1, 'world_rank_active_players': 1, 'national_rank_all_players': 1, 'national_rank_active_players': 1, 'continental_rank_all_players': 1, 'continental_rank_active_players': 1, 'history': [{'date': '2024-Dec', 'numeric_date': 202412, 'standard': '2831', 'num_standard_games': '0', 'rapid': '2838', 'num_rapid_games': '9', 'blitz': '2890', 'num_blitz_games': '18'}, {'date': '2024-Nov', 'numeric_date': 202411, 'standard': '2831', 'num_standard_games': '0', 'rapid': '2825', 'num_rapid_games': '10', 'blitz': '2893', 'num_blitz_games': '6'}, {'date': '2024-Oct', 'numeric_date': 202410, 'standard': '2831', 'num_standard_games': '8', 'rapid': '2834', 'num_rapid_games': '0', 'blitz': '2888', 'num_blitz_games': '0'}, {'date': '2024-Sep', 'numeric_date': 202409, 'standard': '2832', 'num_standard_ga

### Obtaining Chess.com Data

Finally, we are defining a function to output player data based on the list of top 10 players, obtained through the FIDE webscraper

In [None]:
# Testing cURL for the player endpoint

# ! curl -v https://api.chess.com/pub/player/hikaru