## NB01 - Data Collection

In [3]:
# Importing necessary packages
import os
import json
import requests
import pandas as pd
import numpy as np
import subprocess

from serpapi import GoogleSearch

from dotenv import load_dotenv

from functions import *

from tqdm.notebook import tqdm
tqdm.pandas()

In [None]:
# Top chess players in the world as of December 16, 2024.
# Each row below becomes one dict; the order of the rows in the list does not matter.
_PLAYER_FIELDS = ('name', 'fide_number', 'chess_com_username', 'country')

_PLAYER_ROWS = (
    ('Magnus_Carlsen', '1503014', 'magnuscarlsen', 'NO'),
    ('Fabiano_Caruana', '2020009', 'fabianocaruana', 'US'),
    ('Hikaru_Nakamura', '2016192', 'hikaru', 'US'),
    ('Arjun_Erigaisi', '35009192', 'ghandeevam2003', 'IN'),
    ('Gukesh_Dommaraju', '46616543', 'gukeshdommaraju', 'IN'),
    ('Nodirbek_Abdusattorov', '14204118', 'chesswarrior7197', 'UZ'),
    ('Alireza_Firouzja', '12573981', 'firouzja2003', 'FR'),
    ('Ian_Nepomniachtchi', '4168119', 'lachesisq', 'RU'),
    ('Yi_Wei', '8603405', 'wei-yi', 'CN'),
    ('Viswanathan_Anand', '5000017', 'thevish', 'IN'),
)

# Pair every row with the field names to get the list of per-player dicts
top_players = [dict(zip(_PLAYER_FIELDS, row)) for row in _PLAYER_ROWS]

In [None]:
# Testing the fetch_fide_data function with Magnus Carlsen's FIDE number.
# fetch_fide_data is not defined in this notebook - presumably it comes from `from functions import *`.
# NOTE(review): the ID is passed as an int here, while `top_players` stores FIDE numbers as strings -
# confirm the helper accepts both forms.
fide_number = 1503014
fide_data = fetch_fide_data(fide_number)
print(fide_data)

In [None]:
# FIDE IDs of every player listed in `top_players`
top_10_fide_numbers = [player['fide_number'] for player in top_players]

# Fetch the records for each FIDE ID and pool them in one flat list.
# The progress bar gives feedback while the per-player fetches run.
all_data = []
for fide_number in tqdm(top_10_fide_numbers, desc="Fetching FIDE data"):
    player_data = fetch_fide_data_with_history(fide_number)
    all_data.extend(player_data)

# Build the DataFrame once from the pooled records
df_combined = pd.DataFrame(all_data)

# Show a short preview instead of printing the whole frame as plain text
df_combined.head()


In [None]:
# Render the combined FIDE frame with the notebook's rich display
df_combined

In [None]:
# Make sure the target folder exists first: to_csv does not create missing directories
os.makedirs("../data/Fide", exist_ok=True)

# NOTE(review): the DataFrame index is written as an unnamed first column; pass index=False
# here only after confirming that no downstream reader relies on that column.
df_combined.to_csv("../data/Fide/fide_data.csv")

In [None]:
# Turn empty / whitespace-only rating cells into NaN so they count as missing values
rating_columns = ['standard', 'rapid', 'blitz']
df_combined[rating_columns] = df_combined[rating_columns].replace(
    to_replace=r'^\s*$',
    value=np.nan,
    regex=True,
)

In [None]:
# Rows where at least one of the three ratings is missing
has_missing_rating = df_combined[['standard', 'rapid', 'blitz']].isna().any(axis=1)
df_combined[has_missing_rating]

### Step 3: Google Trends data via SERPAPI

In [None]:
# Pulls key from .env
# load_dotenv() has to run first so that the variables stored in the .env file are
# available through os.getenv; without it only variables already exported in the
# environment are visible.
load_dotenv()
SERPAPI_KEY = os.getenv("serpapi_key")

# Warn early (without stopping the notebook) instead of failing later inside an API call
if not SERPAPI_KEY:
    print("Warning: 'serpapi_key' was not found in the environment or the .env file")

In [None]:
# Fetches Google Trends data for a given keyword and country code - saves it to a JSON file.
def fetch_google_trends(country_code, keyword, destination, SERPAPI_KEY):
    """Query Google Trends through SerpAPI and save the 'interest_over_time' section as JSON.

    The request uses the `google_trends` engine with `data_type="TIMESERIES"` and
    `date="all"`. The result is written to
    `../data/{destination}/{keyword with spaces}_{country_code}_{destination}.json`.

    Args:
        country_code: Sent as the `geo` parameter; also part of the file name.
        keyword: Search term. Underscores are replaced with spaces before querying.
        destination: Sub-folder of `../data` that receives the file; also used as
            the file-name suffix. The folder is created when it does not exist.
        SERPAPI_KEY: Key sent as the `api_key` parameter.

    Returns:
        None. A status message is printed. When the response carries no
        'interest_over_time' data, nothing is written (an existing file is left
        untouched) and the message states the reason.
    """
    # Names are stored with underscores; the actual search uses spaces
    search_term = keyword.replace("_", " ")

    # Parameters for the API call
    params = {
        "engine": "google_trends",
        "q": search_term,
        "data_type": "TIMESERIES",
        "date": "all",  # full available time range
        "api_key": SERPAPI_KEY,
        "geo": country_code,
    }

    # Fetch data using SerpAPI
    results = GoogleSearch(params).get_dict()

    # Extract the 'interest_over_time' section; bail out instead of saving an empty
    # file and reporting success when the request did not return any data
    interest_over_time = results.get("interest_over_time")
    if not interest_over_time:
        reason = results.get("error", "no 'interest_over_time' section in the response")
        print(f"Google Trends data for '{search_term}' ({country_code}) was NOT saved: {reason}")
        return None

    # NOTE(review): the file name uses the space-separated term, not the original
    # underscore form - kept as is so existing file paths stay valid; confirm intent.
    output_path = f"../data/{destination}/{search_term}_{country_code}_{destination}.json"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Save the data as a JSON file
    with open(output_path, "w", encoding="utf-8") as file:
        json.dump(interest_over_time, file, indent=4)

    print(f"Google Trends data successfully saved to {output_path}")
    return None

In [None]:
# We will now retrieve the Google Trends data for the keyword "Chess" inside each of
# the top ten players' countries.

# Retrieves the country-level Google Trends data (saved under GTrends_Country)
destination = "GTrends_Country"
keyword = "Chess"

# Unique country codes taken from `top_players`; sorted so the request order is stable
player_countries = sorted({player['country'] for player in top_players})

# The loop is switched off by default - presumably to avoid spending API quota on every
# run. Set this flag to True to run the loop.
RUN_COUNTRY_TRENDS = False

if RUN_COUNTRY_TRENDS:
    for country_code in player_countries:
        # Call the fetch_google_trends function
        fetch_google_trends(country_code, keyword, destination, SERPAPI_KEY)