## NB01 - Data Collection

In [3]:
# Importing necessary packages
import os
import json
import requests
import pandas as pd
import numpy as np
import subprocess
import time 

from serpapi import GoogleSearch

from dotenv import load_dotenv

from functions import *

from tqdm.notebook import tqdm
tqdm.pandas()

In [None]:
# Top chess players in the world as of December 16, 2024.
# Each entry is a dict with the keys 'name', 'fide_number',
# 'chess_com_username' and 'country'; the order of the entries in the list
# does not matter.
top_players = [
    {'name': name, 'fide_number': fide_number, 'chess_com_username': username, 'country': country}
    for name, fide_number, username, country in (
        ('Magnus_Carlsen', '1503014', 'magnuscarlsen', 'NO'),
        ('Fabiano_Caruana', '2020009', 'fabianocaruana', 'US'),
        ('Hikaru_Nakamura', '2016192', 'hikaru', 'US'),
        ('Arjun_Erigaisi', '35009192', 'ghandeevam2003', 'IN'),
        ('Gukesh_Dommaraju', '46616543', 'gukeshdommaraju', 'IN'),
        ('Nodirbek_Abdusattorov', '14204118', 'chesswarrior7197', 'UZ'),
        ('Alireza_Firouzja', '12573981', 'firouzja2003', 'FR'),
        ('Ian_Nepomniachtchi', '4168119', 'lachesisq', 'RU'),
        ('Yi_Wei', '8603405', 'wei-yi', 'CN'),
        ('Viswanathan_Anand', '5000017', 'thevish', 'IN'),
    )
]

### Step 1: Fetching FIDE Data

In [None]:
# Testing the fetch_fide_data function with Magnus Carlsen's FIDE number.
# This is the same ID as the 'Magnus_Carlsen' entry in top_players, but it is
# passed here as an int, whereas top_players stores it as a string.
fide_number = 1503014
fide_data = fetch_fide_data(fide_number)
print(fide_data)

In [None]:
# Pull the FIDE ID out of every entry in top_players
top_10_fide_numbers = [entry['fide_number'] for entry in top_players]

# Call fetch_fide_data_with_history once per FIDE ID and gather everything it
# returns into a single flat list
all_data = []
for fide_number in top_10_fide_numbers:
    player_data = fetch_fide_data_with_history(fide_number)
    all_data += player_data

# Build one pandas dataframe from the gathered records
df_combined = pd.DataFrame(data=all_data)

# Show the dataframe as the cell output
df_combined


In [None]:
# Write the combined dataframe to disk as a CSV file
df_combined.to_csv("../data/Fide/fide_data.csv")

# Columns in which blank cells are treated as missing values
blank_check_cols = ['standard', 'rapid', 'blitz']

# Replace empty or whitespace-only entries in those columns with NaN
# (this runs after the CSV above has already been written)
df_combined[blank_check_cols] = df_combined[blank_check_cols].replace(
    to_replace=r'^\s*$', value=np.nan, regex=True
)

# Display every row that is missing a value in at least one of those columns
df_combined[df_combined[blank_check_cols].isna().any(axis=1)]

### Step 2: Google Trends Data via SERPAPI

In [None]:
# We will now retrieve the Google Trends data for the keyword search "Chess"
# inside each of the top ten players' countries.

# Destination passed to fetch_google_trends for the country-level results
destination = "GTrends_Country"

# Search term sent for every country
keyword = "Chess"

# The fetch loop is disabled by default; set this flag to True to run it.
# NOTE(review): `player_countries` and `SERPAPI_KEY` are not defined in any
# cell visible here — presumably they come from `functions` or the
# environment; confirm they exist before enabling.
RUN_COUNTRY_TRENDS_FETCH = False

if RUN_COUNTRY_TRENDS_FETCH:
    # The loop variable is `country_code` (not `countries`) so that running
    # this cell does not rebind the `countries` name that a later cell
    # iterates over.
    for country_code in player_countries:
        # Call the fetch_google_trends function for this country
        fetch_google_trends(country_code, keyword, destination, SERPAPI_KEY)



In [None]:
# Gather the records returned by gtrends_players for every player, then turn
# them into a single dataframe and save it as CSV
all_player_gtrends = []
for players in top_players:
    # The stored names use underscores; pass the space-separated form
    name = players["name"].replace("_", " ")
    country = players["country"]
    all_player_gtrends += gtrends_players(name, country)

final_player_gtrends_df = pd.DataFrame(data=all_player_gtrends)
final_player_gtrends_df.to_csv("../data/GTrends_Player/players_gtrends_data.csv")

In [None]:
# Gather the records returned by gtrends_country for every country, then turn
# them into a single dataframe and save it as CSV.
# NOTE(review): `countries` is not defined by any cell visible here —
# presumably it is provided by `functions`; confirm it is the collection of
# country codes.
all_country_gtrends = []
for country in countries:
    all_country_gtrends += gtrends_country(country)

final_country_gtrends_df = pd.DataFrame(data=all_country_gtrends)
final_country_gtrends_df.to_csv("../data/GTrends_Country/country_gtrends_data.csv")

### Step 3: Fetching Chess.com Data

In [None]:
# Request headers: a User-Agent string describing this script
headers = {"User-Agent": "Python script for educational use"}

# URL template for the Chess.com public API "stats" endpoint; the `{}` slot
# in the path is where the player identifier is filled in
base_url = "https://api.chess.com/pub/player/{}/stats"

In [None]:
# Fetching stats for all players in top_players and storing the result in df_players_stats
df_players_stats = fetch_all_players_stats(top_players)

# Displaying the dataframe
df_players_stats

In [None]:
# Writing df_players_stats to a CSV file; with to_csv's defaults the
# dataframe index is written as the first column
df_players_stats.to_csv("../data/Chess_com/chesscom.csv")