In [1]:
# This code uses the Chess.com public API to extract all of the games I have played on chess.com.
# It builds the API URL for my account and extracts the data returned from that URL.

import requests
import pandas as pd
from datetime import datetime


# Fetch the list of monthly archive URLs that cover every game the user has played.
def fetch_game_archives(username):
    """Fetch all game archive URLs for a user.

    Args:
        username: Chess.com username whose archive list is requested.

    Returns:
        list: The value of the "archives" key of the JSON response. An empty
        list is returned when the key is absent, the request fails (including
        a timeout), the server answers with a non-200 status, or the body is
        not valid JSON. Failures are reported with a printed message.
    """
    url = f"https://api.chess.com/pub/player/{username}/games/archives"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled connection,
        # which would hang the notebook cell.
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching archives: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching archives: {response.status_code}")
        return []

    try:
        return response.json().get("archives", [])
    except ValueError:
        # response.json() raises a ValueError subclass when the body is not JSON.
        print("Error fetching archives: response body is not valid JSON")
        return []



# Fetch the games stored under one specific archive URL.
def fetch_games_from_archive(archive_url):
    """Fetch all games from a specific archive.

    Args:
        archive_url: Full URL of one archive, as returned in the archive list.

    Returns:
        list: The value of the "games" key of the JSON response. An empty
        list is returned when the key is absent, the request fails (including
        a timeout), the server answers with a non-200 status, or the body is
        not valid JSON. Failures are reported with a printed message.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    try:
        # Without a timeout, a single stalled archive request would block the
        # whole download loop forever.
        response = requests.get(archive_url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching games from {archive_url}: {exc}")
        return []

    if response.status_code != 200:
        print(f"Error fetching games from {archive_url}: {response.status_code}")
        return []

    try:
        return response.json().get("games", [])
    except ValueError:
        # response.json() raises a ValueError subclass when the body is not JSON.
        print(f"Error fetching games from {archive_url}: response body is not valid JSON")
        return []



def save_games_to_csv(games, filename="C:\\Users\\kosot\\OneDrive\\Masaüstü\\CS210-HW2\\Project\\DSA210_Term_Project_bugrahan.yapilmisev\\Data_Extraction\\raw_chess_games.csv"):
    """Write the collected game records to a CSV file and report where it went.

    Args:
        games: Records to export; passed directly to ``pd.DataFrame``.
        filename: Destination path of the CSV file.
    """
    # The index is not written, so the file holds only the record fields.
    pd.DataFrame(games).to_csv(filename, index=False)
    print(f"Filtered games saved to {filename}")



def main():
    """Download the user's archived games and export the recent ones to CSV.

    Every archive URL returned for the user is fetched in turn. Games whose
    ``end_time`` is on or after the cutoff (September 20, 2022) are flattened
    into one record each and handed to ``save_games_to_csv``. Nothing is
    saved when no archives are returned.
    """
    username = "kosotbaba" # my chess.com username

    archive_urls = fetch_game_archives(username)
    if not archive_urls:
        print("No archives found.")
        return

    # Cutoff as a UNIX timestamp. The datetime is naive, so it is interpreted
    # in the local timezone of the machine running the notebook.
    cutoff_timestamp = int(datetime(2022, 9, 20).timestamp())

    records = []
    for archive_url in archive_urls:
        print(f"Fetching games from {archive_url}...")
        for game in fetch_games_from_archive(archive_url):
            # A missing 'end_time' defaults to 0, which is always before the cutoff.
            end_time = game.get("end_time", 0)
            if not end_time >= cutoff_timestamp:
                continue

            white = game.get("white", {})
            black = game.get("black", {})
            records.append({
                "url": game.get("url"),
                "time_control": game.get("time_control"),
                # Formatted in local time, matching how the cutoff was built.
                "end_time": datetime.fromtimestamp(end_time).strftime("%Y-%m-%d %H:%M:%S"),
                "rated": game.get("rated"),
                "fen": game.get("fen"),
                "pgn": game.get("pgn"),
                "white_player": white.get("username"),
                "white_rating": white.get("rating"),
                "black_player": black.get("username"),
                "black_rating": black.get("rating"),
                # This is the white player's result entry, regardless of which side the user played.
                "result": white.get("result"),
            })

    # Write everything that passed the date filter.
    save_games_to_csv(records)

# Entry point: run the extraction when this cell/script is executed directly.
# The captured output below this cell shows that the guard passes in the notebook.
if __name__ == "__main__":
    main()


Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2017/12...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2018/01...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2018/02...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2018/03...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2018/04...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2018/05...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2018/09...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2018/10...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2019/01...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2019/03...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2020/12...
Fetching games from https://api.chess.com/pub/player/kosotbaba/games/2021/03...
Fetching games from https://api.chess.co

In [2]:
import pandas as pd

# Load the raw games CSV from the same path that save_games_to_csv uses as its
# default `filename`, to check the extraction result.
file_path = "C:\\Users\\kosot\\OneDrive\\Masaüstü\\CS210-HW2\\Project\\DSA210_Term_Project_bugrahan.yapilmisev\\Data_Extraction\\raw_chess_games.csv"
df = pd.read_csv(file_path)

# Show the first rows as the cell output.
df.head()

Unnamed: 0,url,time_control,end_time,rated,fen,pgn,white_player,white_rating,black_player,black_rating,result
0,https://www.chess.com/game/live/57361114845,600,2022-09-20 00:28:09,True,8/4k3/2Q5/4R3/5p1p/1P6/6KP/8 b - -,"[Event ""Live Chess""]\n[Site ""Chess.com""]\n[Dat...",CapitaineJo,631,kosotbaba,634,win
1,https://www.chess.com/game/live/57361712859,180,2022-09-20 00:34:37,True,1B6/8/2b5/3k4/6R1/4p1P1/8/6K1 w - -,"[Event ""Live Chess""]\n[Site ""Chess.com""]\n[Dat...",kosotbaba,569,austincarter,619,timeout
2,https://www.chess.com/game/live/57364624969,180,2022-09-20 01:17:09,True,R6N/1pp1k1pp/4pn2/8/8/2K1P3/PnP3PP/5R2 b - -,"[Event ""Live Chess""]\n[Site ""Chess.com""]\n[Dat...",kosotbaba,616,potofigueroa,557,win
3,https://www.chess.com/game/live/57364681925,600,2022-09-20 01:26:04,True,4k2r/pp2N1pp/4np2/8/8/P7/2P2PPP/R1Br2K1 w k -,"[Event ""Live Chess""]\n[Site ""Chess.com""]\n[Dat...",elie_sahyoun,582,kosotbaba,641,checkmated
4,https://www.chess.com/game/live/57365280945,600,2022-09-20 01:29:54,True,r2q3k/1pp1r1pp/p1n2n2/3p1B2/3P4/2B2P1N/PPP3PP/...,"[Event ""Live Chess""]\n[Site ""Chess.com""]\n[Dat...",kosotbaba,632,memetea,621,resigned


In [3]:

# Here I filter the data, since it contains a lot of information that is irrelevant to my project topic.

import pandas as pd

# Reload the raw games CSV; the PGN filtering below starts from this file.
file_path = "C:\\Users\\kosot\\OneDrive\\Masaüstü\\CS210-HW2\\Project\\DSA210_Term_Project_bugrahan.yapilmisev\\Data_Extraction\\raw_chess_games.csv"
df = pd.read_csv(file_path)

def standardize_result(row):
    """Return the row's PGN text with the unneeded header tags removed.

    Despite its name, this function does not touch the game result: it strips
    selected header tag lines (Site, Round, ECO, ...) from the PGN and keeps
    every other line. The name is kept so existing calls keep working.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a "pgn" entry.

    Returns:
        The filtered PGN string. Every kept line keeps its trailing newline
        except the final line of the input, so blank separator lines between
        the headers and the movetext are preserved. A non-string "pgn" value
        (e.g. NaN for a missing cell) is returned unchanged.
    """
    dropped_tags = {
        "Site", "Round", "ECO", "ECOUrl", "TimeControl", "Timezone",
        "UTCDate", "UTCTime", "CurrentPosition", "Link",
    }

    pgn = row.get("pgn")
    if not isinstance(pgn, str):
        # Missing PGN: there is nothing to filter and .split() would fail.
        return pgn

    lines = pgn.split("\n")
    last_index = len(lines) - 1
    kept = []
    for index, line in enumerate(lines):
        # The tag name sits between the opening "[" and the first space.
        tag = line[1:line.find(" ")].strip()
        if line.startswith("[") and tag in dropped_tags:
            continue
        # Decide by position, not by value: comparing the text with the last
        # line would also match earlier identical lines (typically the empty
        # separator line when the PGN ends with a newline) and drop their "\n".
        kept.append(line if index == last_index else line + "\n")
    return "".join(kept)

# Replace each row's PGN with the filtered version returned by standardize_result.
df["pgn"] = df.apply(standardize_result, axis=1)

# NOTE: this is an alias, not a copy; df_cleaned and df refer to the same DataFrame.
df_cleaned = df
# Save the modified dataset to a new file
output_path = "C:\\Users\\kosot\\OneDrive\\Masaüstü\\CS210-HW2\\Project\\DSA210_Term_Project_bugrahan.yapilmisev\\Data_Extraction\\filtered_chess_games.csv"
df_cleaned.to_csv(output_path, index=False)

print(f"Cleaned dataset saved to {output_path}")


Cleaned dataset saved to C:\Users\kosot\OneDrive\Masaüstü\CS210-HW2\Project\DSA210_Term_Project_bugrahan.yapilmisev\Data_Extraction\filtered_chess_games.csv


In [4]:
import pandas as pd

# Load the filtered dataset written by the previous cell.
# The path must match that cell's `output_path` (inside Data_Extraction).
# The earlier path pointed at a different file directly under the Project
# folder, so a fresh run would preview a stale copy or fail to find the file.
file_path = "C:\\Users\\kosot\\OneDrive\\Masaüstü\\CS210-HW2\\Project\\DSA210_Term_Project_bugrahan.yapilmisev\\Data_Extraction\\filtered_chess_games.csv"
df = pd.read_csv(file_path)

# Show the first rows as the cell output.
df.head()

Unnamed: 0,url,time_control,end_time,rated,fen,pgn,white_player,white_rating,black_player,black_rating,result
0,https://www.chess.com/game/live/57361114845,600,2022-09-20 00:28:09,True,8/4k3/2Q5/4R3/5p1p/1P6/6KP/8 b - -,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",CapitaineJo,631,kosotbaba,634,win
1,https://www.chess.com/game/live/57361712859,180,2022-09-20 00:34:37,True,1B6/8/2b5/3k4/6R1/4p1P1/8/6K1 w - -,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",kosotbaba,569,austincarter,619,timeout
2,https://www.chess.com/game/live/57364624969,180,2022-09-20 01:17:09,True,R6N/1pp1k1pp/4pn2/8/8/2K1P3/PnP3PP/5R2 b - -,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",kosotbaba,616,potofigueroa,557,win
3,https://www.chess.com/game/live/57364681925,600,2022-09-20 01:26:04,True,4k2r/pp2N1pp/4np2/8/8/P7/2P2PPP/R1Br2K1 w k -,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",elie_sahyoun,582,kosotbaba,641,checkmated
4,https://www.chess.com/game/live/57365280945,600,2022-09-20 01:29:54,True,r2q3k/1pp1r1pp/p1n2n2/3p1B2/3P4/2B2P1N/PPP3PP/...,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",kosotbaba,632,memetea,621,resigned


In [5]:
import pandas as pd

# Load the filtered dataset (the file written to `output_path` by the PGN-filtering cell).
file_path = "C:\\Users\\kosot\\OneDrive\\Masaüstü\\CS210-HW2\\Project\\DSA210_Term_Project_bugrahan.yapilmisev\\Data_Extraction\\filtered_chess_games.csv"
df = pd.read_csv(file_path)
# Classify each game as a win, draw or loss from the player's point of view.
def standardize_result(row, username="kosotbaba"):
    """Derive the game outcome for ``username`` from the PGN Termination tag.

    The Termination header is located by its tag name rather than by a fixed
    line position, so extra or missing header lines in the PGN do not cause a
    game to be mislabelled.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a "pgn" entry.
        username: Player whose perspective is used; defaults to the account
            this notebook analyses.

    Returns:
        "win" if the Termination text starts with "<username> won",
        "drawn" if it starts with "Game drawn", "lose" for any other
        Termination text, and None when the PGN is missing (not a string)
        or contains no Termination header.
    """
    pgn = row.get("pgn")
    if not isinstance(pgn, str):
        # Missing PGN (e.g. NaN from the CSV): the outcome cannot be determined.
        return None

    tag_prefix = '[Termination "'
    termination = next(
        (line.strip() for line in pgn.split("\n") if line.strip().startswith(tag_prefix)),
        None,
    )
    if termination is None:
        # Do not guess a loss when the header is absent.
        return None

    if termination.startswith(f'{tag_prefix}{username} won'):
        return "win"
    if termination.startswith(f'{tag_prefix}Game drawn'):
        return "drawn"
    return "lose"
    

# Compute an outcome label for every row (derived from its PGN by
# standardize_result) and store it in a new "result_standardized" column.
df["result_standardized"] = df.apply(standardize_result, axis=1)

# Drop the original "result" column, then save the modified dataset to a new file.
df = df.drop(columns=["result"])
output_path = "C:\\Users\\kosot\\OneDrive\\Masaüstü\\CS210-HW2\\Project\\DSA210_Term_Project_bugrahan.yapilmisev\\Data_Extraction\\filtered_chess_games_modified_png.csv"
df.to_csv(output_path, index=False)

print(f"Standardized dataset saved to {output_path}")


Standardized dataset saved to C:\Users\kosot\OneDrive\Masaüstü\CS210-HW2\Project\DSA210_Term_Project_bugrahan.yapilmisev\Data_Extraction\filtered_chess_games_modified_png.csv


In [6]:
import pandas as pd

# Load the dataset with the standardized outcome column (the file written to
# `output_path` by the previous cell).
file_path = "C:\\Users\\kosot\\OneDrive\\Masaüstü\\CS210-HW2\\Project\\DSA210_Term_Project_bugrahan.yapilmisev\\Data_Extraction\\filtered_chess_games_modified_png.csv"
df = pd.read_csv(file_path)

# Show the first rows as the cell output.
df.head()

Unnamed: 0,url,time_control,end_time,rated,fen,pgn,white_player,white_rating,black_player,black_rating,result_standardized
0,https://www.chess.com/game/live/57361114845,600,2022-09-20 00:28:09,True,8/4k3/2Q5/4R3/5p1p/1P6/6KP/8 b - -,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",CapitaineJo,631,kosotbaba,634,lose
1,https://www.chess.com/game/live/57361712859,180,2022-09-20 00:34:37,True,1B6/8/2b5/3k4/6R1/4p1P1/8/6K1 w - -,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",kosotbaba,569,austincarter,619,lose
2,https://www.chess.com/game/live/57364624969,180,2022-09-20 01:17:09,True,R6N/1pp1k1pp/4pn2/8/8/2K1P3/PnP3PP/5R2 b - -,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",kosotbaba,616,potofigueroa,557,win
3,https://www.chess.com/game/live/57364681925,600,2022-09-20 01:26:04,True,4k2r/pp2N1pp/4np2/8/8/P7/2P2PPP/R1Br2K1 w k -,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",elie_sahyoun,582,kosotbaba,641,win
4,https://www.chess.com/game/live/57365280945,600,2022-09-20 01:29:54,True,r2q3k/1pp1r1pp/p1n2n2/3p1B2/3P4/2B2P1N/PPP3PP/...,"[Event ""Live Chess""]\n[Date ""2022.09.19""]\n[Wh...",kosotbaba,632,memetea,621,lose
