In [None]:
# For reference: the keys of the `info_dict` record describing one game.

 # info_dict = {
    #     "game_id": unique_game_id,
    #     "transcript": game_state,
    #     "result": result,
    #     "player_one": player_one_title,
    #     "player_two": player_two_title,
    #     "player_one_score": player_one_score,
    #     "player_two_score": player_two_score,
    #     "player_one_illegal_moves": player_one_illegal_moves,
    #     "player_two_illegal_moves": player_two_illegal_moves,
    #     "player_one_resignation": player_one_resignation,
    #     "player_two_resignation": player_two_resignation,
    #     "game_title": f"{player_one_title} vs. {player_two_title}",
    #     "number_of_moves": board.fullmove_number,
    #     "time_taken": total_time,
    # }

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def graph_player_gpt_score(player: str, df: pd.DataFrame) -> None:
    """Plot the mean score of ``player`` per game title as a bar chart and save it.

    Games are grouped by ``game_title``; each bar is labelled with the text that
    follows ``" vs. "`` in the title. The unique score values and the per-title
    means are printed, and the chart is written to
    ``gpt-3.5-turbo-instruct-win-rate.png`` in the current working directory.

    Args:
        player: Either ``"player_one"`` or ``"player_two"``; selects the
            ``{player}_score`` column.
        df: Game results with ``game_title`` and ``{player}_score`` columns.
            The frame passed in is not modified.

    Raises:
        ValueError: If ``player`` is not one of the two accepted values.
    """
    if player not in ("player_one", "player_two"):
        raise ValueError("player must be either 'player_one' or 'player_two'")

    score_column = f"{player}_score"

    # Convert on a derived frame so the caller's DataFrame is left untouched.
    # Values that cannot be parsed as numbers become NaN (errors="coerce") and
    # NaN entries are skipped by mean().
    # NOTE(review): if draws are stored as the string "1/2" they are therefore
    # left out of the average -- confirm that this is intended.
    scored = df.assign(**{score_column: pd.to_numeric(df[score_column], errors="coerce")})
    print(scored[score_column].unique())

    # Mean score for each distinct game title.
    average_scores = scored.groupby("game_title")[score_column].mean()
    print(average_scores)

    # Keep only the part after " vs. " of each title as the bar label.
    average_scores.index = average_scores.index.str.split(" vs. ").str[1].str.strip()

    average_scores.plot(kind="bar", figsize=(10, 5))
    plt.title("Average gpt-3.5-turbo-instruct Win Rate by Opponent Skill over 15 rounds")
    plt.ylabel("Average Score")
    plt.xlabel("Stockfish Level (time = 0.1 seconds per move)")
    plt.xticks(rotation=45)
    plt.tight_layout()
    # plt.show()
    plt.savefig("gpt-3.5-turbo-instruct-win-rate.png")

# Exactly one sweep log is analysed per run: keep a single `filename`
# assignment uncommented and leave the other candidates commented out.
# filename = "logs/ckpt_16_pt_vs_stockfish_sweep.csv"
# filename = "logs/ckpt_8_pt_vs_stockfish_sweep.csv"
# filename = "logs/2325_lichess_ckpt_pt_vs_stockfish_sweep.csv"
# filename = "logs/ckpt_synthetic_split_no_opt_pt_vs_stockfish_sweep.csv"
filename = "logs/lichess_16_ckpt_pt_vs_stockfish_sweep.csv"
# filename = "logs/lichess_8layers_ckpt_no_optimizer_pt_vs_stockfish_sweep.csv"
# filename = "logs/lichess_stockfish_mix_8layers_ckpt_no_optimizer_pt_vs_stockfish_sweep.csv"
# filename = "logs/lichess_8layers_results_ckpt_no_optimizer_pt_vs_stockfish_sweep.csv"
# filename = "logs/12_25_01_coeff_pt_vs_stockfish_sweep.csv"
# filename = "logs/no_activation_pt_vs_stockfish_sweep copy.csv"
# filename = "logs/lichess_train_layer_12_pos_start_25_activations_pt_vs_stockfish_sweep.csv"
# filename = "logs/lichess_train_layer_12_pos_start_25_activations_pt_10_random_moves_vs_stockfish_sweep.csv"
# filename = "logs/no_activation_pt_10_random_moves_vs_stockfish_sweep.csv"
# filename = "logs/lichess_200k_bins_16layers_ckpt_no_optimizer_pt_vs_stockfish_sweep.csv"
df = pd.read_csv(filename)
# Number of rows (games) in the selected log.
print(len(df))

graph_player_gpt_score("player_one", df)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os


def graph_player_gpt_scores(
    player: str,
    filenames: list[str],
    label_map: "dict[str, str] | None" = None,
    score_prefix: str = "",
) -> None:
    """Plot the mean score of ``player`` per opponent, one line per log file.

    Each CSV in ``filenames`` is read, games whose ``player_two`` is
    ``"Stockfish -1"``, ``"Stockfish -2"`` or ``"Stockfish -3"`` are dropped,
    and the mean of the score column is computed per ``game_title``. The text
    after ``" vs. "`` in each title becomes the x-axis label. The figure is
    saved to ``llm-win-rate.png`` and then shown.

    Args:
        player: Either ``"player_one"`` or ``"player_two"``.
        filenames: Paths of the CSV logs to plot.
        label_map: Optional mapping from file path to legend text. When
            omitted, a module-level ``labels`` dict is used if one exists.
            Files without an entry are labelled with their path.
        score_prefix: Prefix of the score column, e.g. ``"modified_"`` to read
            ``modified_{player}_score`` instead of ``{player}_score``.

    Raises:
        ValueError: If ``player`` is not one of the two accepted values.
    """
    if player not in ("player_one", "player_two"):
        raise ValueError("player must be either 'player_one' or 'player_two'")

    if label_map is None:
        # Backward compatibility: earlier versions read the notebook-level
        # `labels` dict directly, so fall back to it when nothing is passed.
        label_map = globals().get("labels", {})

    score_column = f"{score_prefix}{player}_score"
    excluded_opponents = ["Stockfish -1", "Stockfish -2", "Stockfish -3"]

    plt.figure(figsize=(10, 5))

    for filename in filenames:
        games = pd.read_csv(filename)
        games = games[~games["player_two"].isin(excluded_opponents)]

        # Values that cannot be parsed as numbers become NaN and are skipped
        # by mean().
        # NOTE(review): if draws are stored as the string "1/2" they are left
        # out of the average -- confirm that this is intended.
        scores = pd.to_numeric(games[score_column], errors="coerce")

        # Mean score for each distinct game title.
        avg_scores = scores.groupby(games["game_title"]).mean()

        # Keep only the part after " vs. " of each title as the x-axis label.
        avg_scores.index = avg_scores.index.str.split(" vs. ").str[1].str.strip()

        # Line chart with markers; unknown files fall back to their path.
        plt.plot(avg_scores, label=label_map.get(filename, filename), marker="o")

    plt.title("Average Chess-GPT Win Rate by Opponent Skill over 1000 rounds")
    plt.ylabel("Average Win Rate")
    plt.xlabel("Stockfish Level (time = 0.1 seconds per move)")
    plt.xticks(rotation=45)
    plt.legend()
    # plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
    plt.tight_layout()
    # Save before show(): the figure is written to disk first, then displayed.
    plt.savefig("llm-win-rate.png")
    plt.show()


file_dir = "logs/grid_search_llms/"
# os.listdir returns entries in arbitrary order; sort them so the plotting
# order (and with it line colours and legend order) is the same on every run.
filenames = sorted(
    f"{file_dir}{filename}" for filename in os.listdir(file_dir) if filename.endswith(".csv")
)
print(filenames)
# Keep only files whose path contains "0_1" and does not contain "130b".
filenames = [
    filename for filename in filenames if "0_1" in filename and "130b" not in filename
]

# Legend text for each log file, keyed by the exact path built above.
labels = {
    "logs/grid_search_llms/lichess_stockfish_mix_8layers_ckpt_no_optimizer_pt_0_1_time_vs_stockfish_sweep.csv": "Lichess / Stockfish Mix - 8 Layers",
    "logs/grid_search_llms/lichess_8layers_gt_18k_ckpt_no_optimizer_pt_0_1_time_vs_stockfish_sweep.csv": "Lichess > 1800 ELO - 8 Layers",
    "logs/grid_search_llms/stockfish_16layers_ckpt_no_optimizer_pt_0_1_time_vs_stockfish_sweep.csv": "Stockfish - 16 layers",
    "logs/grid_search_llms/stockfish_8layers_ckpt_no_optimizer_pt_0_1_time_vs_stockfish_sweep.csv": "Stockfish - 8 layers",
    "logs/grid_search_llms/lichess_8layers_ckpt_no_optimizer_pt_0_1_time_vs_stockfish_sweep.csv": "Lichess - 8 Layers",
    "logs/grid_search_llms/lichess_16layers_ckpt_no_optimizer_pt_0_1_time_vs_stockfish_sweep.csv": "Lichess - 16 Layers",
}

graph_player_gpt_scores("player_one", filenames)

In [None]:
import os

filedir = "logs/grid_search_llms/"
# os.listdir gives no ordering guarantee; sort so the list (and any loop over
# it in later cells) is processed in the same order on every run.
filenames = sorted(f"{filedir}{file}" for file in os.listdir(filedir) if file.endswith(".csv"))
print(filenames)

In [None]:
import pandas as pd
import chess
import chess.engine
import chess.pgn
import io

# This fixes the following error:
# Chess-GPT has a maximum context size of 1024 tokens, enough for 180 half-moves. The analysis code mistakenly
# scored a game that was still in progress at 180 half-moves as a draw. This cell re-scores each such game with
# Stockfish on a centipawn scale: a player with more than a 100-centipawn advantage wins; otherwise it is a draw.

def modify_scores(df, stockfish_path, turn=90, win_threshold_cp=100):
    """Re-score unfinished games with Stockfish into ``modified_player_one_score``.

    For every row whose ``result`` is ``'*'`` the PGN in ``transcript`` is
    replayed (stopping early once the board reaches full-move number ``turn``
    with Black to move) and the resulting position is analysed. The evaluation
    from White's point of view decides the new score:

    * more than ``win_threshold_cp`` centipawns  -> ``"1"``
    * less than ``-win_threshold_cp`` centipawns -> ``"0"``
    * anything in between                        -> ``"1/2"``

    All other rows copy ``player_one_score`` unchanged.

    Args:
        df: Game log with ``result``, ``transcript`` and ``player_one_score``
            columns. It is modified in place and also returned.
        stockfish_path: Path of the UCI engine executable to launch.
        turn: Full-move number at which replay stops.
        win_threshold_cp: Centipawn advantage required to call the game a win.

    Returns:
        The same DataFrame, with ``modified_player_one_score`` filled in.
    """
    engine = chess.engine.SimpleEngine.popen_uci(stockfish_path)

    # try/finally so the engine subprocess is shut down even if parsing or
    # analysis raises part-way through the file.
    try:
        for index, row in df.iterrows():
            if row['result'] != '*':
                # Finished game: keep the recorded score.
                df.at[index, 'modified_player_one_score'] = row['player_one_score']
                continue

            game = chess.pgn.read_game(io.StringIO(row['transcript']))
            if game is None:
                # read_game returns None when no game can be read from the
                # text; there is nothing to analyse, so keep the recorded score.
                df.at[index, 'modified_player_one_score'] = row['player_one_score']
                continue

            board = game.board()

            # Replay the game, stopping at the requested turn if it is reached.
            for move in game.mainline_moves():
                board.push(move)
                if board.fullmove_number == turn and board.turn == chess.BLACK:
                    break

            # Analyze the position; mate scores are mapped to +/-10000-ish
            # values so they always fall outside the draw band.
            info = engine.analyse(board, chess.engine.Limit(time=0.1))
            # NOTE(review): the evaluation is taken from White's side, which
            # assumes player one plays White -- confirm against the game runner.
            score = info["score"].white().score(mate_score=10000)

            # Symmetric band: only a clear advantage for either side counts
            # as decisive; everything else is a draw.
            if score > win_threshold_cp:
                modified_score = "1"
            elif score < -win_threshold_cp:
                modified_score = "0"
            else:
                modified_score = "1/2"

            df.at[index, 'modified_player_one_score'] = modified_score
    finally:
        engine.quit()

    return df

# Path to Stockfish; it is the same for every file, so set it once up front.
stockfish_path = "/usr/games/stockfish"

# Re-score every log listed in `filenames` (built in an earlier cell) and
# write the result back over the original CSV.
for filename in filenames:
    # Load the DataFrame
    df = pd.read_csv(filename)

    # Modify the scores
    df = modify_scores(df, stockfish_path)

    # Save the modified DataFrame
    df.to_csv(filename, index=False)

    print("Modified DataFrame saved.")


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def graph_player_gpt_score(player: str, df: pd.DataFrame) -> None:
    """Plot wins, draws and losses of ``player`` per opponent as stacked bars.

    Games against ``"Stockfish 10"`` are excluded. Each remaining game is
    classified from ``modified_{player}_score``: ``1`` is a win, ``0`` is a
    loss, and every other value (including values that are not numeric) is a
    draw. The chart is saved to ``50M-Chess-GPT-win-rate.png``.

    Args:
        player: Either ``"player_one"`` or ``"player_two"``.
        df: Game results with ``player_two`` and ``modified_{player}_score``
            columns. The frame passed in is not modified.

    Raises:
        ValueError: If ``player`` is not one of the two accepted values.
    """
    if player not in ["player_one", "player_two"]:
        raise ValueError("player must be either 'player_one' or 'player_two'")

    # Categorize each game result into win, loss, or draw
    def categorize(score):
        if score == 1:
            return 'Win'
        elif score == 0:
            return 'Loss'
        else:
            return 'Draw'

    # Filter first and work on a copy so the caller's DataFrame is untouched.
    games = df[df['player_two'] != 'Stockfish 10'].copy()

    # Non-numeric values (e.g. "1/2") become NaN here and are then
    # categorized as draws by the fall-through branch above.
    numeric_scores = pd.to_numeric(games[f"modified_{player}_score"], errors="coerce")
    games[f"{player}_result"] = numeric_scores.apply(categorize)

    # Group by opponent and result type, then count occurrences
    result_counts = games.groupby(["player_two", f"{player}_result"]).size().unstack().fillna(0)

    # An outcome that never occurs has no column after unstack(); add it with
    # zero counts so the fixed column selection below cannot raise KeyError.
    outcome_order = ['Win', 'Draw', 'Loss']
    for outcome in outcome_order:
        if outcome not in result_counts.columns:
            result_counts[outcome] = 0
    result_counts = result_counts[outcome_order]

    # Create a stacked bar chart
    result_counts.plot(kind="bar", stacked=True, figsize=(10, 5))

    plt.suptitle("50M Chess-GPT vs Stockfish")
    plt.ylabel("Number of Games")
    plt.xlabel("Stockfish Level (time = 0.1 seconds per move, Stockfish 0: ELO ~1300, Stockfish 9: ELO ~2700)")

    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig("50M-Chess-GPT-win-rate.png")

# Example usage: chart the outcome breakdown for a single sweep log.
filename = "logs/grid_search_llms/stockfish_16layers_ckpt_no_optimizer_pt_0_1_time_vs_stockfish_sweep.csv"
df = pd.read_csv(filename)
# Row count of the loaded log.
print(df.shape[0])
graph_player_gpt_score("player_one", df)


In [None]:
import pandas as pd
def analyze_move_history(df: pd.DataFrame, model: str, player: str) -> None:
    """Print move-legality and game-length statistics for one model.

    Only the rows in which ``df[player]`` equals ``model`` are considered.
    For those games the function prints the summed illegal, legal and total
    move counts, the illegal/total and legal/total ratios, and the minimum,
    maximum, median and standard deviation of ``number_of_moves``.

    Args:
        df: Game log with the columns ``{player}``, ``{player}_illegal_moves``,
            ``{player}_legal_moves``, ``total_moves`` and ``number_of_moves``.
        model: Player title to report on.
        player: Either ``"player_one"`` or ``"player_two"``; the seat in which
            ``model`` is looked up.

    Raises:
        ValueError: If ``player`` is not one of the two accepted values.
    """
    if player not in ("player_one", "player_two"):
        raise ValueError("player must be either 'player_one' or 'player_two'")

    # Filter the DataFrame for games where the player title is the requested
    # model (the argument is honoured rather than replaced by whichever model
    # happens to appear first in the file).
    filtered_df = df[df[player] == model]

    print(f"\n{model} Stats\n")

    if filtered_df.empty:
        # Nothing to summarise; avoid printing meaningless 0/0 ratios.
        print(f"No games found with {player} == {model!r}")
        return

    # Compute total illegal moves and total moves
    total_illegal_moves = filtered_df[f"{player}_illegal_moves"].sum()
    total_legal_moves = filtered_df[f"{player}_legal_moves"].sum()
    total_moves = filtered_df['total_moves'].sum()

    # Calculate the ratios, guarding against a zero denominator.
    if total_moves:
        illegal_ratio = total_illegal_moves / total_moves
        legal_ratio = total_legal_moves / total_moves
    else:
        illegal_ratio = legal_ratio = float("nan")

    # "player_one" -> "Player One", "player_two" -> "Player Two", so the
    # messages name the seat that was actually analysed.
    player_label = player.replace("_", " ").title()

    print(f"total moves: {total_moves}, total illegal moves: {total_illegal_moves}")
    print(f"total legal moves: {total_legal_moves}")
    # Display the result
    print(f"Ratio of {player_label}'s Illegal Moves to Total Moves: {illegal_ratio:.4f}")
    print(f"Ratio of {player_label}'s Legal Moves to Total Moves: {legal_ratio:.4f}")

    # Other stats
    move_counts = filtered_df['number_of_moves']
    min_moves = move_counts.min()
    max_moves = move_counts.max()
    median_moves = move_counts.median()
    std_dev_moves = move_counts.std()

    print(f"Minimum Moves: {min_moves}")
    print(f"Maximum Moves: {max_moves}")
    print(f"Median Moves: {median_moves}")
    print(f"Standard Deviation of Moves: {std_dev_moves:.2f}")

# Read the log selected by `filename`, which must already be set by an earlier cell.
df = pd.read_csv(filename)

# Models to report on, in the order their statistics are printed.
models_to_report = (
    "gpt-3.5-turbo-instruct",
    "gpt-4",
    "gpt-3.5-turbo",
    "babbage",
    "davinci",
    "replicate/meta/llama-2-7b:527827021d8756c7ab79fde0abbfaac885c37a3ed5fe23c7465093f0878d55ef",
    "replicate/meta/llama-2-70b:a52e56fee2269a78c9279800ec88898cecb6c8f1df22a6483132bea266648f00",
)

for model_name in models_to_report:
    analyze_move_history(df, model_name, "player_one")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the log selected by `filename`, which must already be set by an earlier cell.
df = pd.read_csv(filename)

# Distribution of game lengths: create the figure, draw the histogram on its
# axes, then label those axes directly.
plt.figure(figsize=(10,6))
moves_axis = df['number_of_moves'].hist(bins=30, edgecolor='black', color='skyblue')
moves_axis.set_title('Distribution of Number of Moves per Game')
moves_axis.set_xlabel('Number of Moves')
moves_axis.set_ylabel('Number of Games')
moves_axis.grid(axis='y')
plt.tight_layout()
plt.show()


In [None]:
# Count how many rows (games) share each `game_title`.
games_by_title = df.groupby('game_title')
title_counts = games_by_title.size()

print(title_counts)

# Optional follow-up, kept disabled: list the titles with fewer than 30 games.
# titles_less_than_30 = title_counts[title_counts < 30]

# # Display the result
# print(titles_less_than_30)

In [None]:
df = pd.read_csv("logs/games.csv")

# Build each condition as its own mask, then combine them into one row filter.
is_instruct_model = df['player_one'] == "gpt-3.5-turbo-instruct"
faces_stockfish_9 = df['player_two'] == "Stockfish 9"
# NOTE(review): this compares against the string "0"; if pandas parsed the
# column as numbers the comparison would be true for every row -- confirm dtype.
score_is_not_zero = df['player_one_score'] != "0"

filtered_game = df[is_instruct_model & faces_stockfish_9 & score_is_not_zero]

print(filtered_game)

In [None]:
df = pd.read_csv("logs/games.csv")

# Rows where gpt-3.5-turbo-instruct is player one and made at least one illegal move.
is_instruct_model = df['player_one'] == "gpt-3.5-turbo-instruct"
made_illegal_move = df['player_one_illegal_moves'] > 0

filtered_game = df[is_instruct_model & made_illegal_move]

print(filtered_game)