# Load the Data

In [132]:
import pandas as pd
import numpy as np

# Load white and black datasets
white_df = pd.read_csv("white_df_100.csv")
black_df = pd.read_csv("black_df_100.csv")


# Pre-process Data

In [133]:
# Concatenate the data
all_df = pd.concat([black_df, white_df])

# Pre-process data
all_df['Is_Check'] = all_df['Is_Check'].astype(int)
all_df['Is_Capture'] = all_df['Piece_Captured'].apply(lambda x: 1 if x != None else 0)
all_df = all_df.drop(columns=['FEN'])

# Save the combined dataset to a file
all_df.to_csv('all_df_100.csv', index=False)


# Define Helper Functions

In [134]:
import ast

# Define helper functions for converting chess moves to tuples and vice versa
def chess_square_to_number(square: str) -> int:
    file = square[0]
    rank = int(square[1])
    file_dict = {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8}
    return (file_dict[file] - 1) * 8 + rank

def chess_move_to_tuple(move):
    start_square = chess_square_to_number(move[:2])
    end_square = chess_square_to_number(move[2:])
    return (start_square, end_square)

def chess_moves_to_tuples(moves):
    result = []
    for move in moves:
        result.append(chess_move_to_tuple(move))
    return result


# Convert Moves to Tuples

In [135]:
# Convert moves to tuples
all_df["Move"] = all_df["Move"].apply(lambda x: chess_move_to_tuple(x))
all_df["Top_10_Moves"] = all_df["Top_10_Moves"].apply(lambda x: ast.literal_eval(x))
all_df["Top_10_Moves"] = all_df["Top_10_Moves"].apply(lambda x: chess_moves_to_tuples(x))


# Prepare Data for Model Training

In [136]:
# Prepare data for model training
candidate_data = all_df["Top_10_Moves"].tolist()
target_data = all_df["Move"].tolist()


# Calculate Euclidean Distances

In [137]:
# Calculate the Euclidean distance between each candidate move and the target move
X, y = [], []
for i, candidates in enumerate(candidate_data):
    target = target_data[i]
    for candidate in candidates:
        X.append(candidate)
        y.append(np.linalg.norm(np.array(candidate) - np.array(target)))

X = np.array(X)
y = np.array(y)

# Train-Test Split

In [138]:
from sklearn.model_selection import train_test_split

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)


# Import Libraries

In [139]:
import random
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor


# Define Models

In [140]:
# Create a list of models
models = [
    ("RandomForestRegressor", RandomForestRegressor(n_estimators=100, random_state=42)),
    ("LinearRegression", LinearRegression()),
    ("Ridge", Ridge(alpha=1.0, random_state=42)),
    ("Lasso", Lasso(alpha=1.0, random_state=42)),
    ("ElasticNet", ElasticNet(alpha=1.0, l1_ratio=0.5, random_state=42)),
    ("KNeighborsRegressor", KNeighborsRegressor(n_neighbors=5, weights="uniform", algorithm="auto")),
    ("DecisionTreeRegressor", DecisionTreeRegressor(random_state=42)),
    ("GradientBoostingRegressor", GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)),
]


# Define Functions for Accuracy Calculation

In [141]:
def sort_candidates(candidates, model):
    likelihoods = model.predict(candidates)
    return sorted(candidates, key=lambda x: likelihoods[candidates.index(x)], reverse=True)

def get_top_candidate(candidates, model):
    likelihoods = model.predict(candidates)
    sorted_candidates = sorted(candidates, key=lambda x: likelihoods[candidates.index(x)], reverse=False)
    return sorted_candidates[0]

def get_random_candidate(candidates):
    return random.choice(candidates)


# Train Models, Calculate Accuracy, and Compare

In [142]:
# Placeholder for results
results = []

for model_name, model in models:
    model.fit(X_train, y_train)
    
    # Calculate the accuracy of the model by comparing the top predicted move to the target move
    correct = sum(get_top_candidate(candidates, model) == target for candidates, target in zip(candidate_data, target_data))
    total = len(candidate_data)
    accuracy = correct / total
    
    # Calculate the accuracy of randomly selecting a move from the candidates
    random_correct = sum(get_random_candidate(candidates) == target for candidates, target in zip(candidate_data, target_data))
    random_accuracy = random_correct / total
    
    results.append([model_name, accuracy, random_accuracy])

# Create a DataFrame to display the results
results_df = pd.DataFrame(results, columns=["Model", "Pure Accuracy", "Random Selection Accuracy"])
print(results_df)


                       Model  Pure Accuracy  Random Selection Accuracy
0      RandomForestRegressor       0.232723                   0.117355
1           LinearRegression       0.093515                   0.112246
2                      Ridge       0.093515                   0.111679
3                      Lasso       0.093089                   0.112388
4                 ElasticNet       0.093515                   0.108557
5        KNeighborsRegressor       0.205194                   0.112814
6      DecisionTreeRegressor       0.235561                   0.113240
7  GradientBoostingRegressor       0.141762                   0.109834


# Select the Best Model

In [143]:
# Find the model with the highest pure accuracy
best_model_name = results_df.loc[results_df["Pure Accuracy"].idxmax()]["Model"]

# Find the corresponding model object from the list of models
best_model = [model for name, model in models if name == best_model_name][0]

print(f"Best model: {best_model_name}")
print(best_model)

Best model: DecisionTreeRegressor
DecisionTreeRegressor(random_state=42)


# Save the Best Model

In [144]:
import joblib

# Save the best model to a file
joblib.dump(best_model, 'final_model_chess.joblib')


['final_model_chess.joblib']