In [1]:
import re
import ast
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import joblib
import numpy as np
from sklearn.metrics import f1_score, recall_score, log_loss



In [2]:


# Load the data
data = pd.read_json('../data/opening_classified.json')

# Custom function to parse the 'input' column
def parse_input(input_str):
    # Remove ellipses and multiple spaces
    input_str = re.sub(r'\.\.\.', '', input_str)
    input_str = re.sub(r'\s+', ',', input_str.strip())
    # Ensure proper list format by adding commas between elements
    input_str = input_str.replace('[,', '[').replace(',]', ']').replace(',,', ',')
    return ast.literal_eval(input_str)

# Apply the custom function to the 'input' column
#data['input'] = data['input'].apply(parse_input)

# Prepare the feature matrix (X) and labels (y)
X = np.array(data['input'].tolist())
y = data['game_state'].apply(lambda x: 1 if x == 'opening' else 0).values

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Initialize and train the logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Save the trained model
joblib.dump(model, '../model/opening_middlegame_classifier.pkl')

# Evaluate the model
accuracy = model.score(X_test, y_test)
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1]  # probabilities for the positive class
f1 = f1_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
loss = log_loss(y_test, y_pred_proba)



In [14]:

joblib.dump(model, '../model/opening_middlegame_classifier.pkl')

# Print metrics
print(f'Model Accuracy: {accuracy * 100:.2f}%')
print(f'F1 Score: {f1:.2f}')
print(f'Recall: {recall:.2f}')
print(f'Log Loss: {loss:.2f}')
print(f'Learning Rate (C): {model.C:.2f}')

Model Accuracy: 97.46%
F1 Score: 0.96
Recall: 0.96
Log Loss: 0.06
Learning Rate (C): 1.00


In [90]:
data = pd.read_csv('../data/opening_classified.csv')
data['input']

0         [0 0 0 ... 1 0 0]
1         [0 0 0 ... 1 0 1]
2         [0 0 0 ... 1 0 0]
3         [0 0 0 ... 1 0 1]
4         [0 0 0 ... 1 0 0]
                ...        
109034    [0 0 0 ... 0 0 0]
109035    [0 0 0 ... 0 0 1]
109036    [0 0 0 ... 0 0 0]
109037    [0 0 0 ... 0 0 1]
109038    [0 0 0 ... 0 0 0]
Name: input, Length: 109039, dtype: object

In [92]:


# Custom function to parse the 'input' column
def parse_input(input_str):
    # Remove ellipses and multiple spaces
    input_str = re.sub(r'\.\.\.', '', input_str)
    input_str = re.sub(r'\s+', ',', input_str.strip())
    # Ensure proper list format by adding commas between elements
    input_str = input_str.replace('[,', '[').replace(',]', ']').replace(',,', ',')
    return ast.literal_eval(input_str)

# Apply the custom function to the 'input' column
data['input'] = data['input'].apply(parse_input)

In [93]:
data

Unnamed: 0,position,game_state,input
0,rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...,opening,"[0, 0, 0, 1, 0, 0]"
1,rnbqkbnr/pppp1ppp/8/4p3/4P3/8/PPPP1PPP/RNBQKBN...,opening,"[0, 0, 0, 1, 0, 1]"
2,rnbqkbnr/pppp1ppp/8/4p3/4P3/2N5/PPPP1PPP/R1BQK...,opening,"[0, 0, 0, 1, 0, 0]"
3,r1bqkbnr/pppp1ppp/2n5/4p3/4P3/2N5/PPPP1PPP/R1B...,opening,"[0, 0, 0, 1, 0, 1]"
4,r1bqkbnr/pppp1ppp/2n5/4p3/2B1P3/2N5/PPPP1PPP/R...,opening,"[0, 0, 0, 1, 0, 0]"
...,...,...,...
109034,3r2k1/ppp2ppp/3r4/1P1NQ3/2q1P3/1RP1B2P/P4PP1/3...,middlegame,"[0, 0, 0, 0, 0, 0]"
109035,3r2k1/pp3ppp/2pr4/1P1NQ3/2q1P3/1RP1B2P/P4PP1/3...,middlegame,"[0, 0, 0, 0, 0, 1]"
109036,3r2k1/pp2Nppp/2pr4/1P2Q3/2q1P3/1RP1B2P/P4PP1/3...,middlegame,"[0, 0, 0, 0, 0, 0]"
109037,3r1k2/pp2Nppp/2pr4/1P2Q3/2q1P3/1RP1B2P/P4PP1/3...,middlegame,"[0, 0, 0, 0, 0, 1]"


In [97]:
input_data = np.array(data['input'].tolist())
first_entry = input_data[0].reshape(1, -1)

# Predict the first entry
prediction = model.predict(first_entry)
print(f'Prediction for the first entry: {prediction[0]}')

Prediction for the first entry: 1


In [15]:
import joblib
import pandas as pd
import numpy as np
import re
import ast

# Custom function to parse the 'input' column
def parse_input(input_str):
    input_str = re.sub(r'\.\.\.', '', input_str)
    input_str = re.sub(r'\s+', ',', input_str.strip())
    input_str = input_str.replace('[,', '[').replace(',]', ']').replace(',,', ',')
    return ast.literal_eval(input_str)

# Load the test data
test_data = pd.read_json('../data/test.json')

# Apply the custom function to the 'input' column
#test_data['input'] = test_data['input'].apply(parse_input)

# Load the trained model
model = joblib.load('../model/opening_middlegame_classifier.pkl')

# Prepare the input data for prediction
#input_data = np.array(test_data['input'].tolist())
input_data = np.array(test_data['input'].tolist())
print(input_data)

# Predict the game state for the entire test data
predictions = model.predict(input_data)

# Map predictions to 'opening' and 'middle'
predicted_labels = ['opening' if pred == 1 else 'middle' for pred in predictions]

# Add predictions to the test data
test_data['predicted_game_state'] = predicted_labels

# Display the updated test data with predictions
print(test_data[['input', 'predicted_game_state']])

[[0 0 0 ... 1 0 0]
 [0 0 0 ... 1 0 1]
 [0 0 0 ... 1 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 0]]
                                                input predicted_game_state
0   [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, ...              opening
1   [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, ...              opening
2   [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, ...              opening
3   [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, ...              opening
4   [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, ...              opening
..                                                ...                  ...
76  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...               middle
77  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...               middle
78  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...               middle
79  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...               middle
80  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  

In [25]:

test_data = pd.read_json('../data/test.json')


print(test_data)

                                             position  game_state  \
0   rnbqkbnr/pppppppp/8/8/4P3/8/PPPP1PPP/RNBQKBNR ...     opening   
1   rnbqkbnr/ppp1pppp/8/3p4/4P3/8/PPPP1PPP/RNBQKBN...     opening   
2   rnbqkbnr/ppp1pppp/8/3P4/8/8/PPPP1PPP/RNBQKBNR ...     opening   
3   rnb1kbnr/ppp1pppp/8/3q4/8/8/PPPP1PPP/RNBQKBNR ...     opening   
4   rnb1kbnr/ppp1pppp/8/3q4/8/2N5/PPPP1PPP/R1BQKBN...     opening   
..                                                ...         ...   
63     8/6pp/5p2/4pk2/2rp4/7P/PRPB1PP1/3K4 w - - 1 33  middlegame   
64    8/6pp/5p2/4pk2/P1rp4/7P/1RPB1PP1/3K4 b - - 0 33  middlegame   
65   8/6pp/5p2/4pk2/P1r5/3p3P/1RPB1PP1/3K4 w - - 0 34  middlegame   
66   8/6pp/5p2/P3pk2/2r5/3p3P/1RPB1PP1/3K4 b - - 0 34  middlegame   
67    8/6pp/5p2/P3pk2/r7/3p3P/1RPB1PP1/3K4 w - - 1 35  middlegame   

                                                input  
0   [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, ...  
1   [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, ...  
2  

In [11]:
print(test_data[['predicted_game_state']].to_string(index=False))


predicted_game_state
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
             opening
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
              middle
             