# Linear Regression on Mistakes

In [1]:
import winning_chances
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Display option: never truncate the column list when a DataFrame is shown.
pd.set_option('display.max_columns', None)

## WCL database

First, we calculate the winning chance for each evaluation and calculate the WCL (Winning Chance Loss) for each move. 

We then reorganize the data to get the WCL for each player in each game, i.e. one row for White and one for Black.

This is the data we then use for our linear regression.

As an example and to limit CPU usage, only files with depth 20 are used here. 

In [None]:
# Bin edges handed to create_summary_table via its `mistake_bins` argument.
mistake_bins = [5, 10, 15, 20, 25, 30, 35, 40, 50, 60, 70, 100]

# Analyzed games (the depth-20 file, as announced in the text above).
df = pd.read_csv("../huge_analyzed_games/combined_analyzed_games_20.csv")

# Pre-computed winning chance table for eval depth 20.
# Alternatives to the line below:
#   table = winning_chances.compute_winning_chance_table(df)   # re-compute the table from df
#   table = pd.read_csv("winning_chances_all_moves.csv")       # pre-computed table for all games
table = pd.read_csv("table_20.csv")

# Build the summary table and store it on disk so it can be reloaded later.
summary_table = winning_chances.create_summary_table(
    df,
    mistake_bins=mistake_bins,
    winning_chance_table=table,
)
summary_table.to_csv("summary_table_20.csv", index=False)

# To skip the computation above, a pre-computed summary table can be loaded instead:
#   summary_table = pd.read_csv("big_summary_table.csv")    # all games
#   summary_table = pd.read_csv("summary_table_20.csv")     # games with eval depth 20

## Linear Regression Model

Perform a linear regression on the summary table. 

We use the following features:

- Number of mistakes in each bin (each bin corresponds to a different severity of mistake)
- Opening
- Result
- Total number of moves binned

The last three features are categorical; we use a one-hot encoder to take them into account.

In [None]:
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
Number_of_Openings = 70  # Number of top openings to consider
Color_Player = 'White'   # Player color to analyze ('White' or 'Black')
categorical_features = ['Opening', 'Result', 'TotalMovesInterval']  # Features that are one-hot encoded
numerical_features = []  # Extra numerical features; the mistake-bin columns are appended below
total_moves_bins = [0, 40, 60, 80, 100, 120, np.inf]  # Bin edges for 'TotalMoves'

# ---------------------------------------------------------------------------
# Feature preparation
# ---------------------------------------------------------------------------
# Most frequent openings, counted over the whole summary table (all players).
top_openings = summary_table['Opening'].value_counts().nlargest(Number_of_Openings).index.tolist()

# Filter the summary table for the specified player color (work on a copy).
new_summary_table = summary_table[summary_table['Player'] == Color_Player].copy()

# Group less frequent openings as 'Other'. This is done on the copy, NOT on
# `summary_table` itself: overwriting the source column would make a re-run of
# this cell count 'Other' as an opening, which can push a real opening out of
# the top list, so the result would change from run to run.
new_summary_table['Opening'] = new_summary_table['Opening'].where(
    new_summary_table['Opening'].isin(top_openings), 'Other'
)

# The mistake-bin columns are all columns located after 'AWCL'.
start_index = new_summary_table.columns.get_loc('AWCL') + 1
mistake_labels = list(new_summary_table.columns[start_index:])

# Numerical features = configured extras + mistake-bin columns.
numerical_features = numerical_features + mistake_labels

# Interval labels for the total-moves bins, e.g. '(0,40]' ... '(120,∞]'.
total_moves_labels = [
    f'({low},∞]' if np.isinf(high) else f'({low},{high}]'
    for low, high in zip(total_moves_bins[:-1], total_moves_bins[1:])
]

# Create 'TotalMovesInterval' using pd.cut
new_summary_table['TotalMovesInterval'] = pd.cut(
    new_summary_table['TotalMoves'],
    bins=total_moves_bins,
    labels=total_moves_labels,
    right=True,
    include_lowest=True
)

# Rows without a target value cannot be used for training or evaluation.
new_summary_table = new_summary_table.dropna(subset=['Elo'])

# Categorical features: convert to 'category' dtype and replace missing values
# by an explicit 'Unknown' category.
for col in categorical_features:
    new_summary_table[col] = new_summary_table[col].astype('category')
    if 'Unknown' not in new_summary_table[col].cat.categories:
        new_summary_table[col] = new_summary_table[col].cat.add_categories(['Unknown'])
    new_summary_table[col] = new_summary_table[col].fillna('Unknown')

# Missing values in the numerical features are filled with 0.
new_summary_table[numerical_features] = new_summary_table[numerical_features].fillna(0)

# Target variable and feature matrix
y = new_summary_table['Elo']
X = new_summary_table[categorical_features + numerical_features]

# ---------------------------------------------------------------------------
# Train / test split and model
# ---------------------------------------------------------------------------
# Stratify the split on Elo quantile bins so that train and test cover the
# Elo range in similar proportions.
y_binned = pd.qcut(y, q=10, duplicates='drop')

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=40,
    stratify=y_binned
)

# One-hot encode the categorical columns; the remaining (numerical) columns
# are passed through unchanged and come AFTER the encoded columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Train the model
pipeline.fit(X_train, y_train)

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
y_pred = pipeline.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f'Root Mean Squared Error (RMSE): {rmse:.2f}')
print(f'R-squared Score (R²): {r2:.2f}')

# Share of predictions whose absolute error is at most `threshold` Elo points
absolute_errors = np.abs(y_pred - y_test)
threshold = 300
within_threshold = np.sum(absolute_errors <= threshold)
total_predictions = len(y_test)
percentage_within_threshold = (within_threshold / total_predictions) * 100

print(f"Percentage of predictions within ±{threshold} Elo: {percentage_within_threshold:.2f}%")

# ---------------------------------------------------------------------------
# Coefficients
# ---------------------------------------------------------------------------
# Names of the one-hot encoded columns, followed by the passthrough columns,
# in the same order as the columns seen by the regressor.
onehot_feature_names = pipeline.named_steps['preprocessor'].named_transformers_['cat'].get_feature_names_out(categorical_features)
all_feature_names = np.concatenate([onehot_feature_names, numerical_features])
coefficients = pipeline.named_steps['regressor'].coef_

# Feature names with their coefficients, sorted by absolute value (largest first)
coef_df = pd.DataFrame({
    'Feature': all_feature_names,
    'Coefficient': coefficients
})
coef_df['AbsCoefficient'] = coef_df['Coefficient'].abs()
coef_df = coef_df.sort_values(by='AbsCoefficient', ascending=False)

# Coefficients of the mistake-bin features only (used by the graphs below)
mistake_coef_df = coef_df[coef_df['Feature'].isin(mistake_labels)]

# Range of the predictions on the test set
print(f"min: {y_pred.min()}")
print(f"max: {y_pred.max()}")
print(f"mean: {y_pred.mean()}")

# Graphs and observations

In [None]:
# Step 1: Weight of each mistake bin = absolute value of its regression coefficient
mistake_coefficients = {
    feature: abs(coefficient)
    for feature, coefficient in mistake_coef_df.set_index('Feature')['Coefficient'].items()
}

# Step 2: Compute 'WeightedMistakeScore' as the weighted sum of the mistake counts
# (DataFrame.dot aligns the mistake columns with the index of the weights)
coefficients_series = pd.Series(mistake_coefficients)
new_summary_table['WeightedMistakeScore'] = new_summary_table[mistake_labels].dot(coefficients_series)

# Step 3: Create Elo bins and labels ('1400-1599', '1600-1799', ...)
elo_bins = [1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800]
elo_labels = [
    f'{int(lower)}-{int(upper - 1)}'  # Subtract 1 to make the upper limit inclusive
    for lower, upper in zip(elo_bins[:-1], elo_bins[1:])
]

# Assign Elo bins to the data; ratings outside the bins get a missing 'EloBin'
new_summary_table['EloBin'] = pd.cut(
    new_summary_table['Elo'],
    bins=elo_bins,
    labels=elo_labels,
    right=False,        # Left-inclusive intervals
    include_lowest=True # Include the lowest value
)

# Step 4: Compute average 'WeightedMistakeScore' per Elo bin.
# groupby ignores rows whose 'EloBin' is missing, so those rows are NOT dropped
# from `new_summary_table`: later cells (low-Elo sample, Elo histogram) keep
# working on the full table instead of a silently truncated one.
# observed=False keeps every Elo bin in the result, even an empty one.
avg_mistake_per_elo_bin = (
    new_summary_table
    .groupby('EloBin', observed=False)['WeightedMistakeScore']
    .mean()
    .reset_index()
)

# 1. Coefficient Bar Plot (Top 10 Features)
# Feature names in the order seen by the regressor: one-hot columns first,
# then the passthrough numerical columns
onehot_feature_names = pipeline.named_steps['preprocessor'].named_transformers_['cat'].get_feature_names_out(categorical_features)
feature_names = np.concatenate([onehot_feature_names, numerical_features])

# Get coefficients from the linear regression model
coefficients = pipeline.named_steps['regressor'].coef_

# Coefficients sorted by absolute value (largest first). Built under its own
# name so that the sorted `coef_df` of the training cell is not replaced by an
# unsorted frame.
coef_df_sorted = (
    pd.DataFrame({'Feature': feature_names, 'Coefficient': coefficients})
    .assign(AbsCoefficient=lambda frame: frame['Coefficient'].abs())
    .sort_values(by='AbsCoefficient', ascending=False)
)

# Get the top 10 features by absolute coefficient value
top_features = coef_df_sorted.head(10)

# Plotting
plt.figure(figsize=(10, 6))
plt.bar(top_features['Feature'], top_features['AbsCoefficient'], color='orange')
plt.title('Top 10 Features by Absolute Coefficient Value (Linear Regression)')
plt.xlabel('Feature')
plt.ylabel('Absolute Coefficient Value')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# 2. Predicted vs. Actual Elo Ratings Scatter Plot
# (the dashed red diagonal marks a perfect prediction)
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.5)
plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', linewidth=2)
plt.title('Predicted vs. Actual Elo Ratings (Linear Regression)')
plt.xlabel('Actual Elo Rating')
plt.ylabel('Predicted Elo Rating')
plt.grid(True)
plt.tight_layout()
plt.show()

# 3. Residuals Plot
# Calculate residuals
residuals = y_test - y_pred

# Plot residuals
plt.figure(figsize=(10, 6))
plt.scatter(y_pred, residuals, alpha=0.5)
plt.hlines(y=0, xmin=y_pred.min(), xmax=y_pred.max(), linestyles='dashed', colors='red')
plt.title('Residuals Plot (Linear Regression)')
plt.xlabel('Predicted Elo Rating')
plt.ylabel('Residuals (Actual - Predicted)')
plt.grid(True)
plt.tight_layout()
plt.show()

# 4. Error Distribution Histogram
plt.figure(figsize=(10, 6))
plt.hist(residuals, bins=50, color='blue', edgecolor='black')
plt.title('Distribution of Residuals (Linear Regression)')
plt.xlabel('Residuals (Actual - Predicted)')
plt.ylabel('Frequency')
plt.grid(True)
plt.tight_layout()
plt.show()

# 5. Average Weighted Mistake Score per Elo Rating Bin Plot
plt.figure(figsize=(10, 6))
plt.plot(avg_mistake_per_elo_bin['EloBin'], avg_mistake_per_elo_bin['WeightedMistakeScore'], marker='o')
plt.title('Average Weighted Mistake Score per Elo Rating Bin (Linear Regression Model)')
plt.xlabel('Elo Rating Bin')
plt.ylabel('Average Weighted Mistake Score')
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()


In [None]:
# This cell relies on state created by the cells above: the fitted `pipeline`,
# the feature lists `categorical_features` / `numerical_features`,
# `top_openings` and `new_summary_table`.

# Step 1: Pick up to 20 games with Elo < 1500
new_summary_table_low_elo = new_summary_table[new_summary_table['Elo'] < 1500].copy()

if len(new_summary_table_low_elo) >= 20:
    # Enough games available: draw a reproducible random sample of 20
    sample_games = new_summary_table_low_elo.sample(n=20, random_state=42)
else:
    # Fewer than 20 games: report it and use all of them
    print(f"Only {len(new_summary_table_low_elo)} games found with Elo < 1500.")
    sample_games = new_summary_table_low_elo

# Step 2: Preprocess the selected games the same way as the training data

# Openings outside `top_openings` are grouped under 'Other'
sample_games['Opening'] = sample_games['Opening'].apply(
    lambda opening: opening if opening in top_openings else 'Other'
)

# Categorical features: same categories as in `new_summary_table`,
# missing values replaced by 'Unknown'
for col in categorical_features:
    sample_games[col] = (
        sample_games[col]
        .astype('category')
        .cat.set_categories(new_summary_table[col].cat.categories)
        .fillna('Unknown')
    )

# Numerical features: missing values replaced by 0
sample_games[numerical_features] = sample_games[numerical_features].fillna(0)

# Feature matrix in the column order used for training
X_new = sample_games[categorical_features + numerical_features]

# Step 3: Predict with the trained model
y_new_pred = pipeline.predict(X_new)

# Step 4: Show the prediction next to the actual Elo of each game
sample_games = sample_games.reset_index(drop=True)  # Clean 0..n-1 index for display
sample_games['Predicted_Elo'] = y_new_pred

print("Predictions for each game:")
print(sample_games[['GameID', 'Elo', 'Predicted_Elo']])

# Step 5: Summary statistics of the predictions
average_pred = y_new_pred.mean()
min_pred = y_new_pred.min()
max_pred = y_new_pred.max()

print(f"\nAverage Predicted Elo: {average_pred:.2f}")
print(f"Minimum Predicted Elo: {min_pred:.2f}")
print(f"Maximum Predicted Elo: {max_pred:.2f}")


In [None]:
# Histogram of the Elo distribution (motivates the stratified train/test split)
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(new_summary_table['Elo'], bins=30, color='skyblue', edgecolor='black')
ax.set_title('Distribution of Elo Ratings')
ax.set_xlabel('Elo')
ax.set_ylabel('Frequency')
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
# Feature Importance Bar Chart
# Sort explicitly: at this point `coef_df` may have been rebuilt without
# sorting (the graphs cell above does so), in which case a plain head(10)
# would just return the first ten columns of the design matrix instead of the
# ten largest coefficients.
top_features = coef_df.sort_values(by='AbsCoefficient', ascending=False).head(10)  # Top 10 features by absolute value
plt.figure(figsize=(10, 6))
plt.bar(top_features['Feature'], top_features['AbsCoefficient'], color='orange')
plt.title('Top 10 Features by Absolute Coefficient Value')
plt.xlabel('Feature')
plt.ylabel('Absolute Coefficient')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

In [None]:
# Scatter plot: predicted Elo (y-axis) against actual Elo (x-axis) on the test set
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.5, color='blue')
ax.set_title('Predicted vs Actual Elo Ratings')
ax.set_xlabel('Actual Elo')
ax.set_ylabel('Predicted Elo')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:

# Predicted vs. actual Elo ratings on the test set (default colors, no tight layout)
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, y_pred, alpha=0.5)
ax.set_title('Predicted vs. Actual Elo Ratings')
ax.set_xlabel('Actual Elo Rating')
ax.set_ylabel('Predicted Elo Rating')
ax.grid(True)
plt.show()


In [None]:
# Residual = actual Elo minus predicted Elo, per test game
residuals = y_test - y_pred

# Residuals against the predicted rating; the dashed red line marks zero error
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_pred, residuals, alpha=0.5)
ax.hlines(0, y_pred.min(), y_pred.max(), colors='r', linestyles='dashed')
ax.set_title('Residuals vs. Predicted Elo Ratings')
ax.set_xlabel('Predicted Elo Rating')
ax.set_ylabel('Residuals (Actual - Predicted)')
ax.grid(True)
plt.show()


In [None]:
# Collapse all numerical features into one feature: their row-wise sum
X_combined = X[numerical_features].sum(axis=1)
# scikit-learn expects a 2-D feature matrix, hence the single-column reshape
X_combined_reshaped = X_combined.values.reshape(-1, 1)

# Train/test split of the single-feature data (same size and seed as before, no stratification)
(
    X_train_combined,
    X_test_combined,
    y_train_combined,
    y_test_combined,
) = train_test_split(X_combined_reshaped, y, test_size=0.2, random_state=40)

# Fit a one-feature linear regression and predict on the test part
model_combined = LinearRegression()
model_combined.fit(X_train_combined, y_train_combined)
y_pred_combined = model_combined.predict(X_test_combined)

# Test points together with the fitted regression line
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X_test_combined, y_test_combined, alpha=0.5, label='Actual Data')
ax.plot(X_test_combined, y_pred_combined, color='red', linewidth=2, label='Regression Line')
ax.set_title('Elo Rating vs. Combined Numerical Feature')
ax.set_xlabel('Sum of Numerical Features')
ax.set_ylabel('Elo Rating')
ax.legend()
ax.grid(True)
plt.show()


## Creating table based on train/test set

As a double check, we perform a train/test split and compute the winning chance table only on the training dataset. The results are nearly identical.

In [1]:
import winning_chances
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
# Display option: never truncate the column list when a DataFrame is shown.
pd.set_option('display.max_columns', None)

# Load the analyzed games again (same file as in the first part of the
# notebook) so that this section can be run on its own.
df=pd.read_csv("../huge_analyzed_games/combined_analyzed_games_20.csv")

In [None]:
# Bin edges handed to create_summary_table via its `mistake_bins` argument.
mistake_bins = [5, 10, 15, 20, 25, 30, 35, 40, 50, 60, 70, 100]

# Split the games into a training and a testing part (80 % / 20 %, fixed seed).
df_train, df_test = winning_chances.train_test_split_games(
    df,
    train_size=0.8,
    random_state=42,
)

# The winning chance table is computed from the training games only ...
table_train = winning_chances.compute_winning_chance_table(df_train)

# ... and that same table is used to build BOTH summary tables.
summary_table_kwargs = {
    'mistake_bins': mistake_bins,
    'winning_chance_table': table_train,
}
summary_table_train = winning_chances.create_summary_table(df_train, **summary_table_kwargs)
summary_table_test = winning_chances.create_summary_table(df_test, **summary_table_kwargs)


Performing the linear regression on the new dataset

In [3]:
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
numerical_features = ['AWCL']  # The mistake-bin columns are appended below
categorical_features = ['Opening', 'Result', 'TotalMovesInterval']  # One-hot encoded features
total_moves_bins = [0, 40, 60, 80, 100, 120, np.inf]  # Bin edges for 'TotalMoves'
mistake_bins = [5, 10, 15, 20, 25, 30, 35, 40, 50, 60, 70, 100]
Opening_number = 70     # Number of top openings to consider
Color_Player = 'White'  # Player color to analyze ('White' or 'Black')

# ---------------------------------------------------------------------------
# Training set
# ---------------------------------------------------------------------------
# Most frequent openings, counted over the whole training summary table.
top_openings = summary_table_train['Opening'].value_counts().nlargest(Opening_number).index.tolist()

# Keep the rows of the chosen color (work on a copy).
new_summary_table_train = summary_table_train[summary_table_train['Player'] == Color_Player].copy()

# Group less frequent openings as 'Other'. This is done on the copy, NOT on
# `summary_table_train` itself: overwriting the source column would make a
# re-run of this cell count 'Other' as an opening, which can push a real
# opening out of the top list, so the result would change from run to run.
new_summary_table_train['Opening'] = new_summary_table_train['Opening'].where(
    new_summary_table_train['Opening'].isin(top_openings), 'Other'
)

# The mistake-bin columns are all columns located after 'AWCL'.
start_index = new_summary_table_train.columns.get_loc('AWCL') + 1
mistake_labels = list(new_summary_table_train.columns[start_index:])

# Numerical features = 'AWCL' + mistake-bin columns.
numerical_features = numerical_features + mistake_labels

# Interval labels for the total-moves bins, e.g. '(0,40]' ... '(120,∞]'.
total_moves_labels = [
    f'({low},∞]' if np.isinf(high) else f'({low},{high}]'
    for low, high in zip(total_moves_bins[:-1], total_moves_bins[1:])
]

# Create 'TotalMovesInterval' using pd.cut
new_summary_table_train['TotalMovesInterval'] = pd.cut(
    new_summary_table_train['TotalMoves'],
    bins=total_moves_bins,
    labels=total_moves_labels,
    right=True,
    include_lowest=True
)

# Rows without a target value cannot be used.
new_summary_table_train = new_summary_table_train.dropna(subset=['Elo'])

# Categorical features: 'category' dtype, missing values become 'Unknown'.
for col in categorical_features:
    new_summary_table_train[col] = new_summary_table_train[col].astype('category')
    if 'Unknown' not in new_summary_table_train[col].cat.categories:
        new_summary_table_train[col] = new_summary_table_train[col].cat.add_categories(['Unknown'])
    new_summary_table_train[col] = new_summary_table_train[col].fillna('Unknown')

# Missing values in the numerical features are filled with 0.
new_summary_table_train[numerical_features] = new_summary_table_train[numerical_features].fillna(0)

# Target variable and features for training
y_train = new_summary_table_train['Elo']
X_train = new_summary_table_train[categorical_features + numerical_features]

# ---------------------------------------------------------------------------
# Testing set (preprocessed with the choices made on the training set)
# ---------------------------------------------------------------------------
new_summary_table_test = summary_table_test[summary_table_test['Player'] == Color_Player].copy()

# Same opening grouping as for training, again applied to the copy only.
new_summary_table_test['Opening'] = new_summary_table_test['Opening'].where(
    new_summary_table_test['Opening'].isin(top_openings), 'Other'
)

new_summary_table_test['TotalMovesInterval'] = pd.cut(
    new_summary_table_test['TotalMoves'],
    bins=total_moves_bins,
    labels=total_moves_labels,
    right=True,
    include_lowest=True
)

new_summary_table_test = new_summary_table_test.dropna(subset=['Elo'])

# Categorical features: restrict to the training categories; values that do
# not occur in training become missing and are then mapped to 'Unknown'.
for col in categorical_features:
    new_summary_table_test[col] = new_summary_table_test[col].astype('category')
    new_summary_table_test[col] = new_summary_table_test[col].cat.set_categories(new_summary_table_train[col].cat.categories)
    new_summary_table_test[col] = new_summary_table_test[col].fillna('Unknown')

# Missing values in the numerical features are filled with 0.
new_summary_table_test[numerical_features] = new_summary_table_test[numerical_features].fillna(0)

# Target variable and features for testing
y_test = new_summary_table_test['Elo']
X_test = new_summary_table_test[categorical_features + numerical_features]

# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
# One-hot encode the categorical columns; the remaining (numerical) columns
# are passed through unchanged and come AFTER the encoded columns.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

# Train the model
pipeline.fit(X_train, y_train)

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
y_pred = pipeline.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
# NOTE(review): R² is not defined for fewer than two test samples (r2_score
# then yields nan); check len(y_test) if a nan shows up here.
r2 = r2_score(y_test, y_pred)

print(f'Root Mean Squared Error (RMSE): {rmse:.2f}')
print(f'R-squared Score (R²): {r2:.2f}')

# Share of predictions whose absolute error is at most `threshold` Elo points
absolute_errors = np.abs(y_pred - y_test)
threshold = 300
within_threshold = np.sum(absolute_errors <= threshold)
total_predictions = len(y_test)
percentage_within_threshold = (within_threshold / total_predictions) * 100

print(f"Percentage of predictions within ±{threshold} Elo: {percentage_within_threshold:.2f}%")

# ---------------------------------------------------------------------------
# Coefficients
# ---------------------------------------------------------------------------
# Names of the one-hot encoded columns, followed by the passthrough columns,
# in the same order as the columns seen by the regressor.
onehot_feature_names = pipeline.named_steps['preprocessor'].named_transformers_['cat'].get_feature_names_out(categorical_features)
all_feature_names = np.concatenate([onehot_feature_names, numerical_features])
coefficients = pipeline.named_steps['regressor'].coef_

# Feature names with their coefficients, sorted by absolute value (largest first)
coef_df = pd.DataFrame({
    'Feature': all_feature_names,
    'Coefficient': coefficients
})
coef_df['AbsCoefficient'] = coef_df['Coefficient'].abs()
coef_df = coef_df.sort_values(by='AbsCoefficient', ascending=False)

# Display the top 5 features with the highest absolute coefficients
print("\nTop 5 features by absolute coefficient value:")
print(coef_df[['Feature', 'Coefficient']].head(5))

# Display the coefficients for the mistake labels
mistake_coef_df = coef_df[coef_df['Feature'].isin(mistake_labels)]
print("\nCoefficients for the mistake labels:")
print(mistake_coef_df[['Feature', 'Coefficient']])

Root Mean Squared Error (RMSE): 284.09
R-squared Score (R²): nan
Percentage of predictions within ±300 Elo: 100.00%

Top 5 features by absolute coefficient value:
                                        Feature  Coefficient
29  Opening_Queen's pawn game, Krause variation  -668.680325
42                               Opening_Vienna   625.007025
11                         Opening_Four knights  -569.692350
23                                 Opening_Pirc  -478.208058
41                              Opening_Unknown   438.710561

Coefficients for the mistake labels:
     Feature  Coefficient
59   (35,40]   177.481030
63  (70,100]  -132.551046
60   (40,50]  -121.685672
56   (20,25]   -77.045770
61   (50,60]    63.838408
57   (25,30]    46.343011
55   (15,20]   -19.601725
58   (30,35]   -19.111015
54   (10,15]   -15.096423
53    (5,10]     8.223799
62   (60,70]     0.000000


