<a href="https://colab.research.google.com/github/Xelaro2304/MSB1015-Scientific-Programming/blob/main/Chess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U ydata-profiling
!pip install berserk
!pip install optuna

In [None]:
import gdown
import os
import berserk
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
from ydata_profiling import ProfileReport


In [None]:
def plot_distribution(data, plot="hist", title=None, label=None, bins=30, show_stats=True, normalize=False):
    """
    Plot a histogram (numeric) or count plot (categorical) for a single variable.

    Parameters:
    - data: array-like (list, NumPy array or pandas Series), the variable to plot
    - plot: "hist" for histogram, "count" for categorical count plot
    - title: optional plot title
    - label: optional x-axis label
    - bins: number of bins for histogram
    - show_stats: show mean/median/mode (only for histogram)
    - normalize: bool, whether to normalize frequencies/counts (0-1 or percentages)

    Raises:
    - ValueError: if `plot` is neither "hist" nor "count"
    """
    # Validate before touching matplotlib so a bad argument does not leave an
    # empty figure behind.
    if plot not in ("hist", "count"):
        raise ValueError("plot must be either 'hist' or 'count'")

    plt.figure(figsize=(6,6))

    if plot == "hist":
        stat_type = 'density' if normalize else 'count'
        sns.histplot(data, bins=bins, kde=False, color=sns.color_palette("colorblind")[0], stat=stat_type)

        if show_stats:
            mean_val = np.mean(data)
            median_val = np.median(data)
            # keepdims=True keeps the result indexable as [0][0]
            mode_val = stats.mode(data, keepdims=True)[0][0]
            plt.axvline(mean_val, color="red", linestyle="--", linewidth=1.5, label=f"Mean = {mean_val:.2f}")
            plt.axvline(median_val, color="green", linestyle="--", linewidth=1.5, label=f"Median = {median_val:.2f}")
            plt.axvline(mode_val, color="blue", linestyle="--", linewidth=1.5, label=f"Mode = {mode_val:.2f}")
            plt.legend()

        plt.ylabel("Density" if normalize else "Frequency")
        plt.xlabel(label if label else "Value")

    else:
        # Wrap in a Series so plain lists / arrays work as well as Series
        counts = pd.Series(data).value_counts(normalize=normalize)
        counts.plot(kind='bar', color=sns.color_palette("colorblind", len(counts)))
        plt.ylabel("Proportion" if normalize else "Count")
        plt.xlabel(label if label else "Category")

    if title:
        plt.title(title)
    plt.tight_layout()
    plt.show()

def plot_winner_by(df, col, title=None, ylabel=None):
    """
    Plot horizontal percentage-stacked bar chart for chess game results.

    Two columns get special treatment:
    - 'avg_rating' is binned into 100-point rating bands before counting.
    - 'rating_diff' is reduced to "White higher rating" / "White not-higher
      rating" and shown as a count heatmap instead of a stacked bar chart.

    Parameters:
    - df: DataFrame containing 'winner' and the numeric column.
    - col: Column name to group by (e.g., 'start_time', 'increment', 'avg_rating').
    - title: Plot title.
    - ylabel: Label for y-axis.
    """
    # Work on a copy so the binning below never modifies the caller's frame
    df_plot = df.copy()

    if col == 'avg_rating':
        bins = [-np.inf, 1000, 1100, 1200, 1300, 1400, 1500, 1600, 1700, 1800, 1900, 2000,
                2100, 2200, 2300, 2400, np.inf]
        labels = ["< 1000","1000-1100","1100-1200","1200-1300","1300-1400","1400-1500",
                  "1500-1600","1600-1700","1700-1800","1800-1900","1900-2000","2000-2100",
                  "2100-2200","2200-2300","2300-2400","> 2400"]
        df_plot[col] = pd.cut(df_plot[col], bins=bins, labels=labels, include_lowest=True)

    if col == 'rating_diff':
        df_plot['rating_diff_c2'] = np.where(df_plot['rating_diff'] > 0,
                                            "White higher rating", "White not-higher rating")
        # Count per category and winner
        count_df = df_plot.groupby(['rating_diff_c2', 'winner']).size().reset_index(name='count')
        count_pivot = count_df.pivot(index='rating_diff_c2', columns='winner', values='count').fillna(0)

        # Plot heatmap
        plt.figure(figsize=(8,6))
        sns.heatmap(count_pivot, annot=True, fmt='g', cmap='Greys', linewidths=0.8, linecolor='black', cbar=False,
                    annot_kws={"size": 15})
        plt.title(title if title else "Result of games by categorical difference in rating")
        plt.xlabel("Colour of winner")
        plt.ylabel("Rating before game")
        plt.show()
        return

    # --- Count per category and winner ---
    # observed=False is spelled out so the binned (categorical) column keeps
    # its empty rating bands regardless of the pandas default.
    count_df = df_plot.groupby([col, 'winner'], observed=False).size().reset_index(name='count')

    # --- Pivot for stacked percentage plot ---
    count_pivot = count_df.pivot(index=col, columns='winner', values='count').fillna(0)
    count_pct = count_pivot.div(count_pivot.sum(axis=1), axis=0)

    # Desired stacking order. reindex (rather than plain column selection)
    # keeps the plot working when one of the results never occurs in `df`.
    result_order = ['white', 'draw', 'black']
    count_pct = count_pct.reindex(columns=result_order, fill_value=0)
    count_pivot = count_pivot.reindex(columns=result_order, fill_value=0)

    # --- Define colors ---
    color_map = {'white': '#d9d9d9', 'draw': 'grey', 'black': 'black'}

    # --- Sort descending by the column ---
    count_pct = count_pct.sort_index(ascending=False)
    count_pivot = count_pivot.loc[count_pct.index]

    # --- Plot ---
    fig, ax = plt.subplots(figsize=(10, 8))
    count_pct.plot(kind='barh', stacked=True,
                   color=[color_map.get(c, 'grey') for c in count_pct.columns],
                   alpha=0.95, width=1, edgecolor='black', ax=ax)

    ax.set_xlabel("Share of wins")
    ax.set_ylabel(ylabel if ylabel else col)
    ax.set_title(title if title else f"Result of games by {col}")
    ax.legend(title="Colour of winner")
    ax.xaxis.set_major_formatter(plt.matplotlib.ticker.PercentFormatter(1.0))

    # --- Add counts in the middle of each segment ---
    for i, val in enumerate(count_pct.index):
        left = 0  # running left edge of the current segment, as a fraction
        for winner in count_pct.columns:
            frac = count_pct.loc[val, winner]  # fraction for plotting
            value = count_pivot.loc[val, winner]  # raw count
            if value > 0:
                # Dark text on the light "white wins" segment, light text elsewhere
                text_color = 'black' if winner == 'white' else 'white'
                ax.text(left + frac/2, i, int(value), ha='center', va='center',
                        color=text_color, fontsize=12)
                left += frac

    plt.tight_layout()
    plt.show()


In [None]:
# Direct-download link of the games CSV hosted on Google Drive
url = 'https://docs.google.com/uc?export=download&id=1lBXYMdZtKdMm4AtGWjJFjmBygUtn8w5y&confirm=t'
path = os.getcwd()
# Destination file: games.csv inside the current working directory
output = path + '/games.csv'
# IPython shell escape. NOTE: the link is repeated as a literal here, so the
# `url` variable above is not what wget actually reads.
!wget -O $output 'https://docs.google.com/uc?export=download&id=1lBXYMdZtKdMm4AtGWjJFjmBygUtn8w5y&confirm=t'

In [None]:
games_df = pd.read_csv(output, sep=';')


In [None]:
games_df.head()

In [None]:
games_df.shape

In [None]:
games_df.info()

In [None]:
games_df.isnull().sum()

In [None]:
games_df.describe()

Everything seems normal except for that minimum white rating, which will be inspected further

In [None]:
negative_rating = games_df["white_rating"]
negative_rating = negative_rating[negative_rating < 0]
print('Number of negative values:', len(negative_rating))
negative_rating.head()

There is another game with a negative value for a rating

In [None]:
# Index labels of the rows whose white rating is negative
negative_rating_indices = list(negative_rating.index)
# These are index *labels*, so select with label-based .loc; positional .iloc
# only gives the same rows while the frame still has its default RangeIndex.
negative_rating_info = games_df.loc[negative_rating_indices]
negative_rating_info.head()

Will check the original values of the game by fetching it with game ID

In [None]:
with open('./token') as f:

    token = f.read()
    token = token.strip()


session = berserk.TokenSession(token)

client = berserk.Client(session)

In [None]:
import re

# Re-download every affected game as PGN and read White's rating from its
# [WhiteElo "..."] header instead of relying on a fixed line number and
# character offsets (which break for 3-digit ratings or a different header order).
negative_rating_games = list(negative_rating_info["id"])
white_elo_pattern = re.compile(r'\[WhiteElo "(\d+)"\]')
corrected_ratings = []
for g in negative_rating_games:
    game = client.games.export(g, as_pgn=True)
    print(game)
    elo_match = white_elo_pattern.search(game)
    if elo_match is None:
        raise ValueError(f"No numeric WhiteElo header found in the PGN of game {g}")
    corrected_ratings.append(int(elo_match.group(1)))
print(corrected_ratings)

In [None]:
# NOTE: plain assignment binds a second name to the same DataFrame (no copy),
# so the correction below is also visible through `games_df`.
games_positive_rtg = games_df
# Overwrite the negative white ratings with the values fetched from Lichess
games_positive_rtg.loc[negative_rating_indices, 'white_rating'] = corrected_ratings
# Display the corrected rows
games_positive_rtg.loc[negative_rating_indices]

In [None]:
profile = ProfileReport(games_df,title="Games report")

profile.to_file("games_report.html")


In [None]:
#!env BROWSER=firefox
#!open games_report.html
from IPython.display import HTML

# show an HTML file inside the notebook
HTML(filename="games_report.html")

In [None]:
plt.hexbin(games_df['white_rating'], games_df['black_rating'], gridsize=20, cmap='viridis')
plt.colorbar(label="Number of games")
plt.xlabel("White rating")
plt.ylabel("Black rating")
plt.title("Player ratings heatmap")
plt.show()


In [None]:
# Average rating
avg_rating = games_df
games_df['avg_rating'] = (games_df['white_rating'] + games_df['black_rating']) / 2
plot_winner_by(games_df, 'avg_rating', title="Result of games by average rating", ylabel="Average rating")


In [None]:
rating_diff = games_df['white_rating'] - games_df['black_rating']
sns.histplot(rating_diff, kde=True, bins=50)
plt.xlabel("Rating difference (White - Black)")
plt.title("Distribution of rating differences")
plt.show()


Things to notice:

1.   There seem to be some duplicated instances
1.   There are 400 unique increment codes, which seems problematic to use for classification
1.   Winner classes are somewhat balanced, except for the amount of draws
2.   The number of draws in winner is higher than the number of draws in victory status, will need to check that












Duplicated instances

In [None]:
import matplotlib.pyplot as plt
from scipy import stats

# How often does every game id occur in the (rating-corrected) data?
game_ids = games_positive_rtg['id']
print('Number of unique records:', len(game_ids.unique()))
id_occurrences = game_ids.value_counts()

# Keep only the ids that show up more than once
duplicate_counts = id_occurrences[id_occurrences > 1]
duplicate_ids = list(duplicate_counts.index)

print('Total number of duplicated records:', duplicate_counts.sum())
print('Number of records duplicated:', len(duplicate_counts))
print('Duplicated ids:', duplicate_ids)

plot_distribution(
    duplicate_counts.values,
    'hist',
    'Number of Duplicates per Game ID',
    'Amount of times duplicated',
    show_stats=False,
)


Out of the 20,058 records, 19,113 are unique, but only 813 game IDs are detected as duplicated instead of the 945 (20,058 − 19,113) surplus rows, because some IDs are repeated more than twice

Most repeated game IDs appear exactly twice, although some of them are repeated 3-5 times

In [None]:
games_unique = games_positive_rtg.drop_duplicates(keep='first')
print(f"Original rows: {len(games_positive_rtg)}, After removing duplicates: {len(games_unique)}")

Dropping fully identical rows only removes ~400 of the duplicates, so I'll inspect further

In [None]:
duplicate_sample = games_positive_rtg[games_positive_rtg['id'].isin(duplicate_ids[0:4])]
duplicate_sample.sort_values(by='id')

Some of the repeated instances have distinct values of "created_at" and "last_move_at", so I'll try removing those two columns

In [None]:
games_time_dropped = games_positive_rtg.drop(columns=['created_at', 'last_move_at'])
#games_time_dropped = games_positive_rtg.drop('last_move_at', axis=1)

games_unique = games_time_dropped.drop_duplicates(keep='first').reset_index(drop=True)
print(f"Original rows: {len(games_time_dropped)}, After removing duplicates: {len(games_unique)}")

All duplicates removed

Convert increment codes

I'll try to handle the increment code in two ways:


1.   Separate time into minutes and time increment per move
2.   Classify each increment code into a time control



In [None]:
increment_code = games_unique['increment_code']
increment_code_split = [time.split('+') for time in increment_code]
print('Splitted increment codes:', increment_code_split)

#As minutes and increment
start_time = [int(minutes[0]) for minutes in increment_code_split]
print('Starting time in minutes:', start_time)

#bar_chart(list(games_unique.iloc()), start_time, 'Starting time per game ID')

increment = [int(seconds[1]) for seconds in increment_code_split]
print('Increment in seconds:', increment)



In [None]:
plot_distribution(start_time, 'hist', 'Starting time per game ID', 'Minutes', show_stats=True)

In [None]:
plot_distribution(increment, 'hist', 'Increment per game ID', 'Seconds', show_stats=True)

In [None]:
start_time_df = pd.DataFrame(start_time, columns=['start_time'])
increment_df = pd.DataFrame(increment, columns=['increment'])
#check if there are games with 0 < start time < 1
#games_unique
under_minute = ((start_time_df < 1) & (start_time_df > 0)).sum()
print('Games with less than 1 minute of start time:', under_minute.iloc[0])


Most games have no increment and a start time of 10 minutes, and no game starts with less than a minute

In [None]:
games_unique = pd.concat([games_unique, start_time_df], axis = 1)
games_unique = pd.concat([games_unique, increment_df], axis = 1)
games_unique.info()

In [None]:
plot_winner_by(games_unique, 'start_time', title="Result of games by start time", ylabel="Start time (minutes)")

In [None]:
plot_winner_by(games_unique, 'increment', title="Result of games by increment", ylabel="Increment (seconds)")


In [None]:
games_unique.info()

In [None]:
games_unique.head()

According to the data source, Lichess, time controls are decided assuming a game length of 40 moves and assigning the following categories depending on the duration:

    ≤ 29s = UltraBullet
    ≤ 179s = Bullet
    ≤ 479s = Blitz
    ≤ 1499s = Rapid
    ≥ 1500s = Classical

In [None]:
def set_time_control(minutes, increment):
    """
    Classify a game into a time-control category.

    The estimated duration in seconds is the start time plus 40 times the
    increment; the first upper bound it does not exceed decides the category.

    Parameters:
    - minutes: start time in minutes
    - increment: increment per move in seconds

    Returns the category name as a string.
    """
    estimated_seconds = 40 * increment + 60 * minutes

    # (inclusive upper bound in seconds, category), in ascending order
    upper_bounds = (
        (29, 'UltraBullet'),
        (179, 'Bullet'),
        (479, 'Blitz'),
        (1499, 'Rapid'),
    )
    for limit, category in upper_bounds:
        if estimated_seconds <= limit:
            return category
    return 'Classical'

time_control = games_unique.apply(lambda x: set_time_control(x['start_time'], x['increment']), axis=1)
time_control_df = pd.DataFrame({'time_control': time_control})
time_control_df.info()


In [None]:
plot_distribution(time_control, "count",'Time control per game ID', 'Time control')

There are so few blitz games that their bar is not even visible in the plot

In [None]:
blitz = [i for i in time_control if i == 'Blitz']
print('Number of blitz games:', len(blitz))

Very few blitz games

In [None]:
games_unique = pd.concat([games_unique, time_control_df], axis=1)
games_unique.info()

In [None]:
games_unique.head()

According to the stats report, not all games with a winner value of draw have a victory status of draw

In [None]:
draws = games_unique[['victory_status','winner']]
draws = draws[games_unique['winner'] == 'draw']
not_draw = draws[draws['victory_status'] != 'draw']
plot_distribution(draws['victory_status'], 'count', 'Victory status of draws', 'Victory Status')

The alternative victory status of drawn games is out of time, which makes sense since a game can end in a draw by insufficient winning material even when a player runs out of time, so it is not a recording error

Finally, I will also add the rating difference as a feature to see if it is useful for the predictions

In [None]:
games_unique['rating_diff'] = games_unique['white_rating'] - games_unique['black_rating']

plt.figure(figsize=(6, 6))

# Violin plot with boxplot and points
sns.violinplot(y=games_unique['rating_diff'], inner=None, color="lightblue")  # violin
#sns.boxplot(y=games_unique['rating_diff'], width=0.1, color="white")          # boxplot

plt.title("Rating difference violing plot")
plt.ylabel("Rating difference")
plt.show()

In [None]:
# Categorical rating difference
plot_winner_by(games_unique, 'rating_diff', title="Result of games by higher rating player in rating")

In [None]:
from sklearn.model_selection import train_test_split

def define_train_test(df, test_size=0.2, random_state=42, stratify=False):
    """
    Split a preprocessed games DataFrame into train and test sets.

    The 'winner' column is the class variable; every other column is a predictor.

    Parameters:
    - df: DataFrame containing the predictors and the 'winner' column
    - test_size: share (or absolute number) of rows held out for testing
    - random_state: seed passed to train_test_split for a reproducible split
    - stratify: if True, keep the class proportions of 'winner' equal in both
      splits (off by default)

    Returns X_train, X_test, y_train, y_test.
    """
    target = 'winner'
    y = df[target]
    X = df.drop(target, axis = 1)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y,
        test_size = test_size,
        random_state = random_state,
        stratify = y if stratify else None,
    )

    # Show the hold-out size as a percentage when it was given as a fraction
    size_label = f"{test_size:.0%}" if isinstance(test_size, float) else str(test_size)
    print(f'Shape of Data ({size_label})')
    print("X_train shape : ", X_train.shape)
    print("y_train shape : ", y_train.shape)
    print("X_test shape : ", X_test.shape)
    print("y_test shape : ", y_test.shape)
    return X_train, X_test, y_train, y_test

games_preprocessed = games_unique.drop(['id',
                                        'turns',
                                        'increment_code',
                                        'victory_status',
                                        'white_id',
                                        'black_id',
                                        'moves',
                                        'opening_eco',
                                        'opening_name',
                                        'opening_ply'], axis = 1)
X_train, X_test, y_train, y_test = define_train_test(games_preprocessed)

In [None]:
games_preprocessed.info()
X_train.info()

In [None]:
train_increment = X_train.drop(columns=['time_control'])
test_increment = X_test.drop(columns=['time_control'])

In [None]:
from sklearn.preprocessing import LabelEncoder

# Fit the encoder once on the labels of BOTH splits, then only transform.
# (Calling fit_transform on y_train would refit the encoder on the training
# labels alone and throw away the fit on all labels.)
le = LabelEncoder()
all_labels = pd.concat([y_train, y_test]).astype(str).unique()
le.fit(all_labels)

y_train = le.transform(y_train)
y_test = le.transform(y_test)

y_train = y_train.astype(int)
y_test = y_test.astype(int)


In [None]:
import optuna
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc
from sklearn.model_selection import cross_val_score
from optuna.samplers import TPESampler


def logging_callback(study, frozen_trial):
    """
    Optuna callback that reports a trial only when the study's best value changed.

    The last reported best value is remembered in the study's user attribute
    "previous_best_value"; when the current best value equals it, nothing is printed.

    Parameters:
    - study: the study being optimized
    - frozen_trial: the trial that has just finished
    """
    current_best = study.best_value
    if study.user_attrs.get("previous_best_value", None) == current_best:
        return

    study.set_user_attr("previous_best_value", current_best)
    message = "Trial {} finished with best value: {} and parameters: {}. ".format(
        frozen_trial.number,
        frozen_trial.value,
        frozen_trial.params,
    )
    print(message)

def objective(trial):
    """
    Optuna objective: pick a model family, sample its hyperparameters and
    return the mean 5-fold cross-validated score on the training data.

    Reads the module-level `train_increment` (predictors) and `y_train`
    (encoded target). The score is recall with average='micro'.
    NOTE: the order of the trial.suggest_* calls is kept as is, since the
    seeded sampler is consulted in call order.
    """

    def rf_model(trial):
        # Build a RandomForestClassifier from hyperparameters sampled by Optuna
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 50, 250),
            "max_depth": trial.suggest_int("max_depth", 4, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 10, 30),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf",  5, 20),
            "max_features": trial.suggest_int("max_features", 1,5),
            "bootstrap": trial.suggest_categorical("bootstrap", [True, False]),
            "random_state": 42

        }
        model = RandomForestClassifier(**params, n_jobs=-1)
        return model

    def lgbm_model(trial):
        # Build a Light Gradient-Boosting Machine classifier from
        # hyperparameters sampled by Optuna
        params = {
            "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1000),
            "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 1000),
            "random_state": 42,
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        }
        model = LGBMClassifier(**params, verbose=-1)
        return model

    def xgb_model(trial):
        # Build an eXtreme Gradient Boosting classifier from
        # hyperparameters sampled by Optuna
        params = {
            "reg_alpha": trial.suggest_float("reg_alpha", 0, 1000),
            "reg_lambda": trial.suggest_float("reg_lambda", 0, 1000),
            "subsample": trial.suggest_float("subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
            "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
            "eval_metric": "logloss",
            "random_state": 42
        }
        model = XGBClassifier(**params)
        return model

    # Select which model to use
    model_name = trial.suggest_categorical("model", ["RandomForest", "XGBoost", "Light Gradient-Boosting Machine"])

    if model_name == "RandomForest":
        model = rf_model(trial)
    elif model_name == "XGBoost":
        model = xgb_model(trial)
    else:
        model = lgbm_model(trial)

    # Evaluate with cross-validation
    # The scorer is micro-averaged recall (not precision); the value returned
    # to Optuna is the mean over the 5 folds.
    recall_scorer = make_scorer(recall_score, average='micro')
    score = cross_val_score(model, train_increment, y_train, cv=5, scoring=recall_scorer).mean()
    return score


#Create optuna study (seeded sampler for reproducible suggestions)
sampler = TPESampler(seed=10)
study_increment = optuna.create_study(direction="maximize", sampler = sampler)
#Optimize study
study_increment.optimize(objective, n_trials=200, callbacks = [logging_callback])
# objective() returns the cross-validated micro-averaged recall, so report it
# under that name rather than as "precision".
print(f"Best CV micro-averaged recall: {study_increment.best_value:.4f}")
best_params = study_increment.best_params.copy()
print(f"Best hyperparameters: {best_params}")


In [None]:
# Rebuild the parameter dict from the study on every run: popping "model" from
# a dict that already lost it would raise KeyError when the cell is re-run.
best_params = dict(study_increment.best_params)
best_model_name = best_params.pop("model")

# Re-create the winning model with the same fixed settings that were used
# during the search, so the refit model is the one that was cross-validated.
if best_model_name == "RandomForest":
    best_model = RandomForestClassifier(**best_params, random_state=42, n_jobs=-1)
elif best_model_name == "XGBoost":
    best_model = XGBClassifier(**best_params, eval_metric="logloss", random_state=42)
else:  # Light Gradient-Boosting Machine
    # No max_iter here: it changes the number of boosting iterations, which
    # was left at its default while the other parameters were tuned.
    best_model = LGBMClassifier(**best_params, random_state=42, verbose=-1)

best_model.fit(train_increment, y_train)
# score() of a classifier is the mean accuracy on the given data
test_accuracy = best_model.score(test_increment, y_test)
test_prec = test_accuracy  # old name kept for any later cell that reads it
print(f"Best model: {best_model_name}")
print(f"Test set accuracy: {test_accuracy:.4f}")

In [None]:

# best_model must already be fitted (RandomForest, XGB, or LGBM)
features = train_increment.columns
raw_importances = best_model.feature_importances_

# Rescale so that the importances add up to 1
importances = raw_importances / raw_importances.sum()

# One row per feature, most important first
feat_imp = pd.DataFrame({"Feature": features, "Importance": importances})
feat_imp = feat_imp.sort_values(by="Importance", ascending=False)

# Horizontal bar chart
plt.figure(figsize=(8, 5))
plt.barh(feat_imp["Feature"], feat_imp["Importance"], color="skyblue")
plt.gca().invert_yaxis()  # first (largest) bar goes to the top
plt.title("Feature Importance")
plt.xlabel("Importance")
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

def plot_classification_metrics(y_true, y_pred, labels=None):
    """
    Plot confusion matrix and print classification metrics.

    Parameters:
    - y_true: array-like of true labels
    - y_pred: array-like of predicted labels, in the same label space as y_true
    - labels: optional list of labels fixing the row/column order of the
      confusion matrix and the row order of the report
    """
    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=labels)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)

    fig, ax = plt.subplots(figsize=(6,6))
    disp.plot(cmap='Blues', ax=ax, colorbar=False)
    plt.title("Confusion Matrix")
    plt.show()

    # Classification report
    # Pass `labels` together with `target_names`: the names are assigned by
    # position, so without `labels` they would be attached to the sorted
    # classes and could end up on the wrong rows.
    target_names = None if labels is None else [str(name) for name in labels]
    print("Classification report:\n")
    print(classification_report(y_true, y_pred, labels=labels, target_names=target_names))
y_pred = best_model.predict(test_increment)
labels = ['white', 'draw', 'black']

# Decode the integer class codes back to 'white' / 'draw' / 'black' so that
# the targets live in the same label space as `labels`.
y_test_decoded = le.inverse_transform(y_test)
y_pred_decoded = le.inverse_transform(y_pred)

plot_classification_metrics(y_test_decoded, y_pred_decoded, labels=labels)

# The former second call passed the still-encoded y_test / y_pred together
# with the string labels; none of those labels occur in the encoded targets,
# so it could not produce a valid confusion matrix and has been removed.


In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
import numpy as np


def plot_multiclass_roc(y_true, y_pred_proba, classes):
    """
    Plots one-vs-rest ROC curves for multi-class classification.

    Parameters:
    - y_true: array-like of true labels, expressed with the same values as `classes`
    - y_pred_proba: predicted probabilities (n_samples x n_classes)
    - classes: list of class labels; classes[i] must describe column i of y_pred_proba

    Raises:
    - ValueError: if y_pred_proba does not have one column per class, or if
      y_true contains a label that is not listed in `classes`
    """
    class_list = np.asarray(classes).tolist()
    y_true = np.asarray(y_true)
    y_pred_proba = np.asarray(y_pred_proba)
    n_classes = len(class_list)

    if y_pred_proba.ndim != 2 or y_pred_proba.shape[1] != n_classes:
        raise ValueError("y_pred_proba must have exactly one column per entry in classes")

    unknown = set(np.unique(y_true).tolist()) - set(class_list)
    if unknown:
        raise ValueError(
            f"y_true contains labels that are not in classes: {sorted(unknown, key=str)}. "
            "Pass y_true in the same label space as classes."
        )

    # Binarize directly against `classes` so that indicator column i and
    # probability column i always refer to the same class. (Re-encoding the
    # names with a LabelEncoder sorts them and breaks that pairing.)
    y_true_bin = label_binarize(y_true, classes=class_list)
    if y_true_bin.shape[1] == 1:
        # Two-class case: label_binarize returns a single column
        y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

    plt.figure(figsize=(8,6))

    for i, class_name in enumerate(class_list):
        fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_pred_proba[:, i])
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, lw=2, label=f'{class_name} (AUC = {roc_auc:.2f})')

    plt.plot([0,1], [0,1], 'k--', lw=2)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Multi-class ROC Curve')
    plt.legend(loc='lower right')
    plt.show()

# --- Example usage ---
# predict_proba columns follow best_model.classes_ (the integer codes the model
# was trained on); decode them so every curve gets the right class name.
classes = list(le.inverse_transform(best_model.classes_))
y_pred_proba = best_model.predict_proba(test_increment)   # predicted probabilities
plot_multiclass_roc(le.inverse_transform(y_test), y_pred_proba, classes)


In [None]:
from sklearn.metrics import precision_recall_curve, average_precision_score

def plot_precision_recall_curve(y_true, y_prob, classes):
    """
    Plots one-vs-rest precision-recall curves for multi-class classification.

    Parameters:
    - y_true: array-like of true labels, expressed with the same values as `classes`
    - y_prob: predicted probabilities (n_samples x n_classes)
    - classes: list of class labels; classes[i] must describe column i of y_prob

    Raises:
    - ValueError: if y_true contains a label that is not listed in `classes`
    """
    class_list = np.asarray(classes).tolist()
    y_true = np.asarray(y_true)
    y_prob = np.asarray(y_prob)

    # Labels outside `classes` would otherwise become all-zero indicator rows
    # and produce meaningless curves.
    unknown = set(np.unique(y_true).tolist()) - set(class_list)
    if unknown:
        raise ValueError(
            f"y_true contains labels that are not in classes: {sorted(unknown, key=str)}. "
            "Pass y_true in the same label space as classes."
        )

    y_true_bin = label_binarize(y_true, classes=class_list)
    if y_true_bin.shape[1] == 1:
        # Two-class case: label_binarize returns a single column
        y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

    plt.figure(figsize=(8,6))

    for i, class_name in enumerate(class_list):
        precision, recall, _ = precision_recall_curve(y_true_bin[:, i], y_prob[:, i])
        ap = average_precision_score(y_true_bin[:, i], y_prob[:, i])
        plt.plot(recall, precision, lw=2, label=f'{class_name} (AP = {ap:.2f})')

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.legend(loc='lower left')
    plt.show()
# predict_proba columns follow best_model.classes_ (the integer codes the model
# was trained on); decode them so every curve gets the right class name.
classes = list(le.inverse_transform(best_model.classes_))
y_pred_proba = best_model.predict_proba(test_increment)   # predicted probabilities
plot_precision_recall_curve(le.inverse_transform(y_test), y_pred_proba, classes)

In [None]:
print(set(y_test) - set(y_train))
