In [43]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import OrdinalEncoder
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, classification_report
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix, balanced_accuracy_score
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from collections import Counter
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
import xgboost as xgb
import re

In [27]:
# Load cleaned data
drive.mount('/content/drive')
path = "/content/drive/MyDrive/Colab Notebooks/Pokémon Data Mining/clean_pokemon_data.csv"
pokemon_data= pd.read_csv(path, index_col=0)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Data Preparation**

In [28]:
# Separate the data into categorical columns
cat_cols = pokemon_data.select_dtypes(exclude=[np.number]).columns.tolist()

# # Initialize the ordinal encoder
# encoder = OrdinalEncoder()

# # Fit and transform the data
# data_encoded = encoder.fit_transform(pokemon_data[cat_cols])

# # Initialize the imputer
# imputer = IterativeImputer(max_iter=10, random_state=0)

# # Fit and transform the data
# data_imputed = imputer.fit_transform(data_encoded)

# # Inverse transform the ordinal encoder to get the original categories
# data_imputed_cat = encoder.inverse_transform(data_imputed)

# # Update dataframe
# pokemon_data[cat_cols] = data_imputed_cat


In [29]:
print(pokemon_data.isnull().sum())

name                     0
german_name              0
japanese_name            0
generation               0
status                   0
species                  0
type_number              0
type_1                   0
type_2                   0
height_m                 0
weight_kg                0
abilities_number         0
ability_1                0
ability_2                0
ability_hidden           0
total_points             0
hp                       0
attack                   0
defense                  0
sp_attack                0
sp_defense               0
speed                    0
catch_rate               0
base_friendship          0
base_experience          0
growth_rate              0
egg_type_number          0
egg_type_1               0
egg_type_2               0
percentage_male          0
egg_cycles               0
against_normal           0
against_fire             0
against_water            0
against_electric         0
against_grass            0
against_ice              0
a


The decision to keep the 'Unknown' category in the dataset, specifically for the type_2 and abilities columns, is because of the potential bias that might have been introduced through the MICE and Ordinal Encoder imputation methods. MICE might introduce bias because it would generate values based on the distribution of known values. This means that if one category was particularly dominant in the existing data, MICE would most likely fill missing values with this dominant category. The ordinal encoder could also introduce bias because it imposes an order on the categories, meaning that the model may interpret the categories as having an ordered relationship that doesn't actually exist. Keeping 'Unknown' as a category for missing values has its own potential bias. It assumes that the missing data is completely at random and does not depend on any other feature in the dataset. However, this might not always be true. It's also possible that we're losing potentially valuable information that could have been inferred from other columns. Additionally, some machine learning models may interpret 'Unknown' as a distinct category, which could influence the model in unintended ways.
Considering the current dataset and the Pokémon universe's complexity, it was decided that the 'Unknown' category was the least biased way to handle missing values.

In [30]:
# Separate target variable
y = pokemon_data['status']
pokemon_data.drop('status', axis=1, inplace=True)
# print(pokemon_data.columns)

# Split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(pokemon_data, y, test_size=0.2, random_state=42)

# List of categorical columns
cat_cols = X_train.select_dtypes(include=[object]).columns.tolist()

# One-hot encoding for categorical columns
encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
encoder.fit(X_train[cat_cols])

# Transform both training and testing data
encoded_cols_train = encoder.transform(X_train[cat_cols])
encoded_cols_test = encoder.transform(X_test[cat_cols])

# Create dataframes from the encoded columns
encoded_df_train = pd.DataFrame(encoded_cols_train, columns = encoder.get_feature_names_out(cat_cols))
encoded_df_test = pd.DataFrame(encoded_cols_test, columns = encoder.get_feature_names_out(cat_cols))

# Drop original categorical columns
X_train.drop(cat_cols, axis=1, inplace=True)
X_test.drop(cat_cols, axis=1, inplace=True)

# Reset index for the encoded dataframes
encoded_df_train = encoded_df_train.reset_index(drop=True)
encoded_df_test = encoded_df_test.reset_index(drop=True)

# print("Duplicate columns in X_train: ", X_train.columns[X_train.columns.duplicated()])
# print("Duplicate columns in encoded_df_train: ", encoded_df_train.columns[encoded_df_train.columns.duplicated()])

# print("Shape of X_train: ", X_train.shape)
# print("Shape of encoded_df_train: ", encoded_df_train.shape)
# print("Shape of X_test: ", X_test.shape)
# print("Shape of encoded_df_test: ", encoded_df_test.shape)

X_train = X_train.reset_index(drop=True)
encoded_df_train = encoded_df_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
encoded_df_test = encoded_df_test.reset_index(drop=True)


# Add the encoded columns
X_train = pd.concat([X_train, encoded_df_train], axis=1)
X_test = pd.concat([X_test, encoded_df_test], axis=1)



In the snippet above, the target variable, 'status', is separated from the main dataset and stored as 'y'. All categorical columns in the dataset are then identified. One-hot encoding is then employed to transform these categorical columns, a crucial process that facilitates more efficient interpretation of categorical data by machine learning algorithms. Unique names are generated for these newly encoded columns, and the transformed data is stored in a new DataFrame, 'encoded_df'. Following this transformation, the original categorical columns are removed from the dataset, and the indices for both the original and encoded DataFrames are reset to ensure consistency. The original DataFrame (with the dropped categorical columns) and the 'encoded_df' are then combined into a single DataFrame. Lastly, the data is split into training and test sets, with 80% allocated for training and 20% for testing. This allows for training and subsequent evaluation of the machine learning model's performance.

In [31]:
# # Instantiate the scalers
# scaler = StandardScaler()  # or MinMaxScaler()

# # Scale all the numerical columns

# # List of numerical columns
# num_cols = pokemon_data.select_dtypes(include=[np.number]).columns.tolist()

# # Fit and transform the data
# pokemon_data[num_cols] = scaler.fit_transform(pokemon_data[num_cols])

# Instantiate the scalers
scaler = StandardScaler()

# List of numerical columns
num_cols_train = X_train.select_dtypes(include=[np.number]).columns.tolist()
num_cols_test = X_test.select_dtypes(include=[np.number]).columns.tolist()

# Fit and transform the training data
X_train[num_cols_train] = scaler.fit_transform(X_train[num_cols_train])

# Transform the test data
X_test[num_cols_test] = scaler.transform(X_test[num_cols_test])

In [32]:
# Initialize the model
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model on training data
rf.fit(X_train, y_train)

# Get feature importances
importances = rf.feature_importances_

# Get the indices of the features sorted by importance
indices = np.argsort(importances)[::-1]

# Print the feature ranking
print("Feature ranking:")

for f in range(X_train.shape[1]):
    print("%d. feature %s (%f)" % (f + 1, X_train.columns[indices[f]], importances[indices[f]]))

# Select a subset of data based on the feature importances
selected_features = X_train.columns[indices[:10]]  # top 10 features

X_train_selected = X_train[selected_features]


Feature ranking:
1. feature egg_cycles (0.061646)
2. feature egg_type_1_Undiscovered (0.059287)
3. feature individual_points_sum (0.057876)
4. feature total_points (0.052460)
5. feature catch_rate (0.045452)
6. feature base_experience (0.038832)
7. feature total_points_bins_(555.0, 586.667] (0.037379)
8. feature base_friendship (0.035431)
9. feature percentage_male (0.027527)
10. feature total_points_bins_(650.0, 681.667] (0.023591)
11. feature height_m (0.022422)
12. feature growth_rate_Slow (0.020978)
13. feature hp (0.020559)
14. feature weight_kg (0.019682)
15. feature total_points_bins_(586.667, 618.333] (0.019451)
16. feature sp_attack (0.019355)
17. feature ability_hidden_Unknown (0.017171)
18. feature abilities_number (0.016499)
19. feature speed (0.014530)
20. feature defense (0.011111)
21. feature sp_defense (0.010050)
22. feature attack (0.009643)
23. feature generation (0.007868)
24. feature ability_2_Unknown (0.007005)
25. feature against_ground (0.006291)
26. feature agai

In [33]:
# Initialize the model
rf = RandomForestClassifier(n_estimators=100, random_state=42)

# Perform 5-fold cross validation
scores = cross_val_score(rf, X_train_selected, y_train, cv=5)

print("Cross-validation scores:", scores)
print("Average cross-validation score:", scores.mean())


Cross-validation scores: [0.96428571 0.98802395 0.98802395 0.9760479  0.9760479 ]
Average cross-validation score: 0.9784858853721129


The average cross-validation score is approximately 0.978 or 97.8%, which indicates that the model, on average, correctly classified about 97.8% of the instances across all folds. This is a high accuracy score and suggests that the model is performing quite well.

However, there seems to be some variance in the cross-validation scores (ranging from 0.988 to 0.964, or approximately 98.8% to 96.4%). This might suggest that the model's performance is somewhat dependent on the particular subset of the data it is trained on, which may be a sign of overfitting. As a result, it may perform poorly on unseen data.

To address this, we can consider strategies such as adding regularization to the model or pruning (for decision trees). But given that even the lowest score is still relatively high, the model seems to be performing well overall as of now.

In [34]:
# Create the classifier
clf = SVC()

# Generate cross-validated estimates for each input data point
y_train_pred = cross_val_predict(clf, X_train, y_train, cv=5)

# Calculate metrics
precision = precision_score(y_train, y_train_pred, average='weighted')  
recall = recall_score(y_train, y_train_pred, average='weighted')  
f1 = f1_score(y_train, y_train_pred, average='weighted') 

print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)


Precision:  0.7708671962638218
Recall:  0.8779904306220095
F1 Score:  0.8209490141102611


  _warn_prf(average, modifier, msg_start, len(result))


In [35]:
# Fit the classifier to the training data
clf.fit(X_train, y_train)

# Predict the classes on the test set
y_pred = clf.predict(X_test)

# Generate confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(cm)

# Compute balanced accuracy
balanced_acc = balanced_accuracy_score(y_test, y_pred)
print("Balanced Accuracy: ", balanced_acc)

# Compute weighted metrics
weighted_precision = precision_score(y_test, y_pred, average='weighted')
weighted_recall = recall_score(y_test, y_pred, average='weighted')
weighted_f1 = f1_score(y_test, y_pred, average='weighted')

print("Weighted Precision: ", weighted_precision)
print("Weighted Recall: ", weighted_recall)
print("Weighted F1 Score: ", weighted_f1)


Confusion Matrix:
[[  1   0   9   0]
 [  0   0   4   0]
 [  0   0 184   0]
 [  0   0  11   0]]
Balanced Accuracy:  0.275
Weighted Precision:  0.826647037173353
Weighted Recall:  0.8851674641148325
Weighted F1 Score:  0.8351812234245589


  _warn_prf(average, modifier, msg_start, len(result))


The confusion matrix shows the number of correct and incorrect predictions made by the classifier, broken down by each class. From the matrix, it appears that the model is only correctly predicting the class that corresponds to "184". All other classes were predicted as "0". This suggests that the model is heavily biased towards the majority class.

The Balanced Accuracy is 0.275, which is quite low. It indicates that the model is not accurately predicting all the classes, which we can clearly see from the confusion matrix.

In terms of the weighted metrics, they take into account the imbalance in the classes by computing the average metric in which each class’s contribution is weighted by its presence in the true data sample.

The Weighted Precision is 0.827, Weighted Recall is 0.885, and Weighted F1 Score is 0.835. These scores are higher than the Balanced Accuracy because they are influenced more by the majority class (the class with "184" samples) and less by the minority classes (the classes with "9", "4", and "11" samples). However, these scores should be interpreted carefully considering the heavy class imbalance as indicated by the confusion matrix.

This result suggests that, while the model may be quite accurate overall, it performs poorly for minority classes. In a real-world setting, this could be a major issue, especially if correct predictions for all classes are equally important.

We could consider using techniques specifically designed for imbalanced datasets to improve the model's performance across all classes. Some of these techniques include oversampling the minority class, undersampling the majority class, or using a combination of both (SMOTE). Some models also allow for adjusting class weights to place a higher penalty on misclassifying minority classes.

A warning was raised mentioning that precision is ill-defined and being set to 0.0 in labels with no predicted samples. This is because there are classes that the model did not predict at all, which leads to a precision of 0.0 for those classes. This warning is consistent with the model's observed bias towards the majority class.

In [36]:
# Create SMOTE object
smote = SMOTE(random_state=42)

# Fit and resample the training data only
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Now you can see the class distribution in the training set
print('Resampled dataset shape %s' % Counter(y_train_resampled))

# Then proceed to train your model using the resampled training data
clf.fit(X_train_resampled, y_train_resampled)

# Predict the classes on the original (non-resampled) test set
y_pred = clf.predict(X_test)


Resampled dataset shape Counter({'Normal': 734, 'Legendary': 734, 'Sub Legendary': 734, 'Mythical': 734})


The SMOTE (Synthetic Minority Over-sampling Technique) algorithm has been used to resample the training data in order to address the issue of class imbalance(the classes are not represented equally)

After applying SMOTE, the Counter function shows the distribution of the classes in the training dataset. It shows that all the classes now have the same number of samples (734 in this case), meaning that the class imbalance has been successfully addressed.

SMOTE achieves this by creating "synthetic" examples of the minority class. It operates by finding instances of the minority class that are close together in the feature space, drawing lines between these instances, and then creating new instances along these lines. The goal is to increase the number of minority class instances in a way that is plausible given the available data.

It's important to note that SMOTE only balances the classes in the training set. The original distribution of classes in the test set is preserved. This is because the test set should reflect the real-world distribution of classes as closely as possible to give an accurate estimate of model performance.

In [37]:
# Compute Precision, Recall, F1 Score 
precision = precision_score(y_test, y_pred, average='macro')  
recall = recall_score(y_test, y_pred, average='macro')  
f1 = f1_score(y_test, y_pred, average='macro')  

print("Precision: ", precision)
print("Recall: ", recall)
print("F1 Score: ", f1)

# Compute Area Under the Receiver Operating Characteristic Curve (ROC AUC) from prediction scores

# Binarize the output
lb = LabelBinarizer()
y_test_bin = lb.fit_transform(y_test)
y_pred_bin = lb.transform(y_pred)

roc_auc = roc_auc_score(y_test_bin, y_pred_bin, multi_class='ovr')
print("ROC AUC Score: ", roc_auc)

# Also generate a classification report for a detailed performance analysis for each class
print(classification_report(y_test, y_pred))

Precision:  0.9656084656084656
Recall:  0.8193181818181818
F1 Score:  0.8819871394303453
ROC AUC Score:  0.8840309502055733
               precision    recall  f1-score   support

    Legendary       0.89      0.80      0.84        10
     Mythical       1.00      0.75      0.86         4
       Normal       0.97      1.00      0.99       184
Sub Legendary       1.00      0.73      0.84        11

     accuracy                           0.97       209
    macro avg       0.97      0.82      0.88       209
 weighted avg       0.97      0.97      0.97       209



The macro-average precision of the model is approximately 0.97, which is very good - it means that on average, when the model predicts a class, it is correct about 97% of the time across all classes.

The macro-average recall is about 0.82. This suggests that the model is quite good at detecting the positive instances. However, we can see from the classification report that it often fails to correctly identify the 'Legendary', 'Mythical', and 'Sub-Legendary' classes with high recall.

F1 Score is the harmonic mean of Precision and Recall and tries to balance the two. The macro-average F1 Score is 0.88, suggesting that the model is well balanced in terms of precision and recall.

The ROC AUC Score stands for Receiver Operating Characteristic Area Under the Curve. It provides an aggregate measure of performance across all possible classification thresholds. The ROC AUC score is approximately 0.88, which is quite high. This suggests the model's ability to distinguish between the classes is better than random.

The classification report shows that the model performs exceptionally well on the "Normal" class (high precision and recall) but not as well on the other classes (recall is lower for 'Legendary', 'Mythical', and 'Sub-Legendary'). This reflects the imbalance in the data (the model is good at predicting the majority class but not the minority classes).

While the precision is higher and the recall is more realistic after SMOTE, the overall performance on the minority classes (Legendary, Mythical, Sub Legendary) is still not as good as for the 'Normal' class. The ROC AUC score is also not as high as it could be, suggesting the model might not be perfectly calibrated.

Even though SMOTE has helped the model to recognize some minority class instances (as evidenced by the non-zero precision for 'Legendary', 'Mythical', and 'Sub Legendary' classes), there is still room for improvement. Different resampling strategies or different modeling techniques could be considered. Also, depending on the business context, it might be worth focusing on different metrics (like recall for 'Legendary' class if those instances are particularly important).

In [38]:

def clean_column_names(df):
    df.columns = df.columns.str.strip()  # Remove leading/trailing whitespace
    df.columns = df.columns.str.replace('[^\w\s]', '_')  # Replace non-alphanumeric characters with underscore
    df.columns = df.columns.str.replace('\s+', '_')  # Replace whitespace with underscore
    df.columns = df.columns.str.replace('__+', '_')  # Replace multiple underscores with single underscore
    df.columns = df.columns.str.lower()  # Convert to lowercase
    return df



In [39]:
# def rename_duplicates(df):
#     cols=pd.Series(df.columns)
#     for dup in cols[cols.duplicated(keep=False)].unique(): 
#         count = df.filter(like=dup).shape[1]
#         new_names = [dup + '_' + str(i) if i != 0 else dup for i in range(count)]
#         df.rename(columns=dict(zip(df.filter(like=dup).columns, new_names)), inplace=True)
#     return df

def rename_duplicates(df):
    cols = pd.Series(df.columns)
    for dup in cols[cols.duplicated()].unique(): 
        dup_count = sum(cols == dup)
        # Create new names for duplicates
        new_names = [dup + '_' + str(i) if i != 0 else dup for i in range(dup_count)]
        # Rename duplicates
        cols[cols == dup] = new_names
    df.columns = cols
    return df


def check_duplicates(df):
    return df.columns.duplicated().sum()

# Checking the number of duplicate columns before removing
print("Number of duplicate columns before: ", check_duplicates(X_train_resampled))



Number of duplicate columns before:  0


In [40]:

# Clean the column names in your DataFrame
X_train_resampled = clean_column_names(X_train_resampled)

# Apply the rename function to your data
X_train_resampled = rename_duplicates(X_train_resampled)

# Make sure the changes are applied to the test set too
X_test = clean_column_names(X_test)
X_test = rename_duplicates(X_test)



# Checking the number of duplicate columns after the operations
print("Number of duplicate columns after: ", check_duplicates(X_train_resampled))


Number of duplicate columns after:  0


  df.columns = df.columns.str.replace('[^\w\s]', '_')  # Replace non-alphanumeric characters with underscore
  df.columns = df.columns.str.replace('\s+', '_')  # Replace whitespace with underscore
  df.columns = df.columns.str.replace('__+', '_')  # Replace multiple underscores with single underscore
  df.columns = df.columns.str.replace('[^\w\s]', '_')  # Replace non-alphanumeric characters with underscore
  df.columns = df.columns.str.replace('\s+', '_')  # Replace whitespace with underscore
  df.columns = df.columns.str.replace('__+', '_')  # Replace multiple underscores with single underscore


# **Building classification model**

In [41]:
# Instantiate the LightGBM classifier
lgbm = lgb.LGBMClassifier(random_state=42)

# Fit the model to the training data
lgbm.fit(X_train_resampled, y_train_resampled)

# Predict the classes on the test set
y_pred_lgbm = lgbm.predict(X_test)

# Compute metrics
precision_lgbm = precision_score(y_test, y_pred_lgbm, average='macro')
recall_lgbm = recall_score(y_test, y_pred_lgbm, average='macro')
f1_lgbm = f1_score(y_test, y_pred_lgbm, average='macro')
roc_auc_lgbm = roc_auc_score(y_test_bin, lb.transform(y_pred_lgbm), multi_class='ovr')

print("Precision (LightGBM): ", precision_lgbm)
print("Recall (LightGBM): ", recall_lgbm)
print("F1 Score (LightGBM): ", f1_lgbm)
print("ROC AUC Score (LightGBM): ", roc_auc_lgbm)
print(classification_report(y_test, y_pred_lgbm))



Precision (LightGBM):  0.8494949494949495
Recall (LightGBM):  0.8647727272727272
F1 Score (LightGBM):  0.8544657097288676
ROC AUC Score (LightGBM):  0.929907397606411
               precision    recall  f1-score   support

    Legendary       0.89      0.80      0.84        10
     Mythical       0.60      0.75      0.67         4
       Normal       1.00      1.00      1.00       184
Sub Legendary       0.91      0.91      0.91        11

     accuracy                           0.98       209
    macro avg       0.85      0.86      0.85       209
 weighted avg       0.98      0.98      0.98       209



In [42]:
# Initialize LabelEncoder and LabelBinarizer
le = LabelEncoder()

# Use LabelEncoder to transform the classes to integers for model training
y_train_encoded = le.fit_transform(y_train_resampled)
y_test_encoded = le.transform(y_test)

# Initialize a new LabelBinarizer
lb = LabelBinarizer()

# Use LabelBinarizer to transform the classes to binary matrix for ROC AUC score
y_train_bin = lb.fit_transform(y_train_encoded)
y_test_bin = lb.transform(y_test_encoded)

# Instantiate the XGBoost classifier
xgbc = xgb.XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)

# Fit the model to the training data
xgbc.fit(X_train_resampled, y_train_encoded)

# Predict the classes on the test set
y_pred_xgb = xgbc.predict(X_test)

# Binarize the predictions
y_pred_xgb_bin = lb.transform(y_pred_xgb)  # We use LabelBinarizer here directly on the predicted classes

# Compute metrics
precision_xgb = precision_score(y_test_encoded, y_pred_xgb, average='macro')
recall_xgb = recall_score(y_test_encoded, y_pred_xgb, average='macro')
f1_xgb = f1_score(y_test_encoded, y_pred_xgb, average='macro')
roc_auc_xgb = roc_auc_score(y_test_bin, y_pred_xgb_bin, multi_class='ovr')

print("Precision (XGBoost): ", precision_xgb)
print("Recall (XGBoost): ", recall_xgb)
print("F1 Score (XGBoost): ", f1_xgb)
print("ROC AUC Score (XGBoost): ", roc_auc_xgb)
print(classification_report(y_test_encoded, y_pred_xgb))




Precision (XGBoost):  0.8680555555555556
Recall (XGBoost):  0.8647727272727272
F1 Score (XGBoost):  0.8654176201372997
ROC AUC Score (XGBoost):  0.9298858405726589
              precision    recall  f1-score   support

           0       0.89      0.80      0.84        10
           1       0.75      0.75      0.75         4
           2       1.00      1.00      1.00       184
           3       0.83      0.91      0.87        11

    accuracy                           0.98       209
   macro avg       0.87      0.86      0.87       209
weighted avg       0.98      0.98      0.98       209



In [47]:
print(X_train_resampled.shape)
print(y_train_encoded.shape)

# Define a pipeline that first applies SMOTE and then fits the XGBoost model
pipeline = Pipeline([
    ('SMOTE', SMOTE(random_state=42)),
    ('XGBoost', xgbc)  # or lgbm
])

# Perform cross-validation on the pipeline
scores = cross_val_score(pipeline, X_train_resampled, y_train_encoded, cv=5, scoring='accuracy')

print(f"Cross-validation accuracy: {scores.mean():.2f} (+/- {scores.std() * 2:.2f})")


(2936, 3491)
(2936,)




Cross-validation accuracy: 0.99 (+/- 0.02)
