# **Activity 2: Classification and Clustering**

**Importing necessary libraries**

In [None]:
import os, time
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             confusion_matrix, classification_report, cohen_kappa_score)
from tensorflow import keras
from pyspark.sql import SparkSession
from pyspark.ml.classification import LogisticRegression
from pyspark.ml.linalg import Vectors
from pyspark.ml.evaluation import MulticlassClassificationEvaluator

**Week 1 Assignment**

**Downloading the two datasets**

In [None]:
# Load scikit-learn's bundled digits dataset, then pull out the feature
# matrix and the matching target labels as separate arrays.
digits = load_digits()
x_digits = digits.data
y_digits = digits.target

In [None]:
# MNIST dataset: load_data() returns a (train, test) pair, each of which is
# itself an (images, labels) pair.
mnist_train_split, mnist_test_split = keras.datasets.mnist.load_data()
x_train_mnist, y_train_mnist = mnist_train_split
x_test_mnist, y_test_mnist = mnist_test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


**Flattening MNIST**

In [None]:
# Flatten MNIST: keep the leading (sample) axis and collapse every remaining
# axis into a single feature axis.
x_train_mnist_flattened = x_train_mnist.reshape(len(x_train_mnist), -1)
x_test_mnist_flattened = x_test_mnist.reshape(len(x_test_mnist), -1)

# Report the shapes of the flattened training features and their labels
for caption, array in (
    ("x_train_mnist_flattened", x_train_mnist_flattened),
    ("y_train_mnist", y_train_mnist),
):
    print(f"{caption} shape:", array.shape)

x_train_mnist_flattened shape: (60000, 784)
y_train_mnist shape: (60000,)


**Splitting the data: 85% for training and 15% held out for validation (the held-out part is used as the test set below). MNIST already ships with its own train/test split, so only the digits dataset is split here.**


In [None]:
# 85/15 split of the digits dataset.
# stratify=y_digits makes both subsets keep the class proportions of the full
# dataset, so the small hold-out set is not skewed towards some digits by
# chance. random_state is fixed so the split is reproducible.
x_train_digits, x_test_digits, y_train_digits, y_test_digits = train_test_split(
    x_digits, y_digits, test_size=0.15, random_state=42, stratify=y_digits
)

print("Digits - Training data shape:", x_train_digits.shape)
print("Digits - Test data shape:", x_test_digits.shape)

Digits - Training data shape: (1527, 64)
Digits - Test data shape: (270, 64)


# Training

**Training Random Forest on the MNIST and digits datasets (scikit-learn)**

In [None]:
def _fit_and_time(estimator, features, labels):
    """Fit ``estimator`` in place and return the elapsed time in seconds.

    ``time.perf_counter()`` is used instead of ``time.time()`` because it is
    a monotonic, high-resolution clock intended for measuring durations; it
    is not affected by system clock adjustments.
    """
    started = time.perf_counter()
    estimator.fit(features, labels)
    return time.perf_counter() - started


# RF on MNIST training
rf_mnist = RandomForestClassifier(random_state=42)
train_time_mnist = _fit_and_time(rf_mnist, x_train_mnist_flattened, y_train_mnist)

# RF on Digits training
rf_digits = RandomForestClassifier(random_state=42)
train_time_digits = _fit_and_time(rf_digits, x_train_digits, y_train_digits)

**Training Logistic Regression on the MNIST and digits datasets (PySpark)**

In [None]:
# Initialize Spark session
spark = (
    SparkSession.builder
    .appName("MNIST_Digits")
    .config("spark.executor.memory", "4g")
    .config("spark.driver.memory", "4g")
    .config("spark.executor.cores", "4")
    .config("spark.sql.shuffle.partitions", "64")
    .getOrCreate()
)


def _to_spark_frame(features, labels):
    """Convert paired feature rows and labels into a Spark DataFrame.

    Each row becomes ``(int(label), Vectors.dense(feature_row))`` and the
    columns are named ``label`` and ``features``.
    """
    rows = [
        (int(label), Vectors.dense(feature_row))
        for feature_row, label in zip(features, labels)
    ]
    return spark.createDataFrame(rows, ["label", "features"])


def _fit_spark_lr(train_df):
    """Fit a Spark LogisticRegression (maxIter=10, regParam=0.01) on ``train_df``.

    Returns ``(estimator, fitted_model, elapsed_seconds)``. The duration is
    measured with ``time.perf_counter()``, a monotonic clock meant for
    timing, and covers only the ``fit`` call.
    """
    estimator = LogisticRegression(maxIter=10, regParam=0.01)
    started = time.perf_counter()
    fitted_model = estimator.fit(train_df)
    return estimator, fitted_model, time.perf_counter() - started


# MNIST training / test data as Spark DataFrames
train_data_mnist = _to_spark_frame(x_train_mnist_flattened, y_train_mnist)
test_data_mnist = _to_spark_frame(x_test_mnist_flattened, y_test_mnist)

# digits training / test data as Spark DataFrames
train_data_digits = _to_spark_frame(x_train_digits, y_train_digits)
test_data_digits = _to_spark_frame(x_test_digits, y_test_digits)

# Train Logistic Regression on MNIST
lr_mnist, lr_model_mnist, train_time_mnist_spark = _fit_spark_lr(train_data_mnist)

# Train Logistic Regression on digits
lr_digits, lr_model_digits, train_time_digits_spark = _fit_spark_lr(train_data_digits)

# Running the models on the test data and reporting the results

**For random forest (scikit-learn)**

In [None]:
# Scikit-learn Random Forest evaluation for both datasets


def _evaluate_random_forest(model, x_test, y_test, train_time, dataset_name, plot_cm=False):
    """Predict on a test set, print the evaluation report and return the results.

    Parameters
    ----------
    model : fitted classifier exposing ``predict``.
    x_test, y_test : test features and their true labels.
    train_time : training duration in seconds; it is only echoed in the
        report and stored in the returned metrics.
    dataset_name : label used in the printed header and the plot title.
    plot_cm : when True, also draw the confusion matrix as a heatmap.

    Returns
    -------
    (y_pred, confusion, metrics) where ``metrics`` maps 'accuracy',
    'precision', 'recall', 'f1', 'kappa', 'pred_time' and 'train_time' to
    floats. Precision, recall and F1 are weighted averages over the classes.
    """
    print(f"Evaluating Scikit-learn Random Forest for {dataset_name}...")

    # perf_counter is a monotonic clock meant for measuring durations.
    started = time.perf_counter()
    y_pred = model.predict(x_test)
    pred_time = time.perf_counter() - started
    print(f"Training time: {train_time:.4f} seconds")
    print(f"Prediction time: {pred_time:.4f} seconds")

    metrics = {
        'accuracy': accuracy_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred, average='weighted'),
        'recall': recall_score(y_test, y_pred, average='weighted'),
        'f1': f1_score(y_test, y_pred, average='weighted'),
        'kappa': cohen_kappa_score(y_test, y_pred),
        'pred_time': pred_time,
        'train_time': train_time,
    }

    print("\nBasic Classification Metrics:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1']:.4f}")
    print(f"Cohen's Kappa: {metrics['kappa']:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_test, y_pred))

    confusion = confusion_matrix(y_test, y_pred)
    print("\nConfusion Matrix")
    print(confusion)

    if plot_cm:
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(confusion, annot=True, fmt="d", cmap="Blues",
                    xticklabels=range(10), yticklabels=range(10), ax=ax)
        ax.set_title(f'Confusion Matrix - {dataset_name} Random Forest (Scikit-learn)')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')
        fig.tight_layout()
        plt.show()

    return y_pred, confusion, metrics


print("----- Scikit-learn Random Forest Evaluation -----\n")

# Evaluate RF for MNIST
y_pred_mnist_rf, cm_mnist_rf, _rf_mnist_results = _evaluate_random_forest(
    rf_mnist, x_test_mnist_flattened, y_test_mnist, train_time_mnist, "MNIST"
)

# Evaluate RF for Digits (blank line separates the two reports)
print()
y_pred_digits_rf, cm_digits_rf, _rf_digits_results = _evaluate_random_forest(
    rf_digits, x_test_digits, y_test_digits, train_time_digits, "Digits"
)

# Save metrics for later comparison
rf_metrics = {
    'mnist': _rf_mnist_results,
    'digits': _rf_digits_results,
}

# Flat per-metric names, kept so that any cell reading them directly still works.
_RF_FLAT_KEYS = ('accuracy', 'precision', 'recall', 'f1', 'kappa', 'pred_time')
(accuracy_mnist_rf, precision_mnist_rf, recall_mnist_rf,
 f1_mnist_rf, kappa_mnist_rf, pred_time_mnist_rf) = (
    rf_metrics['mnist'][key] for key in _RF_FLAT_KEYS
)
(accuracy_digits_rf, precision_digits_rf, recall_digits_rf,
 f1_digits_rf, kappa_digits_rf, pred_time_digits_rf) = (
    rf_metrics['digits'][key] for key in _RF_FLAT_KEYS
)

----- Scikit-learn Random Forest Evaluation -----

Evaluating Scikit-learn Random Forest for MNIST...
Training time: 43.3497 seconds
Prediction time: 0.4446 seconds

Basic Classification Metrics:
Accuracy: 0.9705
Precision: 0.9705
Recall: 0.9705
F1 Score: 0.9705
Cohen's Kappa: 0.9672

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.96      0.97      0.97      1032
           3       0.96      0.96      0.96      1010
           4       0.97      0.97      0.97       982
           5       0.98      0.96      0.97       892
           6       0.98      0.98      0.98       958
           7       0.97      0.96      0.97      1028
           8       0.96      0.95      0.96       974
           9       0.96      0.95      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.

**For logistic regression (pyspark)**

In [None]:
def _evaluate_spark_lr(model, test_df, train_time, dataset_name, evaluator, plot_cm=False):
    """Score a test DataFrame with a fitted Spark model, print the report, return the results.

    Parameters
    ----------
    model : fitted Spark ML model exposing ``transform``.
    test_df : Spark DataFrame with ``label`` and ``features`` columns.
    train_time : training duration in seconds; it is only echoed in the
        report and stored in the returned metrics.
    dataset_name : label used in the printed header and the plot title.
    evaluator : MulticlassClassificationEvaluator used for accuracy,
        weighted precision, weighted recall and F1.
    plot_cm : when True, also draw the confusion matrix as a heatmap.

    Returns
    -------
    (predictions, pred_df, confusion, metrics) where ``predictions`` is the
    scored Spark DataFrame, ``pred_df`` is its label/prediction columns as a
    pandas DataFrame and ``metrics`` maps 'accuracy', 'precision', 'recall',
    'f1', 'kappa', 'pred_time' and 'train_time' to floats.
    """
    print(f"Evaluating PySpark Logistic Regression for {dataset_name}...")

    # transform() is lazy: it only builds a query plan. Timing it alone would
    # measure plan construction, not inference. Caching the result and running
    # an action inside the timed region forces Spark to score every row, and
    # lets the evaluator calls and toPandas() below reuse the cached
    # predictions instead of re-running inference each time.
    started = time.perf_counter()
    predictions = model.transform(test_df).cache()
    predictions.count()
    pred_time = time.perf_counter() - started
    print(f"Training time: {train_time:.4f} seconds")
    print(f"Prediction time: {pred_time:.4f} seconds")

    # Pull labels and predictions to the driver for the scikit-learn metrics
    pred_df = predictions.select("label", "prediction").toPandas()
    y_true = pred_df["label"].values
    y_pred = pred_df["prediction"].values

    metrics = {
        'accuracy': evaluator.evaluate(predictions, {evaluator.metricName: "accuracy"}),
        'precision': evaluator.evaluate(predictions, {evaluator.metricName: "weightedPrecision"}),
        'recall': evaluator.evaluate(predictions, {evaluator.metricName: "weightedRecall"}),
        'f1': evaluator.evaluate(predictions, {evaluator.metricName: "f1"}),
        'kappa': cohen_kappa_score(y_true, y_pred),
        'pred_time': pred_time,
        'train_time': train_time,
    }

    print("\nBasic Classification Metrics:")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"Precision: {metrics['precision']:.4f}")
    print(f"Recall: {metrics['recall']:.4f}")
    print(f"F1 Score: {metrics['f1']:.4f}")
    print(f"Cohen's Kappa: {metrics['kappa']:.4f}")

    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))

    confusion = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix")
    print(confusion)

    if plot_cm:
        fig, ax = plt.subplots(figsize=(10, 8))
        sns.heatmap(confusion, annot=True, fmt="d", cmap="Blues",
                    xticklabels=range(10), yticklabels=range(10), ax=ax)
        ax.set_title(f'Confusion Matrix - {dataset_name} Logistic Regression (PySpark)')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')
        fig.tight_layout()
        plt.show()

    return predictions, pred_df, confusion, metrics


print("----- PySpark Logistic Regression Evaluation -----\n")

# PySpark evaluator shared by both datasets
evaluator = MulticlassClassificationEvaluator()

# Evaluate PySpark LR for MNIST
predictions_mnist, pred_df_mnist, cm_mnist_spark, _spark_mnist_results = _evaluate_spark_lr(
    lr_model_mnist, test_data_mnist, train_time_mnist_spark, "MNIST", evaluator
)
y_true_mnist = pred_df_mnist["label"].values
y_pred_mnist = pred_df_mnist["prediction"].values

# Evaluate PySpark LR for Digits (blank line separates the two reports)
print()
predictions_digits, pred_df_digits, cm_digits_spark, _spark_digits_results = _evaluate_spark_lr(
    lr_model_digits, test_data_digits, train_time_digits_spark, "Digits", evaluator
)
y_true_digits = pred_df_digits["label"].values
y_pred_digits = pred_df_digits["prediction"].values

# Save metrics for later comparison
spark_metrics = {
    'mnist': _spark_mnist_results,
    'digits': _spark_digits_results,
}

# Flat per-metric names, kept so that any cell reading them directly still works.
_SPARK_FLAT_KEYS = ('accuracy', 'precision', 'recall', 'f1', 'kappa', 'pred_time')
(accuracy_mnist_spark, precision_mnist_spark, recall_mnist_spark,
 f1_mnist_spark, kappa_mnist_spark, pred_time_mnist_spark) = (
    spark_metrics['mnist'][key] for key in _SPARK_FLAT_KEYS
)
(accuracy_digits_spark, precision_digits_spark, recall_digits_spark,
 f1_digits_spark, kappa_digits_spark, pred_time_digits_spark) = (
    spark_metrics['digits'][key] for key in _SPARK_FLAT_KEYS
)

# All Spark work is done; the Spark DataFrames above are unusable after this.
spark.stop()

----- PySpark Logistic Regression Evaluation -----

Evaluating PySpark Logistic Regression for MNIST...
Training time: 42.1273 seconds
Prediction time: 0.0719 seconds

Basic Classification Metrics:
Accuracy: 0.9181
Precision: 0.9179
Recall: 0.9181
F1 Score: 0.9177
Cohen's Kappa: 0.9090

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.98      0.96       980
           1       0.94      0.97      0.96      1135
           2       0.94      0.89      0.91      1032
           3       0.91      0.91      0.91      1010
           4       0.91      0.94      0.92       982
           5       0.90      0.85      0.88       892
           6       0.92      0.96      0.94       958
           7       0.92      0.92      0.92      1028
           8       0.89      0.86      0.87       974
           9       0.90      0.90      0.90      1009

    accuracy                           0.92     10000
   macro avg       0.92      0.92      

# Evaluation and Comparison of all the models with classification metrics

In [None]:
# Overall comparison table and summary
print("----- Overall Model Comparison -----\n")

# (display label, key inside the metrics dicts) for each dataset, in row order
_DATASETS = (("MNIST", 'mnist'), ("Digits", 'digits'))
# (table label, short label for the summary, metrics dict) for each model
_MODELS = (
    ("PySpark LR", "LR", spark_metrics),
    ("Scikit-learn RF", "RF", rf_metrics),
)
# Table column -> key inside a per-dataset metrics dict, in column order
_COLUMNS = {
    "Accuracy": 'accuracy',
    "Precision": 'precision',
    "Recall": 'recall',
    "F1": 'f1',
    "Kappa": 'kappa',
    "Train Time": 'train_time',
    "Pred Time": 'pred_time',
}

# One record per (dataset, model) pair: MNIST LR, MNIST RF, Digits LR, Digits RF
data = [
    {
        "Dataset": dataset_label,
        "Model": model_label,
        **{column: results[dataset_key][metric] for column, metric in _COLUMNS.items()},
    }
    for dataset_label, dataset_key in _DATASETS
    for model_label, _, results in _MODELS
]

df = pd.DataFrame(data)
print(df)


print("\nSummary:")
# Same four candidates, in the same order as the table rows; max()/min()
# return the first candidate on a tie.
_candidates = [
    (f"{dataset_label} {short_label}", results[dataset_key])
    for dataset_label, dataset_key in _DATASETS
    for _, short_label, results in _MODELS
]

best_accuracy_model = max(
    ((name, scores['accuracy']) for name, scores in _candidates),
    key=lambda pair: pair[1],
)
fastest_model = min(
    ((name, scores['pred_time']) for name, scores in _candidates),
    key=lambda pair: pair[1],
)

print(f"- Best accuracy achieved by {best_accuracy_model[0]} with {best_accuracy_model[1]:.4f}")
print(f"- Fastest prediction time achieved by {fastest_model[0]} with {fastest_model[1]:.4f} seconds")

# Per-dataset head-to-head comparison (MNIST first, then Digits)
for dataset_label, dataset_key in _DATASETS:
    print(f"\nOn {dataset_label} dataset:")
    lr_accuracy = spark_metrics[dataset_key]['accuracy']
    rf_accuracy = rf_metrics[dataset_key]['accuracy']
    diff = abs(lr_accuracy - rf_accuracy)
    if np.isclose(lr_accuracy, rf_accuracy):
        # A tie is realistic on a small test set; do not report it as a win
        # "by 0.0000" for either model.
        print("- Logistic Regression and Random Forest are tied in accuracy")
    elif lr_accuracy > rf_accuracy:
        print(f"- Logistic Regression outperforms Random Forest by {diff:.4f} in accuracy")
    else:
        print(f"- Random Forest outperforms Logistic Regression by {diff:.4f} in accuracy")

----- Overall Model Comparison -----

  Dataset            Model  Accuracy  Precision    Recall        F1     Kappa  \
0   MNIST       PySpark LR  0.918100   0.917884  0.918100  0.917728  0.908958   
1   MNIST  Scikit-learn RF  0.970500   0.970496  0.970500  0.970472  0.967209   
2  Digits       PySpark LR  0.955556   0.957099  0.955556  0.955670  0.950333   
3  Digits  Scikit-learn RF  0.970370   0.971549  0.970370  0.970234  0.966891   

   Train Time  Pred Time  
0   42.127262   0.071903  
1   43.349715   0.444631  
2    1.792782   0.098609  
3    0.365317   0.011049  

Summary:
- Best accuracy achieved by MNIST RF with 0.9705
- Fastest prediction time achieved by Digits RF with 0.0110 seconds

On MNIST dataset:
- Random Forest outperforms Logistic Regression by 0.0524 in accuracy

On Digits dataset:
- Random Forest outperforms Logistic Regression by 0.0148 in accuracy


# Optional: Fine-tuning hyperparameters (scikit-learn only)

In [None]:
# Candidate hyperparameter values for the Random Forest grid search:
# 2 x 3 x 2 = 12 combinations in total.
param_grid_rf = dict(
    n_estimators=[100, 200],
    max_depth=[None, 10, 20],
    max_features=['sqrt', 'log2'],
)

In [None]:
def _new_rf_grid_search(base_estimator):
    """Return an unfitted GridSearchCV over ``param_grid_rf`` for ``base_estimator``.

    Candidates are scored by accuracy using 10-fold stratified
    cross-validation (shuffled, random_state=42), with n_jobs=-1 and
    verbose=2.
    """
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    return GridSearchCV(
        estimator=base_estimator,
        param_grid=param_grid_rf,
        cv=folds,
        scoring='accuracy',
        n_jobs=-1,
        verbose=2,
    )


rf = RandomForestClassifier(random_state=42)

# ---- MNIST: search on the training data, then score the test data ----
grid_search_rf = _new_rf_grid_search(rf)
grid_search_rf.fit(x_train_mnist_flattened, y_train_mnist)
print("-------------For MNIST Dataset--------------")
# Best parameters and cross-validation score
print("Random Forest - Best Parameters:", grid_search_rf.best_params_)
print("Random Forest - Best Cross-Validation Accuracy:", grid_search_rf.best_score_)

# Evaluate on MNIST test data
y_test_pred_rf = grid_search_rf.predict(x_test_mnist_flattened)
print("Random Forest - Test Accuracy:", accuracy_score(y_test_mnist, y_test_pred_rf))
print("Random Forest - Classification Report:")
print(classification_report(y_test_mnist, y_test_pred_rf))

# ---- Digits: same search, fitted on the digits training data ----
grid_search_rf_digits = _new_rf_grid_search(rf)
grid_search_rf_digits.fit(x_train_digits, y_train_digits)
print("-------------For Digits Dataset--------------")
# Best parameters and cross-validation score for digits dataset
print("Random Forest (Digits) - Best Parameters:", grid_search_rf_digits.best_params_)
print("Random Forest (Digits) - Best Cross-Validation Accuracy:", grid_search_rf_digits.best_score_)

# Evaluate on digits test data
y_test_pred_rf_digits = grid_search_rf_digits.predict(x_test_digits)
print("Random Forest (Digits) - Test Accuracy:", accuracy_score(y_test_digits, y_test_pred_rf_digits))
print("Random Forest (Digits) - Classification Report:")
print(classification_report(y_test_digits, y_test_pred_rf_digits))

Fitting 10 folds for each of 12 candidates, totalling 120 fits




-------------For MNIST Dataset--------------
Random Forest - Best Parameters: {'max_depth': None, 'max_features': 'sqrt', 'n_estimators': 200}
Random Forest - Best Cross-Validation Accuracy: 0.9693166666666666
Random Forest - Test Accuracy: 0.9707
Random Forest - Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       980
           1       0.99      0.99      0.99      1135
           2       0.96      0.97      0.97      1032
           3       0.96      0.96      0.96      1010
           4       0.98      0.97      0.98       982
           5       0.97      0.96      0.97       892
           6       0.98      0.98      0.98       958
           7       0.97      0.97      0.97      1028
           8       0.96      0.95      0.96       974
           9       0.96      0.95      0.96      1009

    accuracy                           0.97     10000
   macro avg       0.97      0.97      0.97     10000
weighted 