In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import mnist

# Fetch MNIST; load_data() hands back a (train, test) pair, each an
# (images, labels) tuple, which is unpacked in two steps below.
train_data, test_data = mnist.load_data()
x_train, y_train = train_data
x_test, y_test = test_data

# Function to divide each image into blocks and extract centroids
def extract_centroids(image, num_blocks):
    """Return the peak-intensity "centroid" of every block in a grid over `image`.

    The image is cut into a ``num_blocks x num_blocks`` grid of equally sized
    blocks. Each block is ``image.shape[0] // num_blocks`` rows tall and
    ``image.shape[1] // num_blocks`` columns wide; trailing rows/columns that
    do not fill a whole block are ignored.

    For each block, the centroid is the mean (row, column) position, relative
    to the block's top-left corner, of all pixels equal to the block maximum.
    It is not an intensity-weighted centre of mass. A uniform block (for
    example all zeros) therefore yields the block's geometric centre.

    Args:
        image: 2-D array of pixel intensities.
        num_blocks: Number of blocks along each axis.

    Returns:
        1-D float array of length ``2 * num_blocks ** 2`` holding
        ``(row, col)`` pairs, with blocks visited in row-major order.

    Raises:
        ValueError: If `num_blocks` is larger than either image dimension,
            which would produce empty blocks.
    """
    # Size each axis independently so non-square images are tiled correctly.
    block_rows = image.shape[0] // num_blocks
    block_cols = image.shape[1] // num_blocks
    if block_rows == 0 or block_cols == 0:
        raise ValueError(
            f"num_blocks={num_blocks} exceeds image dimensions "
            f"{tuple(image.shape[:2])}; blocks would be empty"
        )

    centroids = []
    for i in range(num_blocks):
        for j in range(num_blocks):
            block = image[
                i * block_rows:(i + 1) * block_rows,
                j * block_cols:(j + 1) * block_cols,
            ]
            # Locate the peak pixels once and reuse the indices for both axes.
            peak_rows, peak_cols = np.nonzero(block == block.max())
            centroids.append([peak_rows.mean(), peak_cols.mean()])
    return np.array(centroids).flatten()

# Grid resolution: each image is cut into num_blocks x num_blocks tiles
num_blocks = 9


def _centroid_features(images):
    """Build a 2-D feature matrix with one centroid vector per image."""
    return np.array([extract_centroids(img, num_blocks) for img in images])


# Turn raw images into centroid feature vectors
x_train_features = _centroid_features(x_train)
x_test_features = _centroid_features(x_test)

# Hold out 30% of the training features as a validation set
x_train_split, x_val_split, y_train_split, y_val_split = train_test_split(
    x_train_features,
    y_train,
    test_size=0.3,
    random_state=42,
)

# Build the random forest and fit it on the training portion
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(x_train_split, y_train_split)

# Score the model on the held-out validation data
y_pred = rf_classifier.predict(x_val_split)
accuracy = accuracy_score(y_val_split, y_pred)
print("Validation Accuracy:", accuracy)

# Score the model on the official MNIST test set
y_test_pred = rf_classifier.predict(x_test_features)
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", test_accuracy)

Validation Accuracy: 0.9262222222222222
Test Accuracy: 0.9255
