In [1]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import fashion_mnist

# Fetch Fashion-MNIST; Keras hands it back already divided into a full
# training portion and a test portion.
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

# Carve a validation set (20% of the rows) out of the full training portion.
# The fixed seed makes the split reproducible across runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full,
    y_train_full,
    test_size=0.2,
    random_state=42,
)

# Report the feature and label shapes of every split.
for _split_label, _split_X, _split_y in (
    ("Training set: ", X_train, y_train),
    ("Validation set: ", X_valid, y_valid),
    ("Test set: ", X_test, y_test),
):
    print(_split_label, _split_X.shape, _split_y.shape)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
Training set:  (48000, 28, 28) (48000,)
Validation set:  (12000, 28, 28) (12000,)
Test set:  (10000, 28, 28) (10000,)


In [2]:
from sklearn.ensemble import RandomForestClassifier
import time

# Baseline model: a 100-tree Random Forest on the raw pixel values.
# The fixed seed keeps the fitted forest reproducible.
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier and time it.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed durations; time.time() follows the wall clock, which can be adjusted
# while the fit is running and would distort the measurement.
# Each image is flattened to a single feature row before fitting.
start_time = time.perf_counter()
rf_clf.fit(X_train.reshape((X_train.shape[0], -1)), y_train)
end_time = time.perf_counter()

# Print the training time
print("Training time: {:.2f} seconds".format(end_time - start_time))

# Evaluate the classifier on the test set (flattened the same way as the
# training images); score() returns the mean accuracy.
score = rf_clf.score(X_test.reshape((X_test.shape[0], -1)), y_test)
print("Test set accuracy: {:.2f}".format(score))


Training time: 42.90 seconds
Test set accuracy: 0.87


In [3]:
from sklearn.decomposition import PCA

# Flatten every training image into one row so PCA receives a 2-D matrix.
X_train_2d = X_train.reshape(len(X_train), -1)

# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share (95%) of the variance. The projection is learned from
# the training rows only.
pca = PCA(n_components=0.95, random_state=42).fit(X_train_2d)

# Project all three splits with the projection learned above; validation and
# test images are flattened the same way as the training images.
X_train_pca = pca.transform(X_train_2d)
X_valid_pca = pca.transform(X_valid.reshape(len(X_valid), -1))
X_test_pca = pca.transform(X_test.reshape(len(X_test), -1))

# Show the shape of each reduced split.
for _caption, _reduced in (
    ("Transformed training set: ", X_train_pca),
    ("Transformed validation set: ", X_valid_pca),
    ("Transformed test set: ", X_test_pca),
):
    print(_caption, _reduced.shape)


Transformed training set:  (48000, 187)
Transformed validation set:  (12000, 187)
Transformed test set:  (10000, 187)


In [4]:
from sklearn.ensemble import RandomForestClassifier
import time

# Second model: same hyperparameters and seed as the baseline forest, so the
# only difference is the input features (PCA-reduced instead of raw pixels).
rf_clf_pca = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier on the reduced dataset and time it.
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed durations; time.time() follows the wall clock, which can be adjusted
# while the fit is running and would distort the measurement.
start_time = time.perf_counter()
rf_clf_pca.fit(X_train_pca, y_train)
end_time = time.perf_counter()

# Print the training time
print("Training time on reduced dataset: {:.2f} seconds".format(end_time - start_time))


Training time on reduced dataset: 53.48 seconds
