In [3]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import time

# Benchmark: multinomial logistic regression on MNIST, trained once on the
# standardized pixel features and once on a PCA-reduced version of them.
mnist = fetch_openml('mnist_784', parser='auto')

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    mnist.data, mnist.target, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only and reuse it for the test split,
# so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# A float n_components in (0, 1) asks PCA for enough components to explain
# that fraction of the variance (see the scikit-learn PCA documentation).
# NOTE: this fit/transform runs outside the timed regions below, so the
# reported "Time with PCA" does not include the cost of the reduction itself.
pca = PCA(n_components=0.95)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)


def _fit_and_score(train_features, test_features):
    """Train a logistic regression and time fit + predict + scoring.

    Labels are read from the module-level ``y_train`` / ``y_test``.

    Returns:
        A ``(model, predictions, accuracy, elapsed_seconds)`` tuple.
    """
    # perf_counter() is monotonic and high-resolution; time.time() follows
    # the wall clock and can jump if the system clock is adjusted mid-run.
    started = time.perf_counter()
    model = LogisticRegression(solver='lbfgs', max_iter=1000)
    model.fit(train_features, y_train)
    predictions = model.predict(test_features)
    accuracy = accuracy_score(y_test, predictions)
    elapsed = time.perf_counter() - started
    return model, predictions, accuracy, elapsed


# Baseline: all standardized features.
logisticRegr, y_pred, accuracy_no_pca, time_no_pca = _fit_and_score(
    X_train_scaled, X_test_scaled
)

# Same model configuration on the PCA-reduced features.
logisticRegr_pca, y_pred_pca, accuracy_pca, time_pca = _fit_and_score(
    X_train_pca, X_test_pca
)

print(f"Accuracy without PCA: {accuracy_no_pca:.4f}")
print(f"Time without PCA: {time_no_pca:.2f} seconds")
print(f"Accuracy with PCA: {accuracy_pca:.4f}")
print(f"Time with PCA: {time_pca:.2f} seconds")


Accuracy without PCA: 0.9164
Time without PCA: 249.77 seconds
Accuracy with PCA: 0.9216
Time with PCA: 103.27 seconds
