In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import struct

# --- Helper functions to load ubyte files ---

def load_images(filename):
    """Read an uncompressed IDX3 (MNIST-style) image file into a 2-D array.

    The file begins with a 16-byte big-endian header holding four unsigned
    32-bit integers (magic number, image count, rows, cols), followed by one
    unsigned byte per pixel.

    Args:
        filename: Path to the uncompressed ``*-images-idx3-ubyte`` file.

    Returns:
        ``np.uint8`` array of shape ``(num, rows * cols)``; each row is one
        flattened image. The array is built with ``np.frombuffer`` over the
        bytes read from disk, so copy it before modifying it in place.

    Raises:
        ValueError: If the magic number is not 2051 (e.g. the file is still
            gzip-compressed or is a label file), or if the pixel payload
            size does not match the dimensions declared in the header.
        struct.error: If the file is shorter than the 16-byte header.
    """
    idx3_magic = 2051  # 0x00000803: unsigned-byte data, 3 dimensions

    with open(filename, 'rb') as f:
        magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
        if magic != idx3_magic:
            raise ValueError(
                f"{filename!r} is not an IDX3 image file "
                f"(magic {magic}, expected {idx3_magic}); is it still compressed?"
            )
        payload = f.read()

    # Check the size up front so a truncated or padded file produces a clear
    # message instead of a generic reshape failure.
    expected = num * rows * cols
    if len(payload) != expected:
        raise ValueError(
            f"{filename!r}: header declares {num} images of {rows}x{cols} "
            f"({expected} bytes) but the file holds {len(payload)} pixel bytes"
        )

    return np.frombuffer(payload, dtype=np.uint8).reshape(num, rows * cols)

def load_labels(filename):
    """Read an uncompressed IDX1 (MNIST-style) label file into a 1-D array.

    The file begins with an 8-byte big-endian header holding two unsigned
    32-bit integers (magic number, label count), followed by one unsigned
    byte per label.

    Args:
        filename: Path to the uncompressed ``*-labels-idx1-ubyte`` file.

    Returns:
        ``np.uint8`` array of shape ``(num,)``. The array is built with
        ``np.frombuffer`` over the bytes read from disk, so copy it before
        modifying it in place.

    Raises:
        ValueError: If the magic number is not 2049 (e.g. the file is still
            gzip-compressed or is an image file), or if the number of label
            bytes differs from the count declared in the header.
        struct.error: If the file is shorter than the 8-byte header.
    """
    idx1_magic = 2049  # 0x00000801: unsigned-byte data, 1 dimension

    with open(filename, 'rb') as f:
        magic, num = struct.unpack(">II", f.read(8))
        if magic != idx1_magic:
            raise ValueError(
                f"{filename!r} is not an IDX1 label file "
                f"(magic {magic}, expected {idx1_magic}); is it still compressed?"
            )
        payload = f.read()

    # Without this check a truncated or padded file would silently yield a
    # label vector whose length no longer matches the image array.
    if len(payload) != num:
        raise ValueError(
            f"{filename!r}: header declares {num} labels "
            f"but the file holds {len(payload)} label bytes"
        )

    return np.frombuffer(payload, dtype=np.uint8)

# --- Read the four MNIST IDX files (paths are relative to the working dir) ---
print("Loading MNIST from .ubyte files...")

X_train, y_train = (
    load_images("train-images-idx3-ubyte"),
    load_labels("train-labels-idx1-ubyte"),
)
X_test, y_test = (
    load_images("t10k-images-idx3-ubyte"),
    load_labels("t10k-labels-idx1-ubyte"),
)

# --- Keep only the samples labelled 0 or 1 (binary problem) ---
mask_train = np.isin(y_train, (0, 1))
mask_test = np.isin(y_test, (0, 1))

X_train = X_train[mask_train]
y_train = y_train[mask_train]
X_test = X_test[mask_test]
y_test = y_test[mask_test]

# --- Rescale pixel bytes by 1/255 (optional but recommended) ---
X_train, X_test = X_train / 255.0, X_test / 255.0

# --- Fit the classifier on the filtered training split ---
print("Training Logistic Regression model...")
model = LogisticRegression(solver='lbfgs', max_iter=1000)
model.fit(X_train, y_train)

# --- Score predictions against the held-out test labels ---
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on test set: {accuracy * 100:.2f}%")

# --- Tally how many test samples were assigned to each class ---
num_ones = (y_pred == 1).sum()
num_zeros = (y_pred == 0).sum()
print(f"Predicted 1s: {num_ones}")
print(f"Predicted 0s: {num_zeros}")


Loading MNIST from .ubyte files...
Training Logistic Regression model...
Accuracy on test set: 99.95%
Predicted 1s: 1136
Predicted 0s: 979
