In [5]:
import torchvision.datasets as datasets
import os

# 下载 MNIST 数据集
def download_mnist():
    """Download the MNIST train and test splits and report which raw files exist.

    The datasets are fetched through ``torchvision.datasets.MNIST`` with
    ``root='./data'`` and ``download=True``. Afterwards the four raw IDX files
    that the rest of the notebook reads directly are checked on disk and the
    result of each check is printed.

    Returns:
        None. The function only downloads files and prints a status report.
    """
    # Constructing the dataset objects with download=True triggers the
    # download; the objects themselves are not needed afterwards.
    datasets.MNIST(root='./data', train=True, download=True)
    datasets.MNIST(root='./data', train=False, download=True)
    print("MNIST 数据集下载完成")

    # Verify that the raw files used by the loader are actually present.
    # (Previously these paths were built but never checked.)
    expected_files = [
        ("训练集图像文件存在", "./data/MNIST/raw/train-images-idx3-ubyte"),
        ("训练集标签文件存在", "./data/MNIST/raw/train-labels-idx1-ubyte"),
        ("测试集图像文件存在", "./data/MNIST/raw/t10k-images-idx3-ubyte"),
        ("测试集标签文件存在", "./data/MNIST/raw/t10k-labels-idx1-ubyte"),
    ]
    for description, path in expected_files:
        print(f"{description}: {os.path.exists(path)}")

    
# Trigger the download (and any reporting done by download_mnist) when this cell runs.
download_mnist()

MNIST 数据集下载完成
训练集图像文件存在: True
训练集标签文件存在: True
测试集图像文件存在: True
测试集标签文件存在: True


In [6]:
import torch
import numpy as np
import time
import struct

def load_mnist_images(image_path, label_path):
    """Load an MNIST image file and its label file from raw IDX format.

    Args:
        image_path: Path to an uncompressed IDX3 image file (16-byte big-endian
            header: magic, image count, rows, cols; followed by uint8 pixels).
        label_path: Path to an uncompressed IDX1 label file (8-byte big-endian
            header: magic, label count; followed by uint8 labels).

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a float32 array of
        shape ``(num_images, rows * cols)`` scaled to [0, 1] and ``labels`` is
        a uint8 array of length ``num_images``.

    Raises:
        ValueError: If a header is truncated, a magic number does not match
            the expected IDX type, the payload size disagrees with the header,
            or the image and label counts differ.
        OSError: If either file cannot be opened.
    """
    IMAGE_MAGIC = 2051  # 0x00000803: unsigned byte data, 3 dimensions
    LABEL_MAGIC = 2049  # 0x00000801: unsigned byte data, 1 dimension

    # Read images
    with open(image_path, 'rb') as f:
        header = f.read(16)
        if len(header) != 16:
            raise ValueError(f"{image_path}: truncated IDX image header")
        magic, num_images, rows, cols = struct.unpack('>IIII', header)
        if magic != IMAGE_MAGIC:
            raise ValueError(
                f"{image_path}: bad magic number {magic}, expected {IMAGE_MAGIC}"
            )
        pixels = np.fromfile(f, dtype=np.uint8)

    expected_pixels = num_images * rows * cols
    if pixels.size != expected_pixels:
        raise ValueError(
            f"{image_path}: expected {expected_pixels} pixel bytes, found {pixels.size}"
        )
    # Flatten each image to a row vector and normalize to [0, 1].
    images = pixels.reshape(num_images, rows * cols).astype(np.float32) / 255.0

    # Read labels
    with open(label_path, 'rb') as f:
        header = f.read(8)
        if len(header) != 8:
            raise ValueError(f"{label_path}: truncated IDX label header")
        magic, num_labels = struct.unpack('>II', header)
        if magic != LABEL_MAGIC:
            raise ValueError(
                f"{label_path}: bad magic number {magic}, expected {LABEL_MAGIC}"
            )
        labels = np.fromfile(f, dtype=np.uint8)

    if labels.size != num_labels:
        raise ValueError(
            f"{label_path}: expected {num_labels} labels, found {labels.size}"
        )
    # A mismatched pair would otherwise silently misalign samples and labels.
    if num_labels != num_images:
        raise ValueError(
            f"image/label count mismatch: {num_images} images vs {num_labels} labels"
        )

    return images, labels

class KNNClassifier:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Training data is stored as tensors on ``self.device`` (CUDA when
    ``torch.cuda.is_available()`` is true, otherwise CPU). Prediction computes
    distances from every query row to every training row, selects the ``k``
    smallest, and takes the mode of the corresponding labels.
    """

    def __init__(self, k=10):
        """Create an unfitted classifier.

        Args:
            k: Number of nearest neighbours that vote on each prediction.
        """
        self.k = k
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Populated by fit(); None means "not fitted yet".
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training samples and labels on the classifier's device.

        Args:
            X: Array-like of training samples, one sample per row.
            y: Array-like of labels, one per row of ``X``.
        """
        # torch.tensor copies, so later changes to the caller's arrays do not
        # affect the fitted model.
        self.X_train = torch.tensor(X, device=self.device)
        self.y_train = torch.tensor(y, device=self.device)

    def predict(self, X, batch_size=None):
        """Predict a label for every row of ``X``.

        Args:
            X: Array-like of query samples, one sample per row, with the same
                number of columns as the training data.
            batch_size: Optional number of query rows processed at a time.
                ``None`` (the default) processes all rows in a single pass;
                a smaller value bounds the size of the distance matrix held
                in memory to ``batch_size x num_train``.

        Returns:
            A NumPy array with one predicted label per query row.

        Raises:
            RuntimeError: If called before ``fit``.
            ValueError: If ``batch_size`` is not a positive integer.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNNClassifier.predict() called before fit()")

        X = torch.as_tensor(X, device=self.device)
        num_queries = X.size(0)

        if batch_size is None:
            batch_size = max(num_queries, 1)
        elif batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        # These depend only on the training set, so compute them once.
        train_square = torch.sum(self.X_train**2, dim=1)
        train_t = self.X_train.t()

        batch_predictions = []
        for start in range(0, num_queries, batch_size):
            batch = X[start:start + batch_size]

            # ||x - t||^2 = ||x||^2 + ||t||^2 - 2 x.t
            batch_square = torch.sum(batch**2, dim=1, keepdim=True)
            distances = batch_square + train_square - 2 * torch.mm(batch, train_t)
            # Floating-point cancellation can leave tiny negative values for
            # (near-)identical points; sqrt would turn those into NaN and
            # corrupt the nearest-neighbour ranking, so clamp at zero first.
            distances = torch.sqrt(torch.clamp(distances, min=0))

            # Find k nearest neighbors
            _, indices = torch.topk(distances, k=self.k, dim=1, largest=False)

            # Majority vote over the labels of the k nearest neighbors
            k_nearest_labels = self.y_train[indices]
            batch_predictions.append(torch.mode(k_nearest_labels, dim=1)[0])

        if not batch_predictions:
            # No query rows: return an empty array with the label dtype.
            return self.y_train.new_empty(0).cpu().numpy()

        return torch.cat(batch_predictions).cpu().numpy()

def main():
    """Benchmark the KNN classifier on the locally stored raw MNIST files.

    Loads the train and test splits, fits a ``KNNClassifier`` with ``k=10``,
    predicts the test labels, and prints accuracy plus timing figures. The
    reported total time covers fitting and prediction only; data loading and
    the accuracy computation fall outside the timed region. If loading fails
    for any reason, the error is printed and the function returns early.
    """
    print("Loading MNIST dataset...")
    try:
        X_train, y_train = load_mnist_images(
            "./data/MNIST/raw/train-images-idx3-ubyte",
            "./data/MNIST/raw/train-labels-idx1-ubyte",
        )
        X_test, y_test = load_mnist_images(
            "./data/MNIST/raw/t10k-images-idx3-ubyte",
            "./data/MNIST/raw/t10k-labels-idx1-ubyte",
        )
    except Exception as e:
        print(f"Error loading MNIST data: {e}")
        return

    n_train = len(X_train)
    n_test = len(X_test)
    print(f"Successfully loaded {n_train} training samples and {n_test} test samples.")

    classifier = KNNClassifier(k=10)

    # Timed region starts here: fit followed by predict.
    overall_t0 = time.time()

    fit_t0 = time.time()
    print("Fitting the model...")
    classifier.fit(X_train, y_train)
    train_time = time.time() - fit_t0

    predict_t0 = time.time()
    print("Making predictions...")
    predictions = classifier.predict(X_test)
    test_time = time.time() - predict_t0

    total_time = time.time() - overall_t0

    # Score the predictions against the ground-truth labels.
    hits = predictions == y_test
    accuracy = np.mean(hits) * 100
    correct_predictions = np.sum(hits)

    # Emit the report as one block of text.
    report = [
        "\nFinal Results:",
        f"Total training samples: {n_train}",
        f"Total test samples: {n_test}",
        f"Correct predictions: {correct_predictions}",
        f"Accuracy: {accuracy:.2f}%",
        "\nTiming Information:",
        f"Training time: {train_time:.2f} seconds",
        f"Testing time: {test_time:.2f} seconds",
        f"Total execution time: {total_time:.2f} seconds",
    ]
    print("\n".join(report))

# Run the benchmark only when __name__ is "__main__", i.e. when this code is
# executed directly rather than imported as a module.
if __name__ == "__main__":
    main()

Loading MNIST dataset...
Successfully loaded 60000 training samples and 10000 test samples.
Fitting the model...
Making predictions...

Final Results:
Total training samples: 60000
Total test samples: 10000
Correct predictions: 9671
Accuracy: 96.71%

Timing Information:
Training time: 0.33 seconds
Testing time: 0.86 seconds
Total execution time: 1.19 seconds
