In [8]:
import numpy as np

# Build a small, noisy MNIST subset for the experiments below
def generate_data(noise_factor=0.2, n_samples=1000, random_state=None):
    """Load MNIST, keep the leading samples of each split, and add Gaussian noise.

    Parameters
    ----------
    noise_factor : float, default 0.2
        Multiplier applied to standard-normal noise, i.e. the standard
        deviation of the added noise. It is applied on the same scale as the
        pixel values (which are clipped to 0-255 afterwards), not on a
        normalised 0-1 scale.
    n_samples : int or None, default 1000
        Number of leading samples kept from the training split and from the
        test split. ``None`` keeps every sample.
    random_state : int or None, default None
        Seed for a dedicated ``np.random.RandomState`` used to draw the noise.
        With ``None`` the noise comes from NumPy's global generator, so a
        prior ``np.random.seed(...)`` call still controls it.

    Returns
    -------
    X_train_noisy, X_test_noisy : numpy.ndarray
        Noisy, clipped images flattened to 784 columns.
    y_train, y_test : numpy.ndarray
        Labels truncated to the same number of rows as the matching images.

    Raises
    ------
    ValueError
        If ``n_samples`` is negative.
    """
    if n_samples is not None and n_samples < 0:
        raise ValueError(f"n_samples must be non-negative or None, got {n_samples}")

    # Imported inside the function so TensorFlow is only required when the
    # data is actually generated.
    from tensorflow.keras.datasets import mnist

    # Automatically downloads MNIST the first time
    (X_train, y_train), (X_test, y_test) = mnist.load_data()

    # Truncate images and labels together so they always stay aligned
    X_train, y_train = X_train[:n_samples], y_train[:n_samples]
    X_test, y_test = X_test[:n_samples], y_test[:n_samples]

    # Flatten 28x28 images to 784 features
    X_train_flat = X_train.reshape(-1, 784)
    X_test_flat = X_test.reshape(-1, 784)

    # Global generator by default; a private, seeded one when a seed is given
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Add noise (training noise is drawn first, then test noise)
    X_train_noisy = X_train_flat + noise_factor * rng.normal(loc=0.0, scale=1.0, size=X_train_flat.shape)
    X_test_noisy = X_test_flat + noise_factor * rng.normal(loc=0.0, scale=1.0, size=X_test_flat.shape)

    # Clip the noisy data so pixel values stay in the valid range (0 to 255)
    X_train_noisy = np.clip(X_train_noisy, 0, 255)
    X_test_noisy = np.clip(X_test_noisy, 0, 255)

    # Debug output: report the shapes of the generated arrays
    print(f"✅ 训练数据形状: {X_train_noisy.shape}")
    print(f"✅ 训练标签形状: {y_train.shape}")
    print(f"✅ 测试数据形状: {X_test_noisy.shape}")
    print(f"✅ 测试标签形状: {y_test.shape}")

    return X_train_noisy, X_test_noisy, y_train, y_test


# Seed NumPy's global generator first so the Gaussian noise drawn inside
# generate_data() is the same on every Restart & Run All.
np.random.seed(42)

# Note: the second and fourth values come from the MNIST *test* split; this
# notebook uses them as its validation set.
X_train, X_valid, y_train, y_valid = generate_data()

✅ 训练数据形状: (1000, 784)
✅ 训练标签形状: (1000,)
✅ 测试数据形状: (1000, 784)
✅ 测试标签形状: (1000,)


In [13]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score,recall_score, accuracy_score
# Baseline: fit logistic regression directly on the noisy pixel features
log_reg = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# Score the fitted model on the held-out split
y_pred = log_reg.predict(X_valid)
accuracy = accuracy_score(y_true=y_valid, y_pred=y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 82.00%


In [19]:
import numpy as np
from sklearn.decomposition import PCA
# TODO (homework): experiment with other values of n_components.
n_components = 150  # number of principal components to keep

# Fit PCA on the training split only, then apply the same projection to the
# held-out split. random_state keeps the decomposition repeatable in case
# scikit-learn selects a randomized SVD solver for this input size.
pca = PCA(n_components=n_components, random_state=42)
X_train_pca = pca.fit_transform(X_train)
X_valid_pca = pca.transform(X_valid)

# Train logistic regression classifier on the reduced features
log_reg = LogisticRegression(max_iter=1000, random_state=42)
log_reg.fit(X_train_pca, y_train)

# Predict the held-out split
y_pred = log_reg.predict(X_valid_pca)

# Calculate accuracy
accuracy = accuracy_score(y_valid, y_pred)
print(f"Number of Components: {n_components}, Test Accuracy: {accuracy * 100:.2f}%")

Number of Components: 150, Test Accuracy: 81.90%
