<a href="https://colab.research.google.com/github/ahzaidy/Programs/blob/main/CPSC_5410_HW2_P4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load dataset
file_path = "https://raw.githubusercontent.com/ahzaidy/Programs/refs/heads/main/MNIST_digits0-1-2.csv"  # Update with the correct file path
df = pd.read_csv(file_path)
x = df.iloc[:, :-1]  # Features (pixel values)
y = df.iloc[:, -1]   # Labels (digits 0, 1, 2)

# Normalize pixel values
x = x / 255.0

# Split data (80% training, 20% testing)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
d = x_train.shape[1]  # Input dimension

# Split validation set from training data
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.2, random_state=42)

def build_autoencoder(layers_dims):
    input_layer = keras.Input(shape=(d,))
    x = input_layer
    for dim in layers_dims[:-1]:
        x = layers.Dense(dim, activation='tanh')(x)
    x = layers.Dense(layers_dims[-1], activation='linear', name='bottleneck')(x)
    for dim in reversed(layers_dims[:-1]):
        x = layers.Dense(dim, activation='tanh')(x)
    output_layer = layers.Dense(d, activation='sigmoid')(x)

    autoencoder = keras.Model(input_layer, output_layer)
    encoder = keras.Model(input_layer, autoencoder.get_layer('bottleneck').output)

    autoencoder.compile(optimizer='adam', loss='mse')
    return autoencoder, encoder

architectures = [
    [50, 2],
    [100, 50, 2],
    [200, 100, 50, 2]
]

k_values = [1, 3, 5, 7]  # Different k values for k-NN
results = []

for arch in architectures:
    for k in k_values:
        acc_list = []
        for _ in range(10):
            autoencoder, encoder = build_autoencoder(arch)
            autoencoder.fit(x_train, x_train, epochs=10, batch_size=256, validation_data=(x_valid, x_valid), verbose=0)

            train_codes = pd.DataFrame(encoder.predict(x_train))
            test_codes = pd.DataFrame(encoder.predict(x_test))

            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(train_codes, y_train)
            y_pred = knn.predict(test_codes)
            acc_list.append(accuracy_score(y_test, y_pred))

        results.append((arch, k, np.mean(acc_list), np.std(acc_list)))

# Print results
print("\nResults:")
print("Architecture | k | Mean Accuracy | Std Deviation")
for arch, k, mean_acc, std_acc in results:
    print(f"{arch} | {k} | {mean_acc:.4f} | {std_acc:.4f}")

print("\nLarger networks may capture better representations but could suffer from overfitting.")


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step




[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15