In [14]:
import numpy as np
from torchvision.datasets import FashionMNIST

In [16]:
# Build both FashionMNIST splits under ./data: training split first, then test
train_set, test_set = [
    FashionMNIST(root="./data", train=is_train, download=True)
    for is_train in (True, False)
]

In [42]:
# Flatten every image into a 784-element row and divide the pixel values by 255.0
X_train, X_test = (
    np.array(split.data).reshape(-1, 784) / 255.0
    for split in (train_set, test_set)
)

# One-hot encode: each label selects the matching row of the 10x10 identity matrix
y_train, y_test = (
    np.eye(10)[np.array(split.targets)]
    for split in (train_set, test_set)
)

# 2c

In [65]:
# Row-wise softmax
def softmax(X):
    """Return the softmax of each row of the 2-D score matrix ``X``.

    The row maximum is subtracted before exponentiating so the largest
    exponent in every row is exactly 0, which keeps ``np.exp`` from
    overflowing on large scores. The result has the same shape as ``X``.
    """
    row_peak = np.max(X, axis=1, keepdims=True)
    unnormalised = np.exp(X - row_peak)
    row_total = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_total

# Gradient descent
def gradient_descent(loss_func, X, y, learning_rate=0.5, epochs=100,
                     seed=None, log_every=10):
    """Fit a softmax-regression weight matrix with full-batch gradient descent.

    Parameters
    ----------
    loss_func : callable
        Called as ``loss_func(W, X, y)`` and used ONLY for progress reporting.
        The update step below always follows the softmax cross-entropy
        gradient, so passing a different ``loss_func`` changes what is
        printed, not what is optimised.
    X : ndarray of shape (n, d)
        Feature matrix.
    y : ndarray of shape (n, k)
        Target matrix with one column per class.
    learning_rate : float
        Step size applied to the averaged gradient.
    epochs : int
        Number of full passes over ``X``.
    seed : int or None
        When given, the initial weights are drawn from a dedicated
        ``np.random.default_rng(seed)`` so the run is reproducible. When
        ``None`` (the default) the legacy global ``np.random`` state is used,
        exactly as before.
    log_every : int
        Print the loss every ``log_every`` epochs. ``0`` or ``None`` disables
        logging.

    Returns
    -------
    ndarray of shape (d, k)
        The weights after the final update.
    """
    n, d = X.shape
    k = y.shape[1]

    if seed is None:
        initial = np.random.randn(d, k)
    else:
        initial = np.random.default_rng(seed).standard_normal((d, k))
    # Scale by 1/sqrt(d) so the initial scores do not grow with the feature count.
    W = initial / np.sqrt(d)

    for epoch in range(epochs):
        probabilities = softmax(np.dot(X, W))
        # Gradient of the mean softmax cross-entropy with respect to W.
        grad = -np.dot(X.T, (y - probabilities)) / n
        W -= learning_rate * grad
        # loss_func costs a full forward pass; evaluate it only when the
        # value is actually going to be printed.
        if log_every and (epoch + 1) % log_every == 0:
            print(f"Epoch {epoch + 1}, Loss: {loss_func(W, X, y)}")
    return W

# Calculate the loss for L(W)
def calculate_L_loss(W, X, y):
    """Return the summed cross-entropy ``-sum(y * log(softmax(X @ W)))``.

    The sum runs over every sample and class; it is not divided by the
    number of samples.

    The log-probabilities are computed directly with the log-sum-exp
    identity rather than as ``np.log(softmax(...))``. Taking the log of an
    already-exponentiated probability yields ``-inf`` whenever that
    probability underflows to 0, and the subsequent ``0 * -inf`` for a
    non-target class is ``nan``, which turns the whole loss into ``nan``.
    """
    scores = np.dot(X, W)
    # Shifting by the row maximum makes the largest exponent 0, so the
    # row sum below is at least 1 and its log is always finite.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    log_norm = np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    log_probs = shifted - log_norm
    return -np.sum(y * log_probs)

def predict(W, X):
    """Return, for each row of ``X``, the column index of its largest score.

    Scores are the plain linear outputs ``X . W``; no softmax is applied
    before taking the arg-max.
    """
    return np.dot(X, W).argmax(axis=1)

# Running Gradient Descent

In [66]:
# Train on the training set, reporting J(W) as the progress metric.
# NOTE(review): calculate_J_loss is not defined in any cell shown here — confirm
# it is defined earlier in the notebook so this cell survives Restart & Run All.
W_J = gradient_descent(loss_func=calculate_J_loss, X=X_train, y=y_train)

Epoch 10, Loss: 135.32837449812567
Epoch 20, Loss: 120.43910989043378
Epoch 30, Loss: 49.83535927525998
Epoch 40, Loss: 80.22838849472518
Epoch 50, Loss: 55.47235498585658
Epoch 60, Loss: 80.1572175880842
Epoch 70, Loss: 80.20783650954867
Epoch 80, Loss: 59.17931904961713
Epoch 90, Loss: 70.30831778137976
Epoch 100, Loss: 64.81101974623745


In [67]:
# Train on the training set again, this time reporting L(W) as the progress metric
W_L = gradient_descent(loss_func=calculate_L_loss, X=X_train, y=y_train)

Epoch 10, Loss: 380083.93463238265
Epoch 20, Loss: 96651.604971244
Epoch 30, Loss: 149879.92377353387
Epoch 40, Loss: 92413.8823929142
Epoch 50, Loss: 78148.35590485296
Epoch 60, Loss: 116359.84786588624
Epoch 70, Loss: 46970.920905421124
Epoch 80, Loss: 84640.215685404
Epoch 90, Loss: 91843.66003399061
Epoch 100, Loss: 112521.54924157326


# Accuracy for J(W)

In [68]:
# Predicted class indices from the J(W) weights, for the train and test features
y_train_pred_J = predict(W=W_J, X=X_train)
y_test_pred_J = predict(W=W_J, X=X_test)

In [69]:
# Accuracy = share of samples whose predicted index matches the arg-max of the one-hot label
accuracy_train_J = (y_train_pred_J == y_train.argmax(axis=1)).mean()
accuracy_test_J = (y_test_pred_J == y_test.argmax(axis=1)).mean()

print(f"Accuracy for J(W) - Training set: {accuracy_train_J * 100:.4f}%")
print(f"Accuracy for J(W) - Test set: {accuracy_test_J * 100:.4f}%")

Accuracy for J(W) - Training set: 74.7267%
Accuracy for J(W) - Test set: 73.6300%


# Accuracy for L(W)

In [70]:
# Predicted class indices from the L(W) weights, for the train and test features
y_train_pred_L = predict(W=W_L, X=X_train)
y_test_pred_L = predict(W=W_L, X=X_test)

In [71]:
# Accuracy = share of samples whose predicted index matches the arg-max of the one-hot label
accuracy_train_L = (y_train_pred_L == y_train.argmax(axis=1)).mean()
accuracy_test_L = (y_test_pred_L == y_test.argmax(axis=1)).mean()

print(f"Accuracy for L(W) - Training set: {accuracy_train_L * 100:.4f}%")
print(f"Accuracy for L(W) - Test set: {accuracy_test_L * 100:.4f}%")

Accuracy for L(W) - Training set: 76.4183%
Accuracy for L(W) - Test set: 75.5800%
