In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
import numpy as np

# Load and preprocess the MNIST data
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28 * 28).astype('float32') / 255.0  # Flatten and normalize
x_test = x_test.reshape(-1, 28 * 28).astype('float32') / 255.0
y_train = tf.keras.utils.to_categorical(y_train, 10)  # One-hot encode labels
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Define model parameters
num_features = 28 * 28  # Input size (flattened image)
num_classes = 10        # Number of classes
learning_rate = 0.1     # Gradient descent learning rate
num_epochs = 50         # Number of training epochs
batch_size = 128

# Define weights and bias
weights = tf.Variable(tf.random.normal([num_features, num_classes], stddev=0.01))
bias = tf.Variable(tf.zeros([num_classes]))

# Define the softmax regression model
def multinomialLogisticRegression(x):
    logits = tf.matmul(x, weights) + bias
    return tf.nn.softmax(logits), logits

# Define cross-entropy loss
def computeLoss(logits, labels):
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))

# Define the gradient descent optimizer
optimizer = tf.optimizers.SGD(learning_rate)

# Training loop
def trainStep(x, y):
    with tf.GradientTape() as tape:
        _, logits = multinomialLogisticRegression(x)
        loss = computeLoss(logits, y)
    gradients = tape.gradient(loss, [weights, bias])
    optimizer.apply_gradients(zip(gradients, [weights, bias]))
    return loss

# Training process
for epoch in range(num_epochs):
    # Shuffle data
    indices = np.random.permutation(x_train.shape[0])
    x_train, y_train = x_train[indices], y_train[indices]
    
    # Mini-batch training
    for i in range(0, x_train.shape[0], batch_size):
        x_batch = x_train[i:i + batch_size]
        y_batch = y_train[i:i + batch_size]
        loss = trainStep(x_batch, y_batch)
    
    print(f"Epoch {epoch + 1}, Loss: {loss.numpy():.4f}")

# Testing
_, logits = multinomialLogisticRegression(x_test)
predictions = tf.argmax(logits, axis=1)
accuracy = tf.reduce_mean(tf.cast(tf.equal(predictions, tf.argmax(y_test, axis=1)), tf.float32))
print(f"Test Accuracy: {accuracy.numpy() * 100:.2f}%")

2024-11-30 16:01:12.576026: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1732993272.635521  145704 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1732993272.649805  145704 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-30 16:01:12.775407: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-11-30 16:01:16.470718: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL

Epoch 1, Loss: 0.4346
Epoch 2, Loss: 0.3632
Epoch 3, Loss: 0.3944
Epoch 4, Loss: 0.3344
Epoch 5, Loss: 0.3177
Epoch 6, Loss: 0.3213
Epoch 7, Loss: 0.4139
Epoch 8, Loss: 0.3866
Epoch 9, Loss: 0.2597
Epoch 10, Loss: 0.2056
Epoch 11, Loss: 0.3927
Epoch 12, Loss: 0.3028
Epoch 13, Loss: 0.2031
Epoch 14, Loss: 0.2321
Epoch 15, Loss: 0.3409
Epoch 16, Loss: 0.2421
Epoch 17, Loss: 0.2282
Epoch 18, Loss: 0.1438
Epoch 19, Loss: 0.2458
Epoch 20, Loss: 0.4502
Epoch 21, Loss: 0.3665
Epoch 22, Loss: 0.2635
Epoch 23, Loss: 0.2001
Epoch 24, Loss: 0.4373
Epoch 25, Loss: 0.2229
Epoch 26, Loss: 0.4342
Epoch 27, Loss: 0.3146
Epoch 28, Loss: 0.0944
Epoch 29, Loss: 0.1853
Epoch 30, Loss: 0.2379
Epoch 31, Loss: 0.2794
Epoch 32, Loss: 0.4180
Epoch 33, Loss: 0.1355
Epoch 34, Loss: 0.2701
Epoch 35, Loss: 0.1833
Epoch 36, Loss: 0.2290
Epoch 37, Loss: 0.1999
Epoch 38, Loss: 0.2637
Epoch 39, Loss: 0.2101
Epoch 40, Loss: 0.3506
Epoch 41, Loss: 0.2325
Epoch 42, Loss: 0.1742
Epoch 43, Loss: 0.2879
Epoch 44, Loss: 0.17

In [None]:
import numpy as np
import tensorflow as tf

# Load the MNIST dataset
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Reshape and normalize pixel values
x_train = x_train.reshape(x_train.shape[0], -1).astype(np.float32) / 255.0
x_test = x_test.reshape(x_test.shape[0], -1).astype(np.float32) / 255.0

# One-hot encoding for labels
y_train = np.eye(10, dtype=int)[y_train]
y_test = np.eye(10, dtype=int)[y_test]

# Define helper functions
def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))  # Stability improvement
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def lossFunction(x, y):
    s = softmax(x)
    return -np.sum(y * np.log(s)) / x.shape[0]  # Average loss

def gradient(x, wTx, y):
    s = softmax(wTx)
    return (x.T @ (s - y)) / x.shape[0]

# Initialize parameters
K = y_train.shape[1]
n = x_train.shape[1]
w = np.random.random_sample((n, K))

# Training hyperparameters
gradientNormStop = 1e-3
maxSteps = 100
learningRate = 1
stochasticity = 0.1

iter = 1
gradientNorm = np.inf

# Training loop
while iter < maxSteps and gradientNorm > gradientNormStop:
    
    wTx = x_train @ w
    grad = gradient(x_train, wTx, y_train)
    gradientNorm = np.linalg.norm(grad)
    w -= learningRate * (grad + stochasticity * gradientNorm * np.random.random_sample(grad.shape))  # Update weights
    loss = lossFunction(wTx, y_train)

    print(f"Iter {iter}: Loss = {loss:.4f}, Gradient Norm = {gradientNorm:.4f}")
    iter += 1


Iter 1: Loss = 4.3034, Gradient Norm = 2.4392
Iter 2: Loss = 5.4886, Gradient Norm = 2.0397
Iter 3: Loss = 5.5762, Gradient Norm = 2.4415
Iter 4: Loss = 6.1143, Gradient Norm = 1.9909
Iter 5: Loss = 4.7195, Gradient Norm = 1.6399
Iter 6: Loss = 3.7657, Gradient Norm = 0.8118
Iter 7: Loss = 3.5128, Gradient Norm = 0.7137
Iter 8: Loss = 3.2963, Gradient Norm = 0.7274
Iter 9: Loss = 3.1387, Gradient Norm = 0.7146
Iter 10: Loss = 2.8514, Gradient Norm = 0.5666
Iter 11: Loss = 2.6831, Gradient Norm = 0.5078
Iter 12: Loss = 2.5576, Gradient Norm = 0.5127
Iter 13: Loss = 2.4300, Gradient Norm = 0.4369
Iter 14: Loss = 2.3100, Gradient Norm = 0.4337
Iter 15: Loss = 2.2064, Gradient Norm = 0.3634
Iter 16: Loss = 2.0934, Gradient Norm = 0.3381
Iter 17: Loss = 2.0188, Gradient Norm = 0.3088
Iter 18: Loss = 1.9339, Gradient Norm = 0.3008
Iter 19: Loss = 1.8489, Gradient Norm = 0.2960
Iter 20: Loss = 1.7939, Gradient Norm = 0.2677
Iter 21: Loss = 1.7433, Gradient Norm = 0.2570
Iter 22: Loss = 1.7027

In [14]:
def predict(x, w):
    probabilities = softmax(x @ w)
    return np.argmax(probabilities, axis=1)  # Return class with highest probability

def accuracy(predictions, labels):
    return np.mean(predictions == labels) * 100

# Test the accuracy
test_predictions = predict(x_test, w)
true_labels = np.argmax(y_test, axis=1)  # Convert one-hot encoding back to class labels
test_accuracy = accuracy(test_predictions, true_labels)

print(f"Test Accuracy: {test_accuracy:.2f}%")

Test Accuracy: 81.97%


In [9]:
a = np.array([1,2,3])

a/3

array([0.33333333, 0.66666667, 1.        ])