In [None]:
import inspect

import numpy as np
import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, random_split
from torchvision.datasets import FashionMNIST

In [None]:
import numpy as np

class DenseLayer:
    """Fully connected layer: ``output = X . weights + bias``, trained by plain SGD."""

    def __init__(self, input_size, output_size):
        """Allocate the layer parameters.

        Args:
            input_size: Number of input features (rows of ``weights``).
            output_size: Number of output units (columns of ``weights``).
        """
        # Standard-normal draws scaled by 0.01 keep the initial outputs small.
        self.weights = 0.01 * np.random.randn(input_size, output_size)
        self.bias = np.zeros(shape=(1, output_size))

    def forward(self, X):
        """Apply the affine map and remember ``X`` for the backward pass.

        Args:
            X: Input array whose last dimension matches ``weights.shape[0]``.

        Returns:
            ``np.dot(X, weights) + bias``.
        """
        self.input = X
        projected = np.dot(X, self.weights)
        return projected + self.bias

    def backward(self, grad_output, learning_rate):
        """Back-propagate through the layer and take one gradient-descent step.

        Args:
            grad_output: Gradient of the loss w.r.t. this layer's output.
            learning_rate: Step size applied to both weights and bias.

        Returns:
            Gradient of the loss w.r.t. the input cached by ``forward``.
        """
        cached_input = self.input
        weight_grad = np.dot(cached_input.T, grad_output)
        bias_grad = grad_output.sum(axis=0, keepdims=True)
        # The input gradient must be taken with the weights used in forward,
        # so it is computed before the in-place update below.
        upstream_grad = np.dot(grad_output, self.weights.T)

        self.weights -= learning_rate * weight_grad
        self.bias -= learning_rate * bias_grad
        return upstream_grad


In [None]:
class BatchNormalization:
    """Per-feature batch normalisation with learnable scale (gamma) and shift (beta).

    In training mode the layer normalises with the statistics of the current
    batch and maintains exponential running averages of them. In inference
    mode (``training=False``) it normalises with those running averages so
    the output of a sample no longer depends on the rest of its batch.
    """

    def __init__(self, units, momentum=0.9, epsilon=1e-5):
        """
        Args:
            units: Number of features (size of the last axis of the input).
            momentum: Weight kept on the previous running statistic at each update.
            epsilon: Small constant added to the variance for numerical stability.
        """
        self.gamma = np.ones((1, units))
        self.beta = np.zeros((1, units))
        self.momentum = momentum
        self.epsilon = epsilon
        # Running statistics are seeded from the first training batch.
        self.running_mean = None
        self.running_var = None

    def forward(self, X, training=True):
        """Normalise ``X`` over axis 0, then scale and shift.

        Args:
            X: Array of shape ``(batch, units)``.
            training: When True (default) use batch statistics, cache what
                ``backward`` needs and update the running averages. When
                False use the running averages and leave all state untouched.

        Returns:
            Array with the same shape as ``X``.
        """
        if not training and self.running_mean is not None:
            normalized = (X - self.running_mean) / np.sqrt(self.running_var + self.epsilon)
            return self.gamma * normalized + self.beta

        mean = np.mean(X, axis=0)
        var = np.var(X, axis=0)
        std = np.sqrt(var + self.epsilon)
        normalized = (X - mean) / std
        out = self.gamma * normalized + self.beta

        if not training:
            # No running statistics exist yet: fall back to batch statistics
            # without recording anything.
            return out

        # Cache for backward.
        self.normalized = normalized
        self.inv_std = 1.0 / std
        self.out = out

        if self.running_mean is None:
            self.running_mean = mean
            self.running_var = var
        self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * mean
        self.running_var = self.momentum * self.running_var + (1 - self.momentum) * var

        return self.out

    def backward(self, grad_output, learning_rate):
        """Back-propagate through the normalisation and update gamma/beta.

        The batch mean and variance are themselves functions of the input, so
        the input gradient contains two correction terms in addition to the
        plain ``grad_output * gamma / std`` scaling.

        Args:
            grad_output: Gradient of the loss w.r.t. the output of the last
                training-mode ``forward`` call, shape ``(batch, units)``.
            learning_rate: Step size for the gamma/beta gradient-descent update.

        Returns:
            Gradient of the loss w.r.t. the input of that ``forward`` call.
        """
        batch_size = grad_output.shape[0]

        grad_gamma = np.sum(grad_output * self.normalized, axis=0)
        grad_beta = np.sum(grad_output, axis=0)

        # Gradient w.r.t. the normalised activations, using the gamma that was
        # active during forward (i.e. before the update below).
        grad_normalized = grad_output * self.gamma
        grad_input = (self.inv_std / batch_size) * (
            batch_size * grad_normalized
            - np.sum(grad_normalized, axis=0)
            - self.normalized * np.sum(grad_normalized * self.normalized, axis=0)
        )

        self.gamma -= learning_rate * grad_gamma
        self.beta -= learning_rate * grad_beta

        return grad_input
    

In [None]:
class ReLU:
    """Rectified linear unit: keeps positive values and zeroes everything else."""

    def forward(self, X):
        """Return ``max(0, X)`` element-wise and cache ``X`` for ``backward``."""
        self.input = X
        activated = np.maximum(0, X)
        return activated

    def backward(self, grad_output):
        """Pass the gradient through only where the cached input was positive.

        Args:
            grad_output: Gradient w.r.t. the layer output; it is not modified.

        Returns:
            A copy of ``grad_output`` with entries zeroed where input <= 0.
        """
        blocked = self.input <= 0
        grad_input = grad_output.copy()
        np.putmask(grad_input, blocked, 0)
        return grad_input


In [None]:
class Dropout:
    """Inverted dropout: randomly zeroes activations and rescales the survivors.

    Surviving activations are divided by ``1 - rate`` during training so the
    layer is the identity at inference time.
    """

    def __init__(self, rate):
        """
        Args:
            rate: Probability of dropping an activation, in ``[0, 1)``.

        Raises:
            ValueError: If ``rate`` is outside ``[0, 1)``; a rate of 1 would
                divide by zero when rescaling.
        """
        if not 0 <= rate < 1:
            raise ValueError(f"Dropout rate must be in [0, 1), got {rate}")
        self.rate = rate
        self.mask = None

    def forward(self, X, training=True):
        """Apply dropout in training mode; return ``X`` unchanged otherwise.

        Args:
            X: Input activations.
            training: When True (default) sample a fresh mask; when False the
                layer is a pass-through and any previous mask is discarded.
        """
        if training:
            self.mask = (np.random.rand(*X.shape) > self.rate) / (1 - self.rate)
            return X * self.mask
        # Forget the old mask so backward cannot apply a stale one.
        self.mask = None
        return X

    def backward(self, grad_output):
        """Route the gradient through the mask used by the last forward call.

        No learning rate is needed: the layer has no parameters. If the last
        forward ran in inference mode (or forward was never called) the layer
        was the identity, so the gradient is returned unchanged.
        """
        if self.mask is None:
            return grad_output
        return grad_output * self.mask



In [None]:
class AdamOptimizer:
    """Adam optimiser that keeps separate moment estimates per parameter array.

    State is keyed by the identity of the array passed to ``update``, so one
    optimiser instance can serve several parameter tensors of different
    shapes. Parameters must therefore be updated in place (as ``update``
    itself does); passing a freshly allocated array starts a new state.
    """

    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        """
        Args:
            learning_rate: Step size.
            beta1: Decay rate of the first-moment (mean) estimate.
            beta2: Decay rate of the second-moment (uncentred variance) estimate.
            epsilon: Constant added to the denominator for numerical stability.
        """
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        # m / v / t mirror the state of the most recently updated parameter,
        # which matches the original single-parameter attributes.
        self.m = None
        self.v = None
        self.t = 0
        # id(array) -> {"param", "m", "v", "t"}; holding "param" keeps the
        # array alive so its id cannot be recycled for another object.
        self._state = {}

    def update(self, weights, gradients):
        """Apply one Adam step to ``weights`` in place.

        Args:
            weights: Parameter array; modified in place.
            gradients: Gradient of the loss w.r.t. ``weights``.

        Returns:
            The same ``weights`` array, for convenience.
        """
        state = self._state.get(id(weights))
        if state is None:
            state = {
                "param": weights,
                "m": np.zeros_like(weights),
                "v": np.zeros_like(weights),
                "t": 0,
            }
            self._state[id(weights)] = state

        state["t"] += 1
        state["m"] = self.beta1 * state["m"] + (1 - self.beta1) * gradients
        state["v"] = self.beta2 * state["v"] + (1 - self.beta2) * (gradients ** 2)

        # Bias correction compensates for the zero initialisation of m and v.
        m_hat = state["m"] / (1 - self.beta1 ** state["t"])
        v_hat = state["v"] / (1 - self.beta2 ** state["t"])

        weights -= self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)

        self.m, self.v, self.t = state["m"], state["v"], state["t"]
        return weights


In [None]:
class Softmax:
    """Softmax over the last axis, with a matching backward pass."""

    def __init__(self):
        # Output of the most recent forward call, needed by backward.
        self.output = None

    def forward(self, X):
        """Convert scores to probabilities along the last axis.

        The per-row maximum is subtracted before exponentiating so large
        scores cannot overflow; this does not change the result.

        Args:
            X: Array of scores; each slice along the last axis is normalised.

        Returns:
            Array of the same shape whose last-axis slices sum to 1.
        """
        exps = np.exp(X - np.max(X, axis=-1, keepdims=True))
        self.output = exps / np.sum(exps, axis=-1, keepdims=True)
        return self.output

    def backward(self, grad_output):
        """Back-propagate a gradient through the softmax.

        Uses the closed form of the Jacobian-vector product,
        ``s * (g - sum(g * s))``, instead of building the full Jacobian.
        When the loss is cross-entropy, callers may instead apply the fused
        gradient ``probabilities - targets`` to the scores directly and skip
        this method.

        Args:
            grad_output: Gradient of the loss w.r.t. the softmax output.

        Returns:
            Gradient of the loss w.r.t. the scores passed to ``forward``.

        Raises:
            RuntimeError: If called before ``forward``.
        """
        if self.output is None:
            raise RuntimeError("Softmax.backward called before forward")
        probs = self.output
        weighted_sum = np.sum(grad_output * probs, axis=-1, keepdims=True)
        return probs * (grad_output - weighted_sum)


In [None]:

class NeuralNetwork:
    """Multi-layer perceptron: (Dense -> BatchNorm -> ReLU -> Dropout)* -> Dense -> Softmax."""

    def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate=0.5):
        """Build the layer stack.

        Args:
            input_dim: Number of input features.
            hidden_dims: Iterable with the width of each hidden block.
            output_dim: Number of output classes.
            dropout_rate: Drop probability used by every Dropout layer.
        """
        self.layers = []

        # Input to hidden layers
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            self.layers.append(DenseLayer(prev_dim, hidden_dim))
            self.layers.append(BatchNormalization(hidden_dim))
            self.layers.append(ReLU())
            self.layers.append(Dropout(rate=dropout_rate))
            prev_dim = hidden_dim

        # Hidden to output layer
        self.layers.append(DenseLayer(prev_dim, output_dim))
        self.softmax = Softmax()

    @staticmethod
    def _accepts(method, argument):
        """Return True if ``method`` declares a parameter named ``argument``."""
        try:
            return argument in inspect.signature(method).parameters
        except (TypeError, ValueError):
            return False

    def forward(self, X, training=True):
        """Run the network and return class probabilities.

        Args:
            X: Input batch of shape ``(batch, input_dim)``.
            training: Forwarded to every layer whose ``forward`` declares a
                ``training`` parameter. Pass False for evaluation so that
                mode-dependent layers such as dropout are switched off.

        Returns:
            Softmax probabilities of shape ``(batch, output_dim)``.
        """
        for layer in self.layers:
            if self._accepts(layer.forward, "training"):
                X = layer.forward(X, training=training)
            else:
                X = layer.forward(X)
        return self.softmax.forward(X)

    def backward(self, grad_output, learning_rate):
        """Back-propagate through the layer stack, updating parameters as it goes.

        ``self.softmax`` is not traversed: ``grad_output`` must already be the
        gradient w.r.t. the final dense layer's output (the softmax input).

        Args:
            grad_output: Gradient of the loss w.r.t. the pre-softmax scores.
            learning_rate: Step size handed to every parameterised layer.

        Returns:
            Gradient of the loss w.r.t. the network input.
        """
        for layer in reversed(self.layers):
            if isinstance(layer, (Dropout, ReLU)):
                # Parameter-free layers take no learning rate.
                grad_output = layer.backward(grad_output)
            else:
                grad_output = layer.backward(grad_output, learning_rate)
        return grad_output



In [None]:
def one_hot_encode(labels, num_classes):
    """Turn integer class labels into one-hot rows.

    Args:
        labels: Integer label or array of labels used to index the identity matrix.
        num_classes: Length of each one-hot vector.

    Returns:
        Float array of shape ``labels.shape + (num_classes,)``.
    """
    # Row k of the identity matrix is exactly the one-hot vector for class k.
    identity_rows = np.identity(num_classes)
    return identity_rows[labels]


In [None]:

# NOTE: train() below only reads `optimizer.learning_rate` and hands it to
# model.backward(), where each layer applies a plain gradient-descent step.
# AdamOptimizer.update() is never called, so no Adam moments are used.
optimizer = AdamOptimizer(learning_rate=0.001)

def train(model, data_loader, optimizer, log_every=1):
    """Run one pass over ``data_loader``, updating ``model`` after every batch.

    Args:
        model: Object exposing ``forward(X)`` (returning per-class
            probabilities) and ``backward(grad_output, learning_rate)``.
        data_loader: Iterable of ``(images, labels)`` torch tensor batches.
        optimizer: Only ``optimizer.learning_rate`` is read; the parameter
            updates themselves happen inside ``model.backward``.
        log_every: Print the batch loss every ``log_every`` batches. The
            default of 1 logs every batch; 0 or None disables logging.

    Returns:
        The sample-weighted mean cross-entropy loss over the pass, or NaN if
        the loader yielded no samples.
    """
    total_loss = 0.0
    total_samples = 0

    for batch, (images, labels) in enumerate(data_loader):
        images = images.view(images.size(0), -1).numpy()  # Flatten each image to a vector
        labels = labels.numpy()

        # Forward pass
        predictions = model.forward(images)

        # Take the class count from the model output instead of hard-coding it.
        one_hot_labels = one_hot_encode(labels, num_classes=predictions.shape[1])

        # Mean cross-entropy; the small constant guards against log(0).
        loss = -np.sum(one_hot_labels * np.log(predictions + 1e-9)) / len(labels)

        # Fused softmax + cross-entropy gradient w.r.t. the pre-softmax scores.
        # NOTE: this is the gradient of the *summed* batch loss (it is not
        # divided by the batch size like `loss` above), so the effective step
        # size scales with the batch size.
        grad_output = predictions - one_hot_labels
        model.backward(grad_output, optimizer.learning_rate)

        total_loss += float(loss) * len(labels)
        total_samples += len(labels)

        if log_every and batch % log_every == 0:
            print(f"Batch {batch}, Loss: {loss}")

    if total_samples == 0:
        return float("nan")
    return total_loss / total_samples

def evaluate(model, data_loader):
    """Compute and print classification accuracy of ``model`` on ``data_loader``.

    Args:
        model: Object exposing ``forward``. If ``forward`` declares a
            ``training`` parameter it is called with ``training=False`` so
            that train-only behaviour (e.g. dropout) does not randomise the
            predictions.
        data_loader: Iterable of ``(images, labels)`` torch tensor batches.

    Returns:
        Fraction of correctly classified samples, or 0.0 if the loader
        yielded no samples.
    """
    # Request inference mode only when the model supports it, so models with
    # a plain ``forward(X)`` keep working.
    try:
        supports_mode = "training" in inspect.signature(model.forward).parameters
    except (TypeError, ValueError):
        supports_mode = False
    mode_kwargs = {"training": False} if supports_mode else {}

    correct, total = 0, 0
    for images, labels in data_loader:
        images = images.view(images.size(0), -1).numpy()
        labels = labels.numpy()

        # Forward pass
        outputs = model.forward(images, **mode_kwargs)
        predictions = np.argmax(outputs, axis=1)

        correct += int(np.sum(predictions == labels))
        total += labels.size

    if total == 0:
        # Avoid a ZeroDivisionError on an empty loader.
        print("Validation Accuracy: n/a (no samples)")
        return 0.0

    accuracy = correct / total
    print(f"Validation Accuracy: {accuracy * 100:.2f}%")
    return accuracy


## Load the dataset

In [None]:


# Tunable settings for data loading, kept together so they are easy to find.
DATA_ROOT = 'data'
TRAIN_FRACTION = 0.8   # share of the official training set used for training
BATCH_SIZE = 64
DATA_SEED = 42         # fixes the train/validation split and the shuffle order

# Convert images to tensors, then shift/scale the single grayscale channel
# with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize grayscale channel
])

# Download (if needed) and load the FashionMNIST dataset
train_data = FashionMNIST(root=DATA_ROOT, train=True, transform=transform, download=True)
test_data = FashionMNIST(root=DATA_ROOT, train=False, transform=transform, download=True)

# Split training data into train and validation sets. A seeded generator makes
# the split identical on every Restart & Run All.
train_size = int(TRAIN_FRACTION * len(train_data))
val_size = len(train_data) - train_size
train_dataset, val_dataset = random_split(
    train_data,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(DATA_SEED),
)

# Create data loaders. Only the training loader shuffles; its own seeded
# generator keeps the batch order reproducible.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(DATA_SEED),
)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)


In [None]:
# Model parameters
input_dim = 28 * 28  # Flattened input size (28x28 pixels)
hidden_dims = [128, 64]  # Example hidden layers
output_dim = 10  # FashionMNIST has 10 classes

# Weight initialisation and dropout masks both draw from NumPy's global RNG,
# so seed it before building the model to make runs repeatable.
MODEL_SEED = 42
np.random.seed(MODEL_SEED)

# Initialize model
model = NeuralNetwork(input_dim, hidden_dims, output_dim)

# Training loop: one training pass followed by a validation check per epoch.
epochs = 10
val_accuracies = []  # validation accuracy after each epoch, for later inspection
for epoch in range(epochs):
    print(f"Epoch {epoch+1}/{epochs}")
    train(model, train_loader, optimizer)
    accuracy = evaluate(model, val_loader)
    val_accuracies.append(accuracy)
