# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the training CSV first, then the test CSV, into DataFrames.
train_df, test_df = map(
    pd.read_csv,
    (
        '/Users/aadya.mukherjee/Documents/git my/mrm_nn/fashion-mnist_train.csv',
        "/Users/aadya.mukherjee/Documents/git my/mrm_nn/fashion-mnist_test.csv",
    ),
)

# Data Preprocessing
def load_and_preprocess_data(train_df, test_df):
    """Split the train/test tables into scaled feature arrays and label arrays.

    Each argument may be an already-loaded ``pandas.DataFrame`` or any source
    that ``pandas.read_csv`` accepts (a path string, an ``os.PathLike`` or an
    open file-like object). Anything that is not a DataFrame is read with
    ``pd.read_csv`` first, so passing file paths works as well as passing
    frames.

    Column 0 of each table is used as the label; every remaining column is
    treated as a feature and divided by 255.0.

    Args:
        train_df: Training table, or a CSV source for it.
        test_df: Test table, or a CSV source for it.

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)`` of NumPy arrays.
    """

    def _as_frame(source):
        # Leave DataFrames untouched; load everything else as CSV.
        if isinstance(source, pd.DataFrame):
            return source
        return pd.read_csv(source)

    def _split(frame):
        # Features are all columns after the first, scaled by 1/255.
        features = frame.iloc[:, 1:].values / 255.0
        labels = frame.iloc[:, 0].values
        return features, labels

    X_train, y_train = _split(_as_frame(train_df))
    X_test, y_test = _split(_as_frame(test_df))

    return X_train, y_train, X_test, y_test

# Activation Functions
def relu(x):
    """Apply the rectified linear unit element-wise: max(0, value)."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Return the ReLU slope element-wise: 1.0 where x > 0, otherwise 0.0."""
    is_positive = x > 0
    return is_positive.astype(float)

def softmax(x):
    """Compute a softmax along axis 1 of ``x``.

    The per-row maximum is subtracted before exponentiating so that large
    inputs do not overflow ``np.exp``; the result is unchanged by that shift.
    """
    row_peaks = np.max(x, axis=1, keepdims=True)
    unnormalised = np.exp(x - row_peaks)
    row_totals = np.sum(unnormalised, axis=1, keepdims=True)
    return unnormalised / row_totals

# Loss Function
def cross_entropy_loss(y_true, y_pred, eps=1e-12):
    """Mean negative log-likelihood of the true class for each row.

    Args:
        y_true: 1-D array of integer class indices, one per row of ``y_pred``.
        y_pred: 2-D array of predicted probabilities; row ``i``, column
            ``y_true[i]`` is the probability assigned to the true class.
        eps: Lower bound applied to the selected probabilities before the
            logarithm, so an exact 0 yields a large finite loss instead of
            ``inf``.

    Returns:
        The summed negative log-probabilities divided by the number of rows.
    """
    m = y_true.shape[0]
    # Select, for every row, the probability assigned to its true class.
    true_class_prob = y_pred[np.arange(m), y_true]
    # Floor at eps: log(0) would otherwise make the loss infinite.
    log_likelihood = -np.log(np.maximum(true_class_prob, eps))
    return np.sum(log_likelihood) / m

def cross_entropy_derivative(y_true, y_pred):
    """Gradient of the mean cross-entropy loss w.r.t. the softmax inputs.

    For softmax outputs ``y_pred`` and integer labels ``y_true`` this is
    ``(y_pred - one_hot(y_true)) / m`` where ``m`` is the number of rows.

    Args:
        y_true: 1-D array of integer class indices, one per row of ``y_pred``.
        y_pred: 2-D array of predicted probabilities. It is not modified.

    Returns:
        A new array with the same shape as ``y_pred`` holding the gradient.
    """
    m = y_true.shape[0]
    # Work on a copy: the in-place updates below must not overwrite the
    # caller's prediction array.
    grad = np.array(y_pred, copy=True)
    # Subtracting 1 at the true-class position equals subtracting the one-hot
    # label matrix.
    grad[np.arange(m), y_true] -= 1
    grad /= m
    return grad

# Neural Network Class
class NeuralNetwork:
    """Two-layer fully connected classifier trained by gradient descent.

    The forward pass is ``X -> linear -> ReLU -> linear -> softmax``. Each
    call to ``backward`` performs one gradient-descent update of all
    parameters using the whole batch it is given.

    Attributes:
        weights1: Array of shape (input_size, hidden_size).
        bias1: Array of shape (1, hidden_size).
        weights2: Array of shape (hidden_size, output_size).
        bias2: Array of shape (1, output_size).
        learning_rate: Step size multiplied into every gradient.
        loss_history: Loss recorded at each epoch run by ``train``.
    """

    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        """Create the parameters: small random weights and zero biases."""
        # Weights are standard-normal draws scaled by 0.01.
        self.weights1 = np.random.randn(input_size, hidden_size) * 0.01
        self.bias1 = np.zeros((1, hidden_size))
        self.weights2 = np.random.randn(hidden_size, output_size) * 0.01
        self.bias2 = np.zeros((1, output_size))
        self.learning_rate = learning_rate
        self.loss_history = []

    def forward(self, X):
        """Run the forward pass and return the softmax output.

        The intermediate values ``Z1``, ``A1``, ``Z2`` and ``A2`` are stored
        on the instance because ``backward`` reads ``A1`` and ``Z1``.
        """
        self.Z1 = np.dot(X, self.weights1) + self.bias1
        self.A1 = relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.weights2) + self.bias2
        self.A2 = softmax(self.Z2)
        return self.A2

    def backward(self, X, y_true, y_pred):
        """Backpropagate one batch and update the parameters in place.

        Must be called after ``forward`` on the same ``X``, since it uses the
        cached ``self.A1`` and ``self.Z1``.

        Args:
            X: Input batch that was passed to ``forward``.
            y_true: Integer class labels for the batch.
            y_pred: Output of ``forward`` for the batch. It is not modified.
        """
        # Output layer gradients
        # Give the gradient helper its own copy so the array cached in
        # self.A2 is never modified, whether or not the helper works in place.
        dZ2 = cross_entropy_derivative(y_true, np.array(y_pred, copy=True))
        dW2 = np.dot(self.A1.T, dZ2)
        db2 = np.sum(dZ2, axis=0, keepdims=True)

        # Hidden layer gradients
        dA1 = np.dot(dZ2, self.weights2.T)
        dZ1 = dA1 * relu_derivative(self.Z1)
        dW1 = np.dot(X.T, dZ1)
        db1 = np.sum(dZ1, axis=0, keepdims=True)

        # Update weights and biases
        self.weights1 -= self.learning_rate * dW1
        self.bias1 -= self.learning_rate * db1
        self.weights2 -= self.learning_rate * dW2
        self.bias2 -= self.learning_rate * db2

    def train(self, X, y, epochs=10):
        """Run ``epochs`` forward/backward passes over the whole of ``X``.

        The loss before each update is appended to ``self.loss_history`` and
        printed.
        """
        for epoch in range(epochs):
            y_pred = self.forward(X)
            loss = cross_entropy_loss(y, y_pred)
            self.loss_history.append(loss)
            self.backward(X, y, y_pred)
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}")

    def predict(self, X):
        """Return, for each row of ``X``, the index of the largest output."""
        y_pred = self.forward(X)
        return np.argmax(y_pred, axis=1)

# Visualization function
def plot_training_loss(loss_history):
    """Draw the recorded loss values as a line chart and display it.

    Args:
        loss_history: Sequence of loss values, plotted against their index.
    """
    plt.figure(figsize=(10, 6))
    plt.plot(loss_history, label="Training Loss")

    # Titles and axis captions.
    plt.title("Training Loss Over Epochs")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")

    # Decorations, then render.
    plt.grid()
    plt.legend()
    plt.show()

# Load and preprocess the data
# load_and_preprocess_data indexes its arguments with .iloc, so it is given
# the DataFrames read at the top of the file rather than file-path strings.
X_train, y_train, X_test, y_test = load_and_preprocess_data(train_df, test_df)

# Define and train the neural network
input_size = 784  # 28x28 pixels
hidden_size = 128
output_size = 10  # 10 classes
learning_rate = 0.1

nn = NeuralNetwork(input_size, hidden_size, output_size, learning_rate)
nn.train(X_train, y_train, epochs=20)

# Plot training loss
plot_training_loss(nn.loss_history)

# Evaluate the model
# Accuracy is the percentage of test rows whose predicted class equals the label.
predictions = nn.predict(X_test)
accuracy = np.mean(predictions == y_test) * 100
print(f"Test Accuracy: {accuracy:.2f}%")
