In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
def one_hot_encode(labels, num_classes):
    num_samples = len(labels)
    one_hot_labels = np.zeros((num_samples, num_classes))
    one_hot_labels[np.arange(num_samples), labels] = 1
    return one_hot_labels

In [3]:
train_data=pd.read_csv('Classification_train.csv')
test_data=pd.read_csv('Classification_test.csv')
train_labels=train_data['label'].values 
num_classes = np.max(train_labels) + 1
X_train = train_data.drop('label', axis=1).values
X_test = test_data.drop('ID', axis=1).values
train_labels = one_hot_encode(train_labels, num_classes)

In [4]:
y_train = train_data.values
y_train=y_train[:, 1:]
X_train = X_train.astype('float32') / 255.0
y_train = y_train.astype('float32') / 255.0
y_test=test_data.values
y_test=y_test[:, 1:]
X_test = X_test.astype('float32') / 255.0
y_test = y_test.astype('float32') / 255.0

In [5]:
np.random.seed(42)

In [6]:
input_size = 28*28
hidden_size = 512  
output_size = 10

In [7]:
class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.weights1 = np.random.randn(input_size, hidden_size) * 0.01
        self.bias1 = np.zeros((1, hidden_size))
        self.weights2 = np.random.randn(hidden_size, output_size) * 0.01
        self.bias2 = np.zeros((1, output_size))

    def forward(self, X):
        self.z1 = np.dot(X, self.weights1) + self.bias1
        self.a1 = self.relu(self.z1)
        self.z2 = np.dot(self.a1, self.weights2) + self.bias2
        self.a2 = self.softmax(self.z2)
        return self.a2

    def backward(self, X, y, learning_rate=0.01):
        m = X.shape[0]

        dz2 = self.a2 - y
        dw2 = np.dot(self.a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m

        # Hidden layer
        dz1 = np.dot(dz2, self.weights2.T) * self.relu_derivative(self.a1)
        dw1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m

        # Update weights and biases
        self.weights2 -= learning_rate * dw2
        self.bias2 -= learning_rate * db2
        self.weights1 -= learning_rate * dw1
        self.bias1 -= learning_rate * db1

    def relu(self, x):
        return np.maximum(0, x)

    def relu_derivative(self, x):
        return np.where(x > 0, 1, 0)

    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

    def train(self, X, y, epochs, learning_rate=0.01, batch_size=64):
        for epoch in tqdm(range(epochs)):
            permutation = np.random.permutation(X.shape[0])
            X_shuffled = X[permutation]
            y_shuffled = y[permutation]

            for i in range(0, X.shape[0], batch_size):
                X_batch = X_shuffled[i:i+batch_size]
                y_batch = y_shuffled[i:i+batch_size]

                predictions = self.forward(X_batch)

                loss = -np.sum(y_batch * np.log(predictions)) / X_batch.shape[0]

                self.backward(X_batch, y_batch, learning_rate)

            print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss}")

In [8]:
nn = NeuralNetwork(input_size, hidden_size, output_size)
nn.train(X_train, train_labels, epochs=20, learning_rate=0.01, batch_size=128)


  5%|▌         | 1/20 [00:02<00:44,  2.33s/it]

Epoch 1/20, Loss: 2.2062830155669344


 10%|█         | 2/20 [00:07<01:07,  3.74s/it]

Epoch 2/20, Loss: 1.8930300588006397


 15%|█▌        | 3/20 [00:12<01:18,  4.62s/it]

Epoch 3/20, Loss: 1.2209565495082229


 20%|██        | 4/20 [00:19<01:27,  5.49s/it]

Epoch 4/20, Loss: 0.7968099233852882


 25%|██▌       | 5/20 [00:25<01:24,  5.61s/it]

Epoch 5/20, Loss: 0.5194162718280991


 30%|███       | 6/20 [00:31<01:19,  5.71s/it]

Epoch 6/20, Loss: 0.4615185031819329


 35%|███▌      | 7/20 [00:37<01:16,  5.89s/it]

Epoch 7/20, Loss: 0.2727890844156506


 40%|████      | 8/20 [00:43<01:12,  6.01s/it]

Epoch 8/20, Loss: 0.4542900096831896


 45%|████▌     | 9/20 [00:50<01:07,  6.16s/it]

Epoch 9/20, Loss: 0.3794984562707191


 50%|█████     | 10/20 [00:56<01:03,  6.32s/it]

Epoch 10/20, Loss: 0.22615191000095938


 55%|█████▌    | 11/20 [01:03<00:57,  6.40s/it]

Epoch 11/20, Loss: 0.2073718670593004


 60%|██████    | 12/20 [01:09<00:48,  6.12s/it]

Epoch 12/20, Loss: 0.28821856713049265


 65%|██████▌   | 13/20 [01:13<00:40,  5.76s/it]

Epoch 13/20, Loss: 0.31741660705002156


 70%|███████   | 14/20 [01:19<00:34,  5.71s/it]

Epoch 14/20, Loss: 0.1587957687582425


 75%|███████▌  | 15/20 [01:25<00:29,  5.90s/it]

Epoch 15/20, Loss: 0.30204670384532933


 80%|████████  | 16/20 [01:32<00:24,  6.03s/it]

Epoch 16/20, Loss: 0.1277557503452361


 85%|████████▌ | 17/20 [01:38<00:17,  5.96s/it]

Epoch 17/20, Loss: 0.10895151297512222


 90%|█████████ | 18/20 [01:44<00:12,  6.11s/it]

Epoch 18/20, Loss: 0.35371557525593356


 95%|█████████▌| 19/20 [01:50<00:06,  6.10s/it]

Epoch 19/20, Loss: 0.080090751331112


100%|██████████| 20/20 [01:56<00:00,  5.82s/it]

Epoch 20/20, Loss: 0.3651223237070755





In [9]:
train_labels = (train_data['label'].values)
num_classes = np.max(train_labels) + 1
train_labels = one_hot_encode(train_labels, num_classes)
y_pred = nn.forward(y_train)
test_accuracy = np.mean(np.argmax(y_pred, axis=1) == np.argmax(train_labels, axis=1))
print(f'Test accuracy: {test_accuracy}')

Test accuracy: 0.9531


In [10]:
y_pred

array([[8.44867534e-01, 6.91078923e-02, 5.75889759e-03, ...,
        7.27720883e-03, 9.23134426e-03, 1.28987911e-02],
       [2.85939283e-03, 7.17240442e-04, 8.62213498e-04, ...,
        6.84474772e-07, 9.93250411e-01, 1.54660613e-05],
       [4.88257590e-03, 9.95002848e-01, 8.49401591e-07, ...,
        2.61188427e-07, 7.05173080e-05, 2.26558203e-05],
       ...,
       [1.72390326e-03, 2.95264363e-04, 6.71731252e-05, ...,
        9.05975116e-05, 6.26925911e-04, 9.90084976e-01],
       [4.51356320e-06, 3.33140693e-05, 4.70984662e-05, ...,
        4.58794176e-03, 7.80904095e-07, 4.18338912e-06],
       [5.32222932e-04, 1.36594395e-03, 3.09055437e-02, ...,
        7.84161250e-03, 2.46289583e-02, 5.64269980e-01]])

In [11]:
y_pred=np.argmax(y_pred,axis=1)

array([0, 8, 1, ..., 9, 3, 9], dtype=int64)

In [29]:
y_train=train_data.iloc[:,0].to_numpy()
y_train

array([0, 8, 1, ..., 9, 3, 9], dtype=int64)

In [26]:
print(y_pred)
print(y_train)

[0 8 1 ... 9 3 5]
[0 8 1 ... 9 3 9]


In [27]:
def SSM(y_train):
    return np.sum((y_train-np.mean(y_train))**2)
def SSR(y_train,y_pred):
    return np.sum((y_train-y_pred)**2)
    
SSr=SSR(y_train,y_pred)
SSm=SSM(y_train)
r2_score=1-(SSr/SSm)
r2_score

0.9351158539499049

In [14]:
test_labels = (test_data['ID'].values)
num_classes = np.max(test_labels) + 1
test_labels = one_hot_encode(test_labels, num_classes)
y_pred = nn.forward(y_test)

In [15]:
y_pred

array([[9.92310441e-01, 9.92676258e-04, 6.10070289e-04, ...,
        1.85627684e-04, 5.77501292e-03, 6.56276841e-05],
       [3.78508816e-06, 5.53091535e-08, 4.99668720e-06, ...,
        5.36706164e-05, 4.74344274e-04, 5.01213774e-04],
       [3.77522700e-05, 2.16803984e-04, 4.12618133e-02, ...,
        1.09375092e-06, 3.02245389e-02, 2.26685260e-03],
       ...,
       [9.52112538e-01, 9.86649311e-03, 9.63736341e-04, ...,
        4.47203125e-04, 2.66346009e-02, 9.60900849e-03],
       [2.60946620e-02, 9.63299863e-04, 8.28781927e-04, ...,
        1.90639721e-05, 9.67992769e-01, 9.97911473e-04],
       [8.65414008e-04, 2.08255669e-02, 3.68892605e-01, ...,
        4.43186364e-04, 1.11603670e-03, 9.20176814e-04]])

In [17]:
np.argmax(y_pred,axis=1)

array([0, 4, 5, ..., 0, 8, 5], dtype=int64)

In [30]:
y_test=test_data.iloc[:,0].to_numpy()
y_test

array([15795,   860,  5390, ...,   634, 10057, 13345], dtype=int64)

In [28]:
def SSM(y_train):
    return np.sum((y_train-np.mean(y_train))**2)
def SSR(y_train,y_pred):
    return np.sum((y_train-y_pred)**2)
    
SSr=SSR(y_test,y_pred1)
SSm=SSM(y_test)
r2_score=1-(SSr/SSm)
r2_score

-2.95044057292713