In [1]:
import numpy as np
import pandas as pd

In [2]:
df=pd.read_csv('/content/sample_data/Classification_train.csv')
df

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29995,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29996,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29997,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29998,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [3]:
df_np=df.to_numpy()
df_np.shape

(30000, 785)

In [4]:
X,y=df_np[:,1:],df_np[:,0:1]
X.shape,y.shape

((30000, 784), (30000, 1))

In [5]:
np.random.seed(42)
num_samples=30000
num_features=784
num_classes=10

In [6]:
def one_hot_encode(labels,num_classes):
  #Convert integer labels to one-hot encoding
  num_samples=len(labels)
  encoded_labels=np.zeros((num_samples,num_classes))
  for i in range(num_samples):
    encoded_labels[i,labels[i]]=1
  return encoded_labels

In [7]:
y_one_hot=one_hot_encode(y,num_classes)

In [8]:
def train_test_split(X,y,test_ratio=0.2,random_seed=None):
  if random_seed is not None:
    np.random.seed(random_seed)
  #Shuffle indices
  indices=np.arange(X.shape[0])
  np.random.shuffle(indices)
  #Calculate the number of samples for testing
  test_size=int(X.shape[0]*test_ratio)
  #Split the data
  test_indices=indices[:test_size]
  train_indices=indices[test_size:]
  X_train,X_test=X[train_indices],X[test_indices]
  y_train,y_test=y[train_indices],y[test_indices]
  return X_train,X_test,y_train,y_test

In [9]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_ratio=0.2, random_seed=42)

In [10]:
y_train.shape

(24000, 10)

In [11]:
#Neural Netwrok parameters
input_size=num_features
hidden_size=128
output_size=num_classes
learning_rate=0.001
epochs=1000

In [12]:
# Initialize weights and biases
weights_hidden = np.random.randn(input_size, hidden_size)
biases_hidden = np.zeros((1, hidden_size))
weights_output = np.random.randn(hidden_size, output_size)
biases_output = np.zeros((1, output_size))

In [13]:
# Log-Softmax Activation Function (Stable version)
def log_softmax(x):
    max_x = np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(x - max_x)
    return x - max_x - np.log(np.sum(exp_x, axis=1, keepdims=True))


In [14]:
# Training the Neural Network
for epoch in range(epochs):
    # Forward pass
    hidden_layer_input = np.dot(X_train, weights_hidden) + biases_hidden
    hidden_layer_output = np.maximum(0, hidden_layer_input)  # ReLU activation function
    output_layer_input = np.dot(hidden_layer_output, weights_output) + biases_output
    log_probs = log_softmax(output_layer_input)

    # Compute loss (cross-entropy)
    loss = -np.sum(y_train * log_probs) / len(X_train)

    # Backward pass (Gradient Descent)
    delta_output = np.exp(log_probs) - y_train
    delta_hidden = np.dot(delta_output, weights_output.T) * (hidden_layer_output > 0)

    # Update weights and biases
    weights_output -= learning_rate * np.dot(hidden_layer_output.T, delta_output) / len(X_train)
    biases_output -= learning_rate * np.sum(delta_output, axis=0, keepdims=True) / len(X_train)
    weights_hidden -= learning_rate * np.dot(X_train.T, delta_hidden) / len(X_train)
    biases_hidden -= learning_rate * np.sum(delta_hidden, axis=0, keepdims=True) / len(X_train)

    # Print the loss for every 10 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch {epoch + 1}/{epochs}, Loss: {loss:.4f}')

Epoch 100/1000, Loss: 484.4736
Epoch 200/1000, Loss: 280.8581
Epoch 300/1000, Loss: 195.4248
Epoch 400/1000, Loss: 148.8902
Epoch 500/1000, Loss: 116.3821
Epoch 600/1000, Loss: 95.4114
Epoch 700/1000, Loss: 79.0757
Epoch 800/1000, Loss: 66.0048
Epoch 900/1000, Loss: 56.8389
Epoch 1000/1000, Loss: 48.7521


In [15]:
log_probs.shape

(24000, 10)

In [16]:
y_train.shape

(24000, 10)

In [17]:
# Testing the Neural Network
hidden_layer_input_test = np.dot(X_test, weights_hidden) + biases_hidden
hidden_layer_output_test = np.maximum(0, hidden_layer_input_test)
output_layer_input_test = np.dot(hidden_layer_output_test, weights_output) + biases_output
log_probs_test = log_softmax(output_layer_input_test)


In [18]:
# Convert predicted probabilities to class labels
predicted_labels_test = np.argmax(log_probs_test, axis=1)

# Convert true labels to class labels
true_labels_test = np.argmax(y_test, axis=1)


In [19]:
#Evaluate accuracy
accuracy=np.mean(true_labels_test==predicted_labels_test)
print(f"Accuracy on Test Data: {accuracy * 100:.2f}%")

Accuracy on Test Data: 93.30%


On the test dataset

In [21]:
pf=pd.read_csv('/content/sample_data/Classification_test.csv')
pf

Unnamed: 0,ID,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,15795,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,860,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,5390,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,11964,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,11284,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,11260,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9996,18563,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9997,634,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9998,10057,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [22]:
pf_np=pf.to_numpy()
pf_np.shape

(10000, 785)

In [23]:
P_test,q_test=pf_np[:,1:],pf_np[:,0:1]
P_test.shape,q_test.shape

((10000, 784), (10000, 1))

In [24]:
# Testing the Neural Network on the actual test data
hidden_layer_input_test_new = np.dot(P_test, weights_hidden) + biases_hidden
hidden_layer_output_test_new = np.maximum(0, hidden_layer_input_test_new)
output_layer_input_test_new = np.dot(hidden_layer_output_test_new, weights_output) + biases_output
log_probs_test_new = log_softmax(output_layer_input_test_new)

In [25]:
# Convert predicted probabilities to class labels
predicted_labels_test_new = np.argmax(log_probs_test_new, axis=1)

# Convert true labels to class labels
true_labels_test_new = np.argmax(q_test, axis=1)

In [26]:
#Evaluate accuracy
accuracy=np.mean(true_labels_test_new==predicted_labels_test_new)
print(f"Accuracy on Test Data: {accuracy * 100:.2f}%")

Accuracy on Test Data: 10.10%
