In [None]:
import tensorflow as tf
import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import random
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.utils import class_weight
import struct

In [None]:
# Location of the training images. The function below takes its own
# `file_path` argument, which shadows this module-level name inside the body.
file_path = "Kaggle Dataset/train-images.idx3-ubyte"
def read_idx_images(file_path):
    """Read an IDX3 unsigned-byte image file into a NumPy array.

    The file starts with four big-endian 32-bit unsigned integers
    (magic number, image count, rows, columns) followed by one byte per pixel.

    Args:
        file_path: Path of the IDX3 file to read.

    Returns:
        A uint8 array of shape ``(num, rows, cols)``. It is a read-only view
        over the bytes read from disk.

    Raises:
        ValueError: If the header is shorter than 16 bytes, the magic number is
            not 2051 (IDX, unsigned byte, 3 dimensions), or the payload size
            does not equal ``num * rows * cols``.
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'rb') as f:
        header = f.read(16)
        if len(header) != 16:
            raise ValueError(
                f"{file_path}: IDX3 header needs 16 bytes, got {len(header)}"
            )
        magic, num, rows, cols = struct.unpack('>IIII', header)
        # 2051 == 0x00000803: data type 0x08 (unsigned byte), 3 dimensions.
        if magic != 2051:
            raise ValueError(
                f"{file_path}: not an IDX3 image file (magic number {magic}, expected 2051)"
            )
        pixels = np.frombuffer(f.read(), dtype=np.uint8)

    expected = num * rows * cols
    if pixels.size != expected:
        raise ValueError(
            f"{file_path}: header declares {num}x{rows}x{cols} = {expected} pixels, "
            f"but the file holds {pixels.size}"
        )
    return pixels.reshape(num, rows, cols)

In [None]:
# Location of the test images. The function below takes its own
# `file_path` argument, which shadows this module-level name inside the body.
file_path = "Kaggle Dataset/t10k-images.idx3-ubyte"
# NOTE(review): this cell redefines read_idx_images, which an earlier cell
# already defines; when cells run top to bottom this later definition is the
# one in effect.
def read_idx_images(file_path):
    """Read an IDX3 unsigned-byte image file into a NumPy array.

    The file starts with four big-endian 32-bit unsigned integers
    (magic number, image count, rows, columns) followed by one byte per pixel.

    Args:
        file_path: Path of the IDX3 file to read.

    Returns:
        A uint8 array of shape ``(num, rows, cols)``. It is a read-only view
        over the bytes read from disk.

    Raises:
        ValueError: If the header is shorter than 16 bytes, the magic number is
            not 2051 (IDX, unsigned byte, 3 dimensions), or the payload size
            does not equal ``num * rows * cols``.
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'rb') as f:
        header = f.read(16)
        if len(header) != 16:
            raise ValueError(
                f"{file_path}: IDX3 header needs 16 bytes, got {len(header)}"
            )
        magic, num, rows, cols = struct.unpack('>IIII', header)
        # 2051 == 0x00000803: data type 0x08 (unsigned byte), 3 dimensions.
        if magic != 2051:
            raise ValueError(
                f"{file_path}: not an IDX3 image file (magic number {magic}, expected 2051)"
            )
        pixels = np.frombuffer(f.read(), dtype=np.uint8)

    expected = num * rows * cols
    if pixels.size != expected:
        raise ValueError(
            f"{file_path}: header declares {num}x{rows}x{cols} = {expected} pixels, "
            f"but the file holds {pixels.size}"
        )
    return pixels.reshape(num, rows, cols)

In [None]:
# Location of the training labels. The function below takes its own
# `file_path` argument, which shadows this module-level name inside the body.
file_path = "Kaggle Dataset/train-labels.idx1-ubyte"
def read_idx_labels(file_path):
    """Read an IDX1 unsigned-byte label file into a NumPy array.

    The file starts with two big-endian 32-bit unsigned integers
    (magic number, label count) followed by one byte per label.

    Args:
        file_path: Path of the IDX1 file to read.

    Returns:
        A 1-D uint8 array with one entry per label. It is a read-only view
        over the bytes read from disk.

    Raises:
        ValueError: If the header is shorter than 8 bytes, the magic number is
            not 2049 (IDX, unsigned byte, 1 dimension), or the number of label
            bytes differs from the count declared in the header.
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'rb') as f:
        header = f.read(8)
        if len(header) != 8:
            raise ValueError(
                f"{file_path}: IDX1 header needs 8 bytes, got {len(header)}"
            )
        magic, num = struct.unpack('>II', header)
        # 2049 == 0x00000801: data type 0x08 (unsigned byte), 1 dimension.
        if magic != 2049:
            raise ValueError(
                f"{file_path}: not an IDX1 label file (magic number {magic}, expected 2049)"
            )
        labels = np.frombuffer(f.read(), dtype=np.uint8)

    if labels.size != num:
        raise ValueError(
            f"{file_path}: header declares {num} labels, but the file holds {labels.size}"
        )
    return labels

In [None]:
# Location of the test labels. The function below takes its own
# `file_path` argument, which shadows this module-level name inside the body.
file_path = "Kaggle Dataset/t10k-labels.idx1-ubyte"
# NOTE(review): this cell redefines read_idx_labels, which an earlier cell
# already defines; when cells run top to bottom this later definition is the
# one in effect.
def read_idx_labels(file_path):
    """Read an IDX1 unsigned-byte label file into a NumPy array.

    The file starts with two big-endian 32-bit unsigned integers
    (magic number, label count) followed by one byte per label.

    Args:
        file_path: Path of the IDX1 file to read.

    Returns:
        A 1-D uint8 array with one entry per label. It is a read-only view
        over the bytes read from disk.

    Raises:
        ValueError: If the header is shorter than 8 bytes, the magic number is
            not 2049 (IDX, unsigned byte, 1 dimension), or the number of label
            bytes differs from the count declared in the header.
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'rb') as f:
        header = f.read(8)
        if len(header) != 8:
            raise ValueError(
                f"{file_path}: IDX1 header needs 8 bytes, got {len(header)}"
            )
        magic, num = struct.unpack('>II', header)
        # 2049 == 0x00000801: data type 0x08 (unsigned byte), 1 dimension.
        if magic != 2049:
            raise ValueError(
                f"{file_path}: not an IDX1 label file (magic number {magic}, expected 2049)"
            )
        labels = np.frombuffer(f.read(), dtype=np.uint8)

    if labels.size != num:
        raise ValueError(
            f"{file_path}: header declares {num} labels, but the file holds {labels.size}"
        )
    return labels

In [None]:
# Paths to the MNIST IDX files (downloaded from Kaggle).
# Every path is built from DATA_DIR, so relocating the dataset means editing
# one constant instead of four string literals.
DATA_DIR = "Kaggle Dataset"
train_images_path = f"{DATA_DIR}/train-images.idx3-ubyte"
train_labels_path = f"{DATA_DIR}/train-labels.idx1-ubyte"
test_images_path = f"{DATA_DIR}/t10k-images.idx3-ubyte"
test_labels_path = f"{DATA_DIR}/t10k-labels.idx1-ubyte"

In [None]:
# Load images and labels for both splits.
train_images = read_idx_images(train_images_path)
train_labels = read_idx_labels(train_labels_path)
test_images = read_idx_images(test_images_path)
test_labels = read_idx_labels(test_labels_path)

# Sanity checks: every image needs exactly one label, and both splits must
# share the same image size, otherwise training fails later with a far less
# obvious error.
assert len(train_images) == len(train_labels), (
    f"train split mismatch: {len(train_images)} images vs {len(train_labels)} labels"
)
assert len(test_images) == len(test_labels), (
    f"test split mismatch: {len(test_images)} images vs {len(test_labels)} labels"
)
assert train_images.shape[1:] == test_images.shape[1:], (
    f"image size differs between splits: {train_images.shape[1:]} vs {test_images.shape[1:]}"
)

In [None]:
# Report the dimensions of the training arrays, one per line.
print(
    f"Train Images Shape: {train_images.shape}",
    f"Train Labels Shape: {train_labels.shape}",
    sep="\n",
)

In [None]:
# Normalize pixel values by dividing by 255.
# The dtype guard keeps this cell idempotent: the loaders return uint8 arrays,
# and the division turns them into floats, so re-running the cell must not
# divide an already-normalized array by 255 a second time.
if train_images.dtype == np.uint8:
    train_images = train_images / 255.0
if test_images.dtype == np.uint8:
    test_images = test_images / 255.0

In [None]:
def create_model(input_shape=(28, 28, 1), num_classes=10):
    """Build and compile a small convolutional classifier.

    Architecture: two Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with 32 and
    64 filters, then Flatten, Dense(128, ReLU) and a softmax output layer.

    Args:
        input_shape: Shape of a single input sample, without the batch axis.
            Defaults to ``(28, 28, 1)``.
        num_classes: Number of units in the softmax output layer.
            Defaults to 10.

    Returns:
        A ``keras.Sequential`` model compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and the accuracy metric.
    """
    model = keras.Sequential([
        # Declare the input explicitly rather than via `input_shape=` on the
        # first layer.
        keras.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


# Baseline model, trained below on the full training set.
model = create_model()

In [None]:
# Train the baseline model for 5 epochs. A trailing axis of length 1 is
# appended to the image arrays to match the model's (28, 28, 1) input, and the
# test split is passed as validation data.
history = model.fit(
    x=np.expand_dims(train_images, axis=-1),
    y=train_labels,
    epochs=5,
    validation_data=(np.expand_dims(test_images, axis=-1), test_labels),
)

In [None]:
# Baseline model: per-epoch accuracy, scaled by 100 so the y-axis reads in percent.
ax = plt.gca()
ax.plot(100 * np.asarray(history.history['accuracy']), label='Training Accuracy (%)')
ax.plot(100 * np.asarray(history.history['val_accuracy']), label='Validation Accuracy (%)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy (%)')
ax.legend()
plt.show()


In [None]:
def create_biased_dataset(train_images, train_labels, keep_classes=(0, 1, 2, 3, 4, 5, 6)):
    """Return only the samples whose label is one of ``keep_classes``.

    Nothing is re-sampled: samples of every other class are dropped entirely.
    With the default ``keep_classes`` this removes the digits 7, 8 and 9.

    Args:
        train_images: Array of samples, indexed along its first axis.
        train_labels: 1-D array of labels, aligned with ``train_images``.
        keep_classes: Iterable of labels to keep. Defaults to 0 through 6.

    Returns:
        A tuple ``(images, labels)`` holding the kept samples in their
        original order.
    """
    train_images = np.asarray(train_images)
    train_labels = np.asarray(train_labels)
    # np.isin does not treat a set as a collection of values, so materialize
    # whatever iterable was passed into a list first.
    mask = np.isin(train_labels, list(keep_classes))
    return train_images[mask], train_labels[mask]

In [None]:
# Build the reduced training set (images and labels filtered together).
x_train_biased, y_train_biased = create_biased_dataset(
    train_images=train_images,
    train_labels=train_labels,
)

In [None]:
# Fresh model trained only on the filtered subset for 5 epochs; validation
# still uses the complete test split.
biased_model = create_model()
biased_history = biased_model.fit(
    x=np.expand_dims(x_train_biased, axis=-1),
    y=y_train_biased,
    validation_data=(np.expand_dims(test_images, axis=-1), test_labels),
    epochs=5,
)

In [None]:
# Model trained on the filtered subset: per-epoch accuracy, scaled by 100 so
# the y-axis reads in percent.
ax = plt.gca()
ax.plot(100 * np.asarray(biased_history.history['accuracy']), label='Training Accuracy (%)')
ax.plot(100 * np.asarray(biased_history.history['val_accuracy']), label='Validation Accuracy (%)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy (%)')
ax.legend()
plt.show()

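# Strategies to Handle Imbalance
- Use class weighting: give each class a loss weight inversely proportional to its frequency in the training set, so that under-represented classes are not drowned out by frequent ones. Note that weighting only rebalances classes that are present in the training data; the digits 7, 8 and 9 were removed from the biased set entirely, so class weights alone cannot recover them.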

In [None]:
# Per-class loss weights for the classes present in the filtered labels.
biased_classes = np.unique(y_train_biased)
biased_weights = class_weight.compute_class_weight(
    'balanced', classes=biased_classes, y=y_train_biased
)
# The returned weights are ordered like `classes`, so pair them positionally.
# Indexing the weight array by the label value is only correct when the labels
# happen to be exactly 0..n-1, and raises IndexError otherwise.
biased_class_weights_dict = {
    int(label): float(weight)
    for label, weight in zip(biased_classes, biased_weights)
}

In [None]:
# Same filtered training data as before, this time with the per-class weights
# passed to fit() so they are applied during training.
balanced_biased_model = create_model()
balanced_biased_history = balanced_biased_model.fit(
    x=np.expand_dims(x_train_biased, axis=-1),
    y=y_train_biased,
    class_weight=biased_class_weights_dict,
    validation_data=(np.expand_dims(test_images, axis=-1), test_labels),
    epochs=5,
)

In [None]:
# Class-weighted model: per-epoch accuracy. Values are scaled by 100 and the
# axes are labelled so this figure is directly comparable with the other
# accuracy plots in this notebook, which use percent on the y-axis.
ax = plt.gca()
ax.plot(
    100 * np.asarray(balanced_biased_history.history['accuracy']),
    label='Balanced Biased Accuracy (%)',
)
ax.plot(
    100 * np.asarray(balanced_biased_history.history['val_accuracy']),
    label='Balanced Biased Val Accuracy (%)',
)
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy (%)')
ax.legend()
plt.show()