In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

# Preprocessing applied to every image: convert it to a tensor, then normalize
# with mean 0.5 and std 0.5 (one value, i.e. one channel)
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(mean=(0.5,), std=(0.5,))]
)

# MNIST train / test splits stored under ./data (download=True fetches them if needed)
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

# Batched loaders: the training split is shuffled, the test split keeps its order
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Report how many samples each split contains
print(f"Train Dataset Size: {len(train_dataset)}")
print(f"Test Dataset Size: {len(test_dataset)}")


In [None]:
from torch.utils.data import Subset


def show_few_samples(dataset, num_samples=5):
    """Display the first ``num_samples`` images of ``dataset`` in a single row.

    The helper is defined in this cell because the call below would otherwise
    raise ``NameError`` when the notebook is run top to bottom on a fresh kernel.

    Args:
        dataset: Indexable, sized collection whose items are ``(img, label)``
            pairs.
        num_samples: Maximum number of samples to draw; clamped to
            ``len(dataset)``.
    """
    num_samples = min(num_samples, len(dataset))
    if num_samples <= 0:
        return  # nothing to draw

    # squeeze=False keeps `axes` two-dimensional even when there is one column,
    # so the row can always be iterated.
    fig, axes = plt.subplots(1, num_samples, figsize=(3 * num_samples, 3), squeeze=False)
    for i, ax in enumerate(axes[0]):
        img, label = dataset[i]

        # Convert tensor images to NumPy arrays before plotting
        if hasattr(img, 'numpy'):
            img = img.numpy()

        ax.imshow(img.squeeze(), cmap="gray")
        ax.set_title(f"Label: {label}")
        ax.axis("off")

    plt.tight_layout()
    plt.show()


train_subset = Subset(train_dataset, range(100))  # Use first 100 samples
show_few_samples(train_subset, num_samples=3)


In [None]:
import matplotlib.pyplot as plt

# Function to show the first 5 samples from the dataset
def show_first_five_samples(dataset):
    """Plot up to the first five samples of ``dataset`` side by side.

    Args:
        dataset: Indexable, sized collection whose items are ``(img, label)``
            pairs. Images exposing a ``numpy()`` method (e.g. tensors) are
            converted to NumPy arrays before being drawn.

    Returns:
        None. The figure is shown with ``plt.show()``. Nothing is drawn when
        the dataset is empty.
    """
    num_samples = min(5, len(dataset))  # Use 5 or fewer if the dataset has less than 5 samples
    if num_samples == 0:
        # plt.subplots(1, 0) would raise; there is nothing to display anyway
        return

    # squeeze=False guarantees a 2-D array of axes. With the default squeeze,
    # a single-sample dataset yields a bare Axes object and indexing it fails.
    fig, axes = plt.subplots(1, num_samples, figsize=(15, 3), squeeze=False)

    for i, ax in enumerate(axes[0]):
        img, label = dataset[i]

        # Convert the tensor image to a NumPy array if necessary
        if hasattr(img, 'numpy'):
            img = img.numpy()

        ax.imshow(img.squeeze(), cmap="gray")
        ax.set_title(f"Label: {label}")
        ax.axis("off")

    plt.tight_layout()
    plt.show()

# Preview the leading samples (at most five) of the training split
show_first_five_samples(dataset=train_dataset)


In [None]:
# Visualize a few samples from the dataset
def show_samples(dataset, title, num_samples=4):
    """Show the first few samples of ``dataset`` in one row under a shared title.

    Args:
        dataset: Indexable, sized collection whose items are ``(img, label)``
            pairs.
        title: Text used as the figure's super-title.
        num_samples: Maximum number of panels to draw (default 4, the
            previously hard-coded value). Clamped to ``len(dataset)`` so that
            short datasets no longer raise ``IndexError``.

    Returns:
        None. The figure is shown with ``plt.show()``. Nothing is drawn when
        there are no samples to show.
    """
    num_samples = min(num_samples, len(dataset))
    if num_samples <= 0:
        return  # nothing to draw

    # squeeze=False keeps `axes` two-dimensional even for a single panel
    fig, axes = plt.subplots(1, num_samples, figsize=(15, 2), squeeze=False)
    for i, ax in enumerate(axes[0]):
        img, label = dataset[i]
        # Convert tensor images to NumPy arrays before plotting
        if hasattr(img, 'numpy'):
            img = img.numpy()
        ax.imshow(img.squeeze(), cmap="gray")
        ax.set_title(f"Label: {label}")
        ax.axis('off')
    plt.suptitle(title)
    plt.show()

show_samples(dataset=train_dataset, title="Sample Images from MNIST Dataset")


In [None]:
# Plot class distribution in the dataset.
# Prefer the labels the dataset already stores (torchvision's MNIST keeps them
# in `.targets`): indexing every sample would run the image transform on the
# whole training split only to discard the images.
if hasattr(train_dataset, "targets"):
    labels = np.asarray(train_dataset.targets).tolist()
else:
    # Fallback for datasets without a `targets` attribute
    labels = [label for _, label in train_dataset]
unique, counts = np.unique(labels, return_counts=True)

fig = px.bar(x=unique, y=counts, labels={'x': 'Digits', 'y': 'Count'},
             title="Class Distribution in MNIST Dataset")
fig.show()


In [None]:
# Convert dataset into flattened arrays for unsupervised learning
def flatten_dataset(dataset):
    """Flatten every image of ``dataset`` into a 1-D vector and collect the labels.

    Args:
        dataset: Iterable yielding ``(img, label)`` pairs, where ``img`` is a
            tensor supporting ``reshape`` and ``numpy()`` (assumes CPU tensors
            that do not require grad).

    Returns:
        tuple: ``(images, labels)`` as NumPy arrays. ``images`` holds one
        flattened image per row and ``labels`` holds the labels in iteration
        order.
    """
    images, labels = [], []
    for img, label in dataset:
        # reshape(-1) instead of view(-1): view raises on non-contiguous
        # tensors (e.g. transposed images), while reshape copies when needed
        # and gives the same result for contiguous input.
        images.append(img.reshape(-1).numpy())  # Flatten image
        labels.append(label)
    return np.array(images), np.array(labels)

# Turn both splits into NumPy arrays of flattened images plus their label vectors
train_images, train_labels = flatten_dataset(dataset=train_dataset)
test_images, test_labels = flatten_dataset(dataset=test_dataset)

# Report the shape of each flattened image array
print(f"Shape of Flattened Train Images: {train_images.shape}")
print(f"Shape of Flattened Test Images: {test_images.shape}")
