In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from PIL import Image
import os
import numpy as np

In [9]:
# Load MNIST and CIFAR-10 datasets
transform_mnist = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.Grayscale(3),  # Convert to RGB
    transforms.ToTensor()
])

transform_cifar = transforms.Compose([
    transforms.ToTensor()
])

# Load MNIST
mnist_train = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_mnist)
mnist_loader = torch.utils.data.DataLoader(mnist_train, batch_size=1, shuffle=False)

# Load CIFAR-10
cifar_train = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_cifar)
cifar_loader = torch.utils.data.DataLoader(cifar_train, batch_size=1, shuffle=False)

# Ensure the number of samples is consistent across both datasets
n_samples = min(len(mnist_train), len(cifar_train))

# Directory to save the generated dataset
output_dir = 'overlayed_mnist_cifar10_v2'
os.makedirs(output_dir, exist_ok=True)

# Overlay images and save them with their labels
for i, ((mnist_img, mnist_label), (cifar_img, cifar_label)) in enumerate(zip(mnist_loader, cifar_loader)):
    if i >= n_samples:
        break
    
    # Convert CIFAR image to numpy array
    cifar_img = cifar_img.squeeze().permute(1, 2, 0).numpy()

    # Convert MNIST image to numpy array
    mnist_img = mnist_img.squeeze().permute(1, 2, 0).numpy()

    # Overlay the images
    combined_image = np.clip(mnist_img + cifar_img , 0, 1)

    # Convert back to image and save
    combined_image_pil = Image.fromarray((combined_image * 255).astype(np.uint8))
    combined_image_pil.save(os.path.join(output_dir, f'image_{i}.png'))

    # Save the labels
    with open(os.path.join(output_dir, f'label_{i}.txt'), 'w') as f:
        f.write(f'MNIST Label: {mnist_label.item()}\n')
        f.write(f'CIFAR-10 Label: {cifar_label.item()}\n')

print(f'Generated {n_samples} overlayed images with labels in the "{output_dir}" directory.')

Files already downloaded and verified
Generated 50000 overlayed images with labels in the "overlayed_mnist_cifar10_v2" directory.


In [10]:
# Load MNIST and CIFAR-10 datasets
transform_mnist = transforms.Compose([
    transforms.Resize((32, 32)),
    transforms.Grayscale(3),  # Convert to RGB
    transforms.ToTensor()
])

transform_cifar = transforms.Compose([
    transforms.ToTensor()
])

# Load MNIST
mnist_test = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform_mnist)
mnist_loader = torch.utils.data.DataLoader(mnist_test, batch_size=1, shuffle=False)

# Load CIFAR-10
cifar_test = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_cifar)
cifar_loader = torch.utils.data.DataLoader(cifar_test, batch_size=1, shuffle=False)

# Ensure the number of samples is consistent across both datasets
n_samples = min(len(mnist_test), len(cifar_test))

# Directory to save the generated dataset
output_dir = 'overlayed_mnist_cifar10_test'
os.makedirs(output_dir, exist_ok=True)

# Overlay images and save them with their labels
for i, ((mnist_img, mnist_label), (cifar_img, cifar_label)) in enumerate(zip(mnist_loader, cifar_loader)):
    if i >= n_samples:
        break
    
    # Convert CIFAR image to numpy array
    cifar_img = cifar_img.squeeze().permute(1, 2, 0).numpy()

    # Convert MNIST image to numpy array
    mnist_img = mnist_img.squeeze().permute(1, 2, 0).numpy()

    # Overlay the images
    combined_image = np.clip(mnist_img + cifar_img , 0, 1)

    # Convert back to image and save
    combined_image_pil = Image.fromarray((combined_image * 255).astype(np.uint8))
    combined_image_pil.save(os.path.join(output_dir, f'image_{i}.png'))

    # Save the labels
    with open(os.path.join(output_dir, f'label_{i}.txt'), 'w') as f:
        f.write(f'MNIST Label: {mnist_label.item()}\n')
        f.write(f'CIFAR-10 Label: {cifar_label.item()}\n')

print(f'Generated {n_samples} overlayed images with labels in the "{output_dir}" directory.')

Files already downloaded and verified
Generated 10000 overlayed images with labels in the "overlayed_mnist_cifar10_test" directory.


In [16]:
from pathlib import Path
import pandas as pd

dir = Path('overlayed_mnist_cifar10_test')
image_ids = []
mnist_labels = []
cifar10_labels = []
for file in dir.iterdir():
    if file.is_file():
        if 'label' in file.name:
            with open(file, 'r') as f:
                lines = f.readlines()
                mnist_label = int(lines[0].split()[-1])
                cifar10_label = int(lines[1].split()[-1])
            image_ids.append(file.name.split('_')[1].split('.')[0])
            mnist_labels.append(mnist_label)
            cifar10_labels.append(cifar10_label)

pd.DataFrame({'image_id': image_ids, 'mnist_label': mnist_labels, 'cifar10_label': cifar10_labels}).to_csv('overlayed_mnist_cifar10_test.csv', index=False)

In [22]:
from sklearn.model_selection import train_test_split

image_dir = 'overlayed_mnist_cifar10_train'
labels = pd.read_csv(Path(image_dir) / 'labels.csv')
labels = labels.sort_values(by='image_id').set_index('image_id')

# Combine the two categories into a single multi-class label
labels['combined_label'] = labels.apply(lambda row: f"{row['mnist_label']}_{row['cifar10_label']}", axis=1)

# Split the dataset into train and validation sets with stratification
train_labels, val_labels = train_test_split(
    labels,
    test_size=10000,
    stratify=labels['combined_label'],
)

# Drop the combined label column
train_labels = train_labels.drop(columns=['combined_label'])
val_labels = val_labels.drop(columns=['combined_label'])

print(train_labels.shape, val_labels.shape)
# now remove image_ids that are in train_labels from data_dir and move them to val_dir
import shutil
for image_id in val_labels.index:
    shutil.move(Path(image_dir) / f'image_{image_id}.png', 'overlayed_mnist_cifar10_val')





(40000, 2) (10000, 2)


In [23]:
val_labels.to_csv('overlayed_mnist_cifar10_val/labels.csv')
# same for train set
train_labels.to_csv('overlayed_mnist_cifar10_train/labels.csv')

In [25]:
import os
from PIL import Image
import numpy as np
from tqdm import tqdm

def calculate_mean_std(image_dir):
    image_files = [f for f in os.listdir(image_dir) if f.startswith('image_')]
    mean = np.zeros(3)
    std = np.zeros(3)
    num_images = len(image_files)

    for image_file in tqdm(image_files):
        image_path = os.path.join(image_dir, image_file)
        image = Image.open(image_path).convert('RGB')
        image = np.array(image) / 255.0  # Normalize to [0, 1]
        
        mean += image.mean(axis=(0, 1))
        std += image.std(axis=(0, 1))

    mean /= num_images
    std /= num_images

    return mean, std

# image_dir = 'overlayed_mnist_cifar10_train'
# mean, std = calculate_mean_std(image_dir)
print(f'Mean: {mean}')
print(f'Std: {std}')

Mean: [0.57580509 0.56986527 0.53811235]
Std: [0.25580952 0.25461894 0.26496579]


In [2]:
from torchvision import models

resnet18 = models.resnet18()
print(resnet18)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  