In [1]:
import numpy as np
import torch
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data.dataloader import DataLoader

In [2]:
# Image preprocessing pipelines.
# Per-channel normalization statistics, used by both pipelines below.
cifar10_mean = [0.4914, 0.4822, 0.4465]
cifar10_std = [0.2471, 0.2435, 0.2616]

# Training pipeline: random padded crop + random horizontal flip, then
# tensor conversion and normalization.
# inherited from https://github.com/kaidic/LDAM-DRW/blob/master/cifar_train.py
train_steps = [
    transforms.RandomCrop(size=32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=cifar10_mean, std=cifar10_std),
]
transform_train = transforms.Compose(train_steps)

# Evaluation pipeline: no augmentation, only tensor conversion and the same
# normalization as the training pipeline.
test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean=cifar10_mean, std=cifar10_std),
]
transform_test = transforms.Compose(test_steps)

In [3]:
from collections import Counter, OrderedDict

# CIFAR-10 train / test splits, downloaded to ../data/ when not already present.
# Each split gets its own preprocessing pipeline.
train_dataset = CIFAR10(
    root='../data/',
    train=True,
    transform=transform_train,
    download=True,
)
test_dataset = CIFAR10(
    root='../data/',
    train=False,
    transform=transform_test,
    download=True,
)

# Number of images per split; the batch size is set to the full split size,
# so each loader yields the whole split as a single batch.
batch_size_train = n_train = len(train_dataset.data)
batch_size_test = n_test = len(test_dataset.data)

# Data loaders (only the training loader shuffles).
train_loader = DataLoader(train_dataset, batch_size=batch_size_train, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size_test, shuffle=False)

# Per-class image counts of the training split.
train_class_counts = Counter(train_dataset.targets)
print(dict(train_class_counts))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../data/cifar-10-python.tar.gz
Extracting ../data/cifar-10-python.tar.gz to ../data/
Files already downloaded and verified
{6: 5000, 9: 5000, 4: 5000, 1: 5000, 2: 5000, 7: 5000, 8: 5000, 3: 5000, 5: 5000, 0: 5000}


100%|██████████| 170498071/170498071 [00:06<00:00, 26568593.51it/s]


In [4]:
import detectors
import timm

# Pretrained model
# https://huggingface.co/edadaltocg/resnet18_cifar10
# NOTE(review): presumably `import detectors` is what makes the
# "resnet18_cifar10" name available to timm -- confirm against the model card.
model = timm.create_model("resnet18_cifar10", pretrained=True)
# remove the last fully-connected layer, output dimension: 512
new_model = torch.nn.Sequential(OrderedDict(list(model.named_children())[:-1]))
# Switch to inference mode before any forward pass.
new_model.eval()

# testing (validation) features
# Run the model on every batch and collect the results. The previous version
# only kept the last batch produced by the loader, which is correct only when
# the loader yields the whole split as one batch; accumulating works for any
# batch size and gives the same result in the single-batch case.
feature_batches, label_batches = [], []
with torch.no_grad():  # no gradients needed for feature extraction
    for x, y in test_loader:
        feature_batches.append(new_model(x))
        label_batches.append(y)
features = torch.cat(feature_batches, dim=0)
labels = torch.cat(label_batches, dim=0)

# Convert to NumPy arrays for the downstream (non-torch) code.
X_val, y_val = features.numpy(), labels.numpy()
print(X_val.shape)
print(y_val.shape)

(10000, 512)
(10000,)


In [5]:
from sklearn.model_selection import train_test_split

# Stratified 50/50 split of the extracted features and labels, with a fixed
# seed so the partition is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_val,
    y_val,
    test_size=0.5,
    random_state=2023,
    stratify=y_val,
)

# Report the shapes and the per-class counts of each half.
for split_title, split_X, split_y in (
    ('Training Set:', X_train, y_train),
    ('Testing Set:', X_test, y_test),
):
    print(split_title, np.shape(split_X), np.shape(split_y))
    print(dict(Counter(split_y)))

Training Set: (5000, 512) (5000,)
{8: 500, 9: 500, 4: 500, 7: 500, 3: 500, 6: 500, 2: 500, 1: 500, 5: 500, 0: 500}
Testing Set: (5000, 512) (5000,)
{6: 500, 0: 500, 1: 500, 9: 500, 3: 500, 7: 500, 8: 500, 5: 500, 2: 500, 4: 500}


In [6]:
# Write the extracted features and their labels to .npy files in the current
# working directory.
# NOTE(review): the files are named "*_test" but contain the full X_val / y_val
# arrays, not the X_test / y_test halves from the split above -- confirm this
# is intended.
for out_path, array in (
    ('CIFAR10_ResNet18_pretrain_X_test.npy', X_val),
    ('CIFAR10_ResNet18_pretrain_y_test.npy', y_val),
):
    np.save(out_path, array)