# Part 2: Image Processing & Transfer Learning
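This notebook loads the CIFAR-100 dataset, randomly splits its 100 fine-grained classes into two disjoint blocks of 50 classes each, divides each block into training (80%) and validation (20%) subsets, re-encodes the labels of each block to the range 0–49, and saves the resulting arrays to disk.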

In [1]:
import numpy as np
from keras.datasets import cifar100
import os

# load CIFAR-100 dataset (fine-grained labels)
(x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode='fine')

# randomly select 50-50 classes for Block 1 & Block 2
# A fixed seed makes the class partition reproducible: without it every
# Restart & Run All produces a different Block 1 / Block 2 split and silently
# overwrites previously saved subsets with incompatible ones.
SPLIT_SEED = 42
split_rng = np.random.default_rng(SPLIT_SEED)

all_classes = np.unique(y_train)   # sorted, distinct class ids present in y_train
split_rng.shuffle(all_classes)     # in-place shuffle using the seeded generator
block1_classes = all_classes[:50]
block2_classes = all_classes[50:100]

# Boolean masks over the training samples: True where the sample's label
# belongs to the corresponding block of classes.
block1_train_indices = np.isin(y_train, block1_classes).ravel()
block2_train_indices = np.isin(y_train, block2_classes).ravel()

# Block 1: keep only its samples, then cut at 80% (in stored order):
# the leading part is the training subset, the remainder is validation.
x_block1_all, y_block1_all = x_train[block1_train_indices], y_train[block1_train_indices]
split_index = int(0.8 * len(x_block1_all))
x_block1_train, x_block1_val = x_block1_all[:split_index], x_block1_all[split_index:]
y_block1_train, y_block1_val = y_block1_all[:split_index], y_block1_all[split_index:]

# Block 2: same procedure; split_index is recomputed for this block's size.
x_block2_all, y_block2_all = x_train[block2_train_indices], y_train[block2_train_indices]
split_index = int(0.8 * len(x_block2_all))
x_block2_train, x_block2_val = x_block2_all[:split_index], x_block2_all[split_index:]
y_block2_train, y_block2_val = y_block2_all[:split_index], y_block2_all[split_index:]

# encode labels to range from 0 to 49 for both blocks
# Each block holds a shuffled, generally non-contiguous subset of the class ids,
# so an arithmetic mapping such as `(label - min) % 50` is not one-to-one:
# two different classes of the same block (e.g. 3 and 53) would receive the
# same encoded value. Instead, encode every label as its rank within the
# block's sorted class ids, which is a bijection onto 0..49.
# The original class id can be recovered with `blockN_sorted_classes[encoded]`.
block1_sorted_classes = np.sort(block1_classes)
block2_sorted_classes = np.sort(block2_classes)

# np.searchsorted returns, for each label, its position in the sorted class
# array; the output keeps the shape of the label array. Cast back to the
# labels' dtype so the saved arrays keep the same dtype as before.
y_block1_train_encoded = np.searchsorted(block1_sorted_classes, y_block1_train).astype(y_block1_train.dtype)
y_block1_val_encoded = np.searchsorted(block1_sorted_classes, y_block1_val).astype(y_block1_val.dtype)
y_block2_train_encoded = np.searchsorted(block2_sorted_classes, y_block2_train).astype(y_block2_train.dtype)
y_block2_val_encoded = np.searchsorted(block2_sorted_classes, y_block2_val).astype(y_block2_val.dtype)


In [None]:
# check if encoded labels within range
# Each line prints the minimum, then "Max:" and the maximum, of one encoded
# label array. The Block 2 lines now report Block 2's own minimum (they
# previously printed Block 1's minimum next to Block 2's maximum).
print(np.min(y_block1_train_encoded), "Max:", np.max(y_block1_train_encoded))
print(np.min(y_block1_val_encoded), "Max:", np.max(y_block1_val_encoded))
print(np.min(y_block2_train_encoded), "Max:", np.max(y_block2_train_encoded))
print(np.min(y_block2_val_encoded), "Max:", np.max(y_block2_val_encoded))

# Report the shape of every image/label subset, Block 1 first, then Block 2.
# Note: the label arrays shown here are the un-encoded ones.
subset_arrays = {
    "x_block1_train": x_block1_train,
    "y_block1_train": y_block1_train,
    "x_block1_val": x_block1_val,
    "y_block1_val": y_block1_val,
    "x_block2_train": x_block2_train,
    "y_block2_train": y_block2_train,
    "x_block2_val": x_block2_val,
    "y_block2_val": y_block2_val,
}
for subset_name, subset_array in subset_arrays.items():
    print(f"Shape of {subset_name}:", subset_array.shape)

In [None]:
# Make sure the output folder of every (block, split) pair exists;
# exist_ok=True keeps this cell safe to re-run.
for subset_dir in (
    "p2_data/block1/train",
    "p2_data/block1/val",
    "p2_data/block2/train",
    "p2_data/block2/val",
):
    os.makedirs(subset_dir, exist_ok=True)

# Destination file -> array to store. Images are saved as filtered;
# labels are saved in their encoded form.
subset_files = {
    "p2_data/block1/train/x_train.npy": x_block1_train,
    "p2_data/block1/train/y_train.npy": y_block1_train_encoded,
    "p2_data/block1/val/x_val.npy": x_block1_val,
    "p2_data/block1/val/y_val.npy": y_block1_val_encoded,
    "p2_data/block2/train/x_train.npy": x_block2_train,
    "p2_data/block2/train/y_train.npy": y_block2_train_encoded,
    "p2_data/block2/val/x_val.npy": x_block2_val,
    "p2_data/block2/val/y_val.npy": y_block2_val_encoded,
}
for npy_path, subset_array in subset_files.items():
    np.save(npy_path, subset_array)