In [None]:
# load pickle file 

import pickle

path = "resources/data/cifar-100-python/meta"
with open(path, 'rb') as f:
    pickle_data = pickle.load(f)

print(pickle_data)

In [None]:
# get index of 'boy', sunflowers, apples, couch, bee, lion, plain, turtle, hamster, pine

fine_label_names = pickle_data['fine_label_names']

words_to_find = ['boy', 'sunflower', 'apple', 'couch', 'bee', 'lion', 'plain', 'turtle', 'hamster', 'pine_tree']

for word in words_to_find:
    print(f"Index of {word}: {fine_label_names.index(word)}")

In [None]:
# import resnet 
from torchvision import models
resnet18 = models.resnet18()

# import cifar-100 dataset
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader

path = "resources/data/cifar-100-python/train"

with open(path, 'rb') as f:
    cifar100 = pickle.load(f, encoding='bytes')

print(cifar100.keys())

In [None]:
# load cifar100 dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = datasets.CIFAR100(root='resources/data', train=True, download=True, transform=transform)
testset = datasets.CIFAR100(root='resources/data', train=False, download=True, transform=transform)


In [None]:
retain_samples = []
forget_samples = []
for elem in trainset: 
    if elem[1] == 11:
        forget_samples.append(elem)
    else:
        retain_samples.append(elem)

len(retain_samples), len(forget_samples)

In [None]:
def cifar100_fine_to_coarse_idx(fine_idx):
    # Mapping from fine labels (0-99) to coarse labels (0-19) based on CIFAR-100 dataset
    fine_to_coarse = [
        4, 1, 14, 8, 0, 6, 7, 7, 18, 3,
        3, 14, 9, 18, 7, 11, 3, 9, 7, 11,
        6, 11, 5, 10, 7, 6, 13, 15, 3, 15,
        0, 11, 1, 10, 12, 14, 16, 9, 11, 5,
        5, 19, 8, 8, 15, 13, 14, 17, 18, 10,
        16, 4, 17, 4, 2, 0, 17, 4, 18, 17,
        10, 3, 2, 12, 12, 16, 12, 1, 9, 19,
        2, 10, 0, 1, 16, 12, 9, 13, 15, 13,
        16, 19, 2, 4, 6, 19, 5, 5, 8, 19,
        18, 1, 2, 15, 6, 0, 17, 8, 14, 13
    ]
    
    return fine_to_coarse[fine_idx]

# Example usage:
fine_label_idx = 23  # Example fine label index
coarse_label_idx = cifar100_fine_to_coarse_idx(fine_label_idx)
print(f"Fine label {fine_label_idx} maps to coarse label {coarse_label_idx}")


In [None]:
# create two datasets for retain and forget samples
class Cifar100Dataset:
    def __init__(self, samples):
        self.samples = samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        x = self.samples[idx][0]
        y = cifar100_fine_to_coarse_idx(self.samples[idx][1])
        return x, y

train_dataset = Cifar100Dataset(trainset)
test_dataset = Cifar100Dataset(testset)


retain_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=True)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    img = img / 2 + 0.5
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

imshow(trainset[0][0])
print(trainset[0][1])
print(trainset[0][0].shape)

In [None]:
# resnet model 
import torch
import torch.nn as nn

from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

resnet18 = models.resnet18()
resnet18.fc = nn.Linear(512, 20)
resnet18 = resnet18.to(device)

# training loop
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet18.parameters(), lr=0.001)

for epoch in tqdm(range(10)):
    running_loss = 0.0
    for i, data in enumerate(retain_loader):
        inputs, labels = data

        optimizer.zero_grad()

        outputs = resnet18(inputs.to(device))
        loss = criterion(outputs, labels.to(device))
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if i % 2000 == 1999:
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 2000}")
            running_loss = 0.0


In [None]:
# test on the forget set 

correct = 0
total = 0
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        outputs = resnet18(images.to(device))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted.cpu() == labels).sum().item()

print(f"Accuracy on forget set: {100 * correct / total}%")

In [12]:
correct = 0
total = 0
with torch.no_grad():
    for data in retain_loader:
        images, labels = data
        outputs = resnet18(images.to(device))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted.cpu() == labels).sum().item()

print(f"Accuracy on forget set: {100 * correct / total}%")

Accuracy on forget set: 95.74%


In [1]:
from datasets import load_dataset
import pandas as pd

dataset = load_dataset("huggan/wikiart")
# get the first 1000 samples
dataset = dataset['train'].select(range(2000))
df = pd.DataFrame(dataset)

to_keep = [12, 21]

df_filtered = df[df['style'].isin(to_keep)]
df_filtered = df_filtered.reset_index(drop=True)

print(dataset)

  from .autonotebook import tqdm as notebook_tqdm


Dataset({
    features: ['image', 'artist', 'genre', 'style'],
    num_rows: 2000
})


In [2]:
# filter only the paintings with a particular style 

to_keep = [12, 21]

df_filtered = df[df['style'].isin(to_keep)]
df_filtered = df_filtered.reset_index(drop=True)
print(df_filtered)

                                                 image  artist  genre  style
0    <PIL.JpegImagePlugin.JpegImageFile image mode=...      22      4     21
1    <PIL.JpegImagePlugin.JpegImageFile image mode=...      17      2     12
2    <PIL.JpegImagePlugin.JpegImageFile image mode=...      22     10     21
3    <PIL.JpegImagePlugin.JpegImageFile image mode=...      11      6     21
4    <PIL.JpegImagePlugin.JpegImageFile image mode=...       1      6     21
..                                                 ...     ...    ...    ...
959  <PIL.JpegImagePlugin.JpegImageFile image mode=...      22      8     21
960  <PIL.JpegImagePlugin.JpegImageFile image mode=...       1      6     21
961  <PIL.JpegImagePlugin.JpegImageFile image mode=...      17      4     12
962  <PIL.JpegImagePlugin.JpegImageFile image mode=...      11     10     12
963  <PIL.JpegImagePlugin.JpegImageFile image mode=...       6      2     12

[964 rows x 4 columns]


In [1]:
# convert back to dataset
df_filtered = df_filtered.to_dict('records')
df_filtered

NameError: name 'df_filtered' is not defined

In [20]:
# normalize the labels between 0 and 5 
df_filtered['style'] = df_filtered['style'].apply(lambda x: to_keep.index(x))
df_filtered

Unnamed: 0,image,artist,genre,style
0,<PIL.JpegImagePlugin.JpegImageFile image mode=...,22,4,1
1,<PIL.JpegImagePlugin.JpegImageFile image mode=...,17,2,0
2,<PIL.JpegImagePlugin.JpegImageFile image mode=...,22,10,1
3,<PIL.JpegImagePlugin.JpegImageFile image mode=...,11,6,1
4,<PIL.JpegImagePlugin.JpegImageFile image mode=...,1,6,1
...,...,...,...,...
1446,<PIL.JpegImagePlugin.JpegImageFile image mode=...,6,2,1
1447,<PIL.JpegImagePlugin.JpegImageFile image mode=...,5,5,0
1448,<PIL.JpegImagePlugin.JpegImageFile image mode=...,1,10,1
1449,<PIL.JpegImagePlugin.JpegImageFile image mode=...,5,2,0


In [21]:
# keep only the top 6 labels and remove the rest from the dataset 
print(df_filtered['style'].value_counts())

# select top artists for images in the dataset with their style 
artists = df_filtered['artist'].value_counts()
print(artists)

style
0    849
1    602
Name: count, dtype: int64
artist
17    214
4     191
22    154
11    120
2     117
5     100
8      93
6      86
3      86
1      78
10     74
18     63
16     19
13     19
14     15
21     14
15      8
Name: count, dtype: int64


In [49]:
df_filtered[df_filtered['artist'] == 16]['style'].value_counts()

style
0    19
Name: count, dtype: int64

In [50]:
# check artist 22 
for num in [3, 16]:
    style_counts = df_filtered[df_filtered['artist'] == num]['style'].value_counts()
    total_images_removed = style_counts.sum()
    print(f"Artist {num} total styles count: {total_images_removed}")
    print(style_counts)

Artist 3 total styles count: 86
style
0    86
Name: count, dtype: int64
Artist 16 total styles count: 19
style
0    19
Name: count, dtype: int64


In [None]:
# make a tensor with artist and then style 
import torch 
sample = dataset["train"][0]

label = torch.tensor([sample["artist"], sample["style"]])
label

In [None]:
# convert image to tensor 
from PIL import Image
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    #transforms.ToTensor()
])
for sample in dataset["train"]:
    image = sample["image"]
    tensor_image = transform(image)
    break

transform(dataset["train"][3]["image"])