# Part 3
### Multiclass Classification
I edited the code from Part 2 to conduct a multiclass classification on the dataset of cats, horses and dogs images

In [2]:
from datasets import load_dataset
from transformers import AutoImageProcessor
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor
from transformers import DefaultDataCollator
import evaluate
import numpy as np
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer
from transformers import pipeline
from pycocotools.coco import COCO
import random
from PIL import Image
import pandas as pd
from datasets import Dataset

2023-10-24 16:41:38.212005: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
coco = COCO(annotation_file="instances_train2017.json")
cat_cat = coco.getCatIds(catNms="cat")
horse_cat = coco.getCatIds(catNms="horse")
dog_cat = coco.getCatIds(catNms="dog")
cat_imgs = coco.getImgIds(catIds=cat_cat)
horse_imgs = coco.getImgIds(catIds=horse_cat)
dog_imgs = coco.getImgIds(catIds=dog_cat)

loading annotations into memory...
Done (t=203.98s)
creating index...
index created!


In [4]:
random.shuffle(cat_imgs)
cat_imgs_train = cat_imgs[:400]
cat_imgs_test = cat_imgs[400:600]

random.shuffle(horse_imgs)
horse_imgs_train = horse_imgs[:400]
horse_imgs_test = horse_imgs[400:600]

random.shuffle(dog_imgs)
dog_imgs_train = dog_imgs[:400]
dog_imgs_test = dog_imgs[400:600]


In [5]:
cat_meta_train = coco.loadImgs(ids=cat_imgs_train)
cat_meta_test = coco.loadImgs(ids=cat_imgs_test)
horse_meta_train = coco.loadImgs(ids=horse_imgs_train)
horse_meta_test = coco.loadImgs(ids=horse_imgs_test)
dog_meta_train = coco.loadImgs(ids=dog_imgs_train)
dog_meta_test = coco.loadImgs(ids=dog_imgs_test)

In [6]:
def clean_data(meta, datadir="train2017"):
    return [x for x in meta if os.path.isfile("{}/{}".format(datadir, x['file_name']))]

In [7]:
cat_cleaned_train = clean_data(cat_meta_train)
cat_cleaned_test = clean_data(cat_meta_test)
horse_cleaned_train = clean_data(horse_meta_train)
horse_cleaned_test = clean_data(horse_meta_test)
dog_cleaned_train = clean_data(dog_meta_train)
dog_cleaned_test = clean_data(dog_meta_test)

In [8]:
def get_data(meta, datadir="train2017"):
    return [(x['file_name'], Image.open("{}/{}".format(datadir, x['file_name'])).resize((100, 100))) for x in meta]

In [9]:
cat_data_train = get_data(cat_cleaned_train)
horse_data_train = get_data(horse_cleaned_train)
dog_data_train = get_data(dog_cleaned_train)
cat_data_test = get_data(cat_cleaned_test)
horse_data_test = get_data(horse_cleaned_test)
dog_data_test = get_data(dog_cleaned_test)


In [10]:
label2id = {'cat': '1', 'horse': '0', 'dog': '2'}
id2label = {'1': 'cat', '0': 'horse', '2': 'dog'}

In [11]:
cat_data_train_df = pd.DataFrame(cat_data_train)
cat_data_train_df['class'] = 'cat'
horse_data_train_df = pd.DataFrame(horse_data_train)
horse_data_train_df['class'] = 'horse'
dog_data_train_df = pd.DataFrame(dog_data_train)
dog_data_train_df['class'] = 'dog'
cat_data_test_df = pd.DataFrame(cat_data_test)
cat_data_test_df['class'] = 'cat'
horse_data_test_df = pd.DataFrame(horse_data_test)
horse_data_test_df['class'] = 'horse'
dog_data_test_df = pd.DataFrame(dog_data_test)
dog_data_test_df['class'] = 'dog'

In [12]:
train_df = pd.concat([cat_data_train_df, horse_data_train_df, dog_data_train_df])
test_df = pd.concat([cat_data_test_df, horse_data_test_df, dog_data_test_df])

In [13]:
train_df['imgs'] = train_df[1].apply(lambda x: x.convert('RGB'))
test_df['imgs'] = test_df[1].apply(lambda x: x.convert('RGB'))

In [14]:
train_X = [x for x in train_df['imgs']]
test_X = [x for x in test_df['imgs']]

In [15]:
train_y = [1 if x == 'cat' else 0 if x == 'horse' else 2 for x in train_df['class']]
test_y = [1 if x == 'cat' else 0 if x == 'horse' else 2 for x in test_df['class']]

In [16]:
train_data_zip = list(zip(train_X, train_y))
test_data_zip = list(zip(test_X, test_y))

In [17]:
train_image_list = []
train_label_list = []
test_image_list = []
test_label_list = []
train_dataset = {}
test_dataset = {}
for image, label in train_data_zip:
    train_image_list.append(image)
    train_label_list.append(label)
for image, label in test_data_zip:
    test_image_list.append(image)
    test_label_list.append(label)
train_dataset['image'] = train_image_list
train_dataset['label'] = train_label_list
test_dataset['image'] = test_image_list
test_dataset['label'] = test_label_list

In [18]:
train_data = Dataset.from_dict(train_dataset)
test_data = Dataset.from_dict(test_dataset)

In [19]:
checkpoint = "google/vit-base-patch16-224-in21k"
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

In [20]:
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
size = (
    image_processor.size["shortest_edge"]
    if "shortest_edge" in image_processor.size
    else (image_processor.size["height"], image_processor.size["width"])
)
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

In [21]:
def transforms(examples):
    examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
    del examples["image"]
    return examples

In [22]:
transformed_train_data = train_data.with_transform(transforms)
transformed_test_data = test_data.with_transform(transforms)

In [23]:
data_collator = DefaultDataCollator()

In [24]:
accuracy = evaluate.load("accuracy")

In [25]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [26]:
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=3,
    id2label=id2label,
    label2id=label2id,
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [27]:
training_args = TrainingArguments(
    output_dir="cats_horses_dogs",
    remove_unused_columns=False,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=transformed_train_data,
    eval_dataset=transformed_test_data,
    tokenizer=image_processor,
    compute_metrics=compute_metrics,
)

trainer.train()



Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.058294,0.533149
2,No log,1.012815,0.654696
3,No log,0.997576,0.687845




TrainOutput(global_step=9, training_loss=1.0410361819797092, metrics={'train_runtime': 61.7371, 'train_samples_per_second': 36.056, 'train_steps_per_second': 0.146, 'total_flos': 1.724987149196452e+17, 'train_loss': 1.0410361819797092, 'epoch': 3.0})

In [32]:
ds = test_data
image = ds["image"][0]

In [30]:
classifier = pipeline("image-classification", model="cats_horses_dogs/checkpoint-9/")
classifier(image)

[{'score': 0.34553518891334534, 'label': 'cat'},
 {'score': 0.3443831503391266, 'label': 'dog'},
 {'score': 0.31008172035217285, 'label': 'horse'}]