<a href="https://colab.research.google.com/github/edrispor/Candy_Counter/blob/main/Deep_Learning_Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Dependencies: uncomment and run these on a fresh runtime (e.g. Colab) where
# the packages are not installed yet.
# NOTE(review): albumentations is imported in the next cell but is not listed here.
# !pip install transformers
# !pip install datasets
# !pip install timm
# !pip install transformers[torch]

In [2]:
from datasets import load_dataset
from transformers import YolosFeatureExtractor
import json
from pathlib import Path
import numpy as np
import os
from PIL import Image, ImageDraw
from transformers import AutoImageProcessor
import albumentations
import numpy as np
import torch
from transformers import AutoModelForObjectDetection
from transformers import TrainingArguments
from transformers import Trainer
import torchvision
from tqdm import tqdm

In [3]:
# Convert the COCO-style export in result.json into one record per image and
# write those records to metadata.jsonl for the Hugging Face `imagefolder` loader.
with open('result.json') as f:
    cocodata = json.load(f)

# Group the annotations by image id in a single pass, so every image can pick
# up its own boxes directly instead of rescanning the full annotation list.
annots_by_image = {}
for annot in cocodata['annotations']:
    annots_by_image.setdefault(annot['image_id'], []).append(annot)

# Store Hugging Face formatted data in a list: exactly one entry per image.
huggingdata = []
for image in cocodata['images']:
    # Images without any annotation still get a record, with empty object lists.
    matching = annots_by_image.get(image['id'], [])
    huggingdata.append({
        'width': image['width'],
        'height': image['height'],
        # Remove the (backslash-separated) image directory from the file name
        'file_name': image['file_name'].split('\\')[-1],
        'image_id': image['id'],
        # Bounding boxes and class labels, as parallel lists in annotation order
        'objects': {
            'bbox': [annot['bbox'] for annot in matching],
            'category': [annot['category_id'] for annot in matching],
            'area': [annot['area'] for annot in matching],
            'id': [annot['id'] for annot in matching],
        },
    })

# One JSON object per line.
# NOTE(review): this writes to the working directory; the imagefolder loader
# presumably expects metadata.jsonl inside the data_dir used below
# ("candy images") - confirm the file ends up there.
with open("metadata.jsonl", 'w') as f:
    for item in huggingdata:
        f.write(json.dumps(item) + "\n")



candy_data = load_dataset('imagefolder', data_dir="candy images") # The images are read from the "candy images" folder.

# Class-id <-> class-name lookups taken from the COCO category list.
id2label = {item['id']: item['name'] for item in cocodata['categories']}
label2id = {v: k for k, v in id2label.items()}



  0%|          | 0/1 [00:00<?, ?it/s]

In [4]:
# Candy class names mapped to integer class ids; a name's position in the
# list below is its id.
categories = {
    class_name: class_id
    for class_id, class_name in enumerate([
        'Unicorn_whole',
        'Black_star',
        'Cat',
        'Grey_star',
        'Insect',
        'Moon',
        'Owl',
        'Unicorn_head',
    ])
}

# label2id is the very same mapping object; id2label is its inverse.
label2id = categories
id2label = dict(zip(label2id.values(), label2id.keys()))

In [5]:
# Name of the pretrained checkpoint; the same value is passed again when the
# detection model is loaded further down.
checkpoint = "facebook/detr-resnet-50"
# Image processor loaded for that checkpoint; it is used later to encode the
# augmented batches and to pad them in the collate function.
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.
The `max_size` parameter is deprecated and will be removed in v4.26. Please specify in `size['longest_edge'] instead`.


In [6]:
# Augmentation pipeline applied to every image before it reaches the image
# processor. Both the flip and the brightness/contrast step are configured
# with p=1.0.
_augmentation_steps = [
    albumentations.Resize(height=480, width=480),
    albumentations.HorizontalFlip(p=1.0),
    albumentations.RandomBrightnessContrast(p=1.0),
]

# Boxes are passed in COCO format and transformed together with the image;
# the class labels travel alongside them in the "category" field.
_bbox_settings = albumentations.BboxParams(format="coco", label_fields=["category"])

transform = albumentations.Compose(_augmentation_steps, bbox_params=_bbox_settings)

In [7]:
def formatted_anns(image_id, category, area, bbox):
    """Build the COCO-style annotation dicts for one image.

    Args:
        image_id: Identifier of the image all annotations belong to.
        category: Sequence of class ids, one per object.
        area: Sequence of box areas, parallel to ``category``.
        bbox: Sequence of boxes, parallel to ``category``; each box is
            converted to a plain list.

    Returns:
        A list with one dict per entry of ``category`` (empty when there are
        no objects), with the keys ``image_id``, ``category_id``, ``iscrowd``,
        ``area`` and ``bbox``.

    Raises:
        IndexError: If ``area`` or ``bbox`` is shorter than ``category``.
    """
    return [
        {
            "image_id": image_id,
            "category_id": label,
            # COCO spells this key in lower case; no object here is a crowd.
            "iscrowd": 0,
            "area": area[i],
            "bbox": list(bbox[i]),
        }
        for i, label in enumerate(category)
    ]

In [8]:
# Transforming a batch
def transform_aug_ann(examples):
    """Augment a batch of examples and encode it with the image processor.

    Args:
        examples: Batch dict with the columns "image_id", "image" and
            "objects"; each entry of "objects" provides "bbox", "category"
            and "area".

    Returns:
        Whatever ``image_processor`` returns for the augmented images and
        their COCO-style targets (requested with ``return_tensors="pt"``).
    """
    batch_ids = examples["image_id"]

    augmented = []
    for pil_image, objs in zip(examples["image"], examples["objects"]):
        # Convert to RGB, then reverse the channel axis (RGB -> BGR).
        # NOTE(review): confirm that the image processor really should
        # receive channel-reversed input.
        pixels = np.array(pil_image.convert("RGB"))[:, :, ::-1]
        result = transform(image=pixels, bboxes=objs["bbox"], category=objs["category"])
        # NOTE(review): the areas are taken from the original annotations,
        # not recomputed from the transformed boxes - confirm this is intended.
        augmented.append(
            (objs["area"], result["image"], result["bboxes"], result["category"])
        )

    images = [entry[1] for entry in augmented]

    targets = []
    for img_id, (areas, _, boxes, labels) in zip(batch_ids, augmented):
        targets.append(
            {"image_id": img_id, "annotations": formatted_anns(img_id, labels, areas, boxes)}
        )

    return image_processor(images=images, annotations=targets, return_tensors="pt")

In [9]:
# Transforming the data: attach transform_aug_ann to the dataset via with_transform.
# NOTE(review): both lines start from candy_data["train"], so the evaluation
# set is the same split as the training set - confirm whether a held-out
# split was intended.
eval_data = candy_data["train"].with_transform(transform_aug_ann) # Evaluation set (same split as training)
candy_data["train"] = candy_data["train"].with_transform(transform_aug_ann) # Training set

In [10]:
def collate_fn(batch):
    """Collate encoded examples into one padded batch for the model.

    Args:
        batch: List of examples, each providing "pixel_values" and "labels".

    Returns:
        A dict with the padded "pixel_values", the per-example "labels" list
        and, when the image processor's padding step produces one, the
        matching "pixel_mask".
    """
    pixel_values = [item["pixel_values"] for item in batch]
    encoding = image_processor.pad(pixel_values, return_tensors="pt")

    collated = {
        "pixel_values": encoding["pixel_values"],
        "labels": [item["labels"] for item in batch],
    }
    # Forward the padding mask as well, so padded pixels are not treated as
    # real image content when the images in a batch differ in size.
    if "pixel_mask" in encoding:
        collated["pixel_mask"] = encoding["pixel_mask"]
    return collated

In [11]:
# Load the pretrained detector with this project's label mappings.
# ignore_mismatched_sizes=True lets loading proceed even though the
# checkpoint's weight shapes do not all match the model built for these labels.
_label_config = dict(
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)
model = AutoModelForObjectDetection.from_pretrained(checkpoint, **_label_config)

Some weights of DetrForObjectDetection were not initialized from the model checkpoint at facebook/detr-resnet-50 and are newly initialized because the shapes did not match:
- class_labels_classifier.weight: found shape torch.Size([92, 256]) in the checkpoint and torch.Size([9, 256]) in the model instantiated
- class_labels_classifier.bias: found shape torch.Size([92]) in the checkpoint and torch.Size([9]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [12]:
# Training configuration, grouped by concern.
training_args = TrainingArguments(
    # Where checkpoints go, and how many are kept
    output_dir="Candy_Counter_Model_Resnet",
    overwrite_output_dir=True,
    save_strategy="steps",
    save_steps=50,
    save_total_limit=2,
    push_to_hub=False,
    # Optimisation
    num_train_epochs=100,
    per_device_train_batch_size=4,
    learning_rate=1e-5,
    weight_decay=1e-4,
    fp16=True,
    # Logging and evaluation; the best checkpoint is restored at the end
    logging_steps=50,
    evaluation_strategy="steps",
    load_best_model_at_end=True,
    # Keep every dataset column so the batch transform still sees them
    remove_unused_columns=False,
)

In [13]:
# Wire the model, arguments, collate function and datasets into a Trainer.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=collate_fn,
    train_dataset=candy_data["train"],
    eval_dataset=eval_data,
    tokenizer=image_processor,
)

trainer.train()
# The target directory has to be given as a string; a bare name here would be
# an undefined variable and raise NameError only after training has finished.
trainer.save_model("Candy_Counter_Model_Trained")

