In [1]:
import os

# Remember where the kernel started. A bare os.chdir("../") is relative to the
# *current* directory, so re-running this cell would climb one more level each
# time; anchoring to the start directory makes the cell idempotent.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()

# Change the current directory to root directory (parent of the start directory)
new_directory = "../"
os.chdir(os.path.join(NOTEBOOK_START_DIR, new_directory))

# Verify the current directory has changed
updated_directory = os.getcwd()
print("Updated Directory:", updated_directory)

Updated Directory: /media/toma/2TB_30May2023/OBJECT_DETECTION_L/object_detection


### Pre-loaded data creation

In [None]:
from datasets import load_dataset

# Directory handed to the local loading script as `data_dir`.
IMAGENET_DIR = "data/imagenet_1k"

# Build only the "validation" split through the project's dataset script;
# prepared files are cached under ".cache".
imagenet_dataset = load_dataset(
    "utils/imagenet_1k_dataset_script.py",
    data_dir=IMAGENET_DIR,
    splits=["validation"],
    cache_dir=".cache",
)


In [None]:
# Sanity check: display record 3456 of the validation split
# (the index looks like an arbitrary pick — any valid row would do).
imagenet_dataset["validation"][3456]

In [None]:
# Create the output folder for the pre-loaded parquet copy of the dataset.
# exist_ok=True makes this safe to re-run and avoids the check-then-create race.
os.makedirs("preloaded_data_imagenet", exist_ok=True)

In [None]:
# Write the validation split to a single parquet file inside
# "preloaded_data_imagenet/"; the "Load pre-loaded data" cell reads this file back.
imagenet_dataset["validation"].to_parquet("preloaded_data_imagenet/validation.parquet")

#### Load pre-loaded data

In [4]:
from datasets import load_dataset

# Reload the validation split from the parquet export instead of re-running
# the raw ImageNet loading script. Uses its own cache folder (".cache2").
imagenet_dataset = load_dataset(
    "parquet",
    data_files={"validation": "preloaded_data_imagenet/validation.parquet"},
    cache_dir=".cache2",
)

Found cached dataset parquet (/media/toma/2TB_30May2023/OBJECT_DETECTION_L/object_detection/.cache2/parquet/default-cf7323d42978dad3/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
100%|██████████| 1/1 [00:00<00:00, 26.21it/s]


In [3]:
# Display record 3456 of the validation split; per the recorded output the
# record is a dict with an 'image' (PIL image) and an integer 'label'.
imagenet_dataset["validation"][3456]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=373x560>,
 'label': 72}

In [None]:
# Show only the image of that record (last expression, so the notebook renders it).
imagenet_dataset["validation"][3456]["image"]

In [None]:
from transformers import AutoImageProcessor

# Identifier of the checkpoint whose image-processor settings are loaded below.
checkpoint = "microsoft/focalnet-tiny"
# Provides image_mean / image_std / size, which the torchvision transforms
# defined later in this notebook read.
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

In [None]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor

# Channel-wise normalisation using the statistics stored on the image processor.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Target crop size: the processor's "shortest_edge" value when it defines one,
# otherwise an explicit (height, width) pair.
if "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
else:
    size = (image_processor.size["height"], image_processor.size["width"])

# Per-image pipeline: random resized crop -> tensor -> normalise.
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])


def transforms(examples):
    """Turn a batch of images into model-ready ``pixel_values``.

    Args:
        examples: Mutable mapping for one batch; must contain an ``"image"``
            entry holding an iterable of objects with a ``convert`` method
            (PIL images, judging by the dataset output shown earlier).

    Returns:
        The same mapping, mutated in place: ``"pixel_values"`` is added (one
        ``_transforms`` result per image, in order) and ``"image"`` is removed.
    """
    pixel_values = []
    for picture in examples["image"]:
        # Force 3-channel RGB before running the torchvision pipeline.
        pixel_values.append(_transforms(picture.convert("RGB")))
    examples["pixel_values"] = pixel_values
    # Drop the raw images so only the tensors are passed on.
    del examples["image"]
    return examples

# Attach `transforms` to the dataset and keep the result under a new name, so
# the untransformed `imagenet_dataset` stays available.
# NOTE(review): with_transform presumably applies the function lazily on access — confirm in the datasets docs.
imagenet_dataset_transformed = imagenet_dataset.with_transform(transforms)

In [None]:
# Check the transform on one record: "pixel_values" is the entry written by transforms().
imagenet_dataset_transformed["validation"][3456]["pixel_values"]

In [None]:
## DATA COLLATOR

from transformers import DefaultDataCollator

# Collator instance passed to the Trainer below (data_collator=...) to assemble batches.
data_collator = DefaultDataCollator()

## EVALUATION

import evaluate

# Accuracy metric object; compute_metrics() calls accuracy.compute(...) on it.
accuracy = evaluate.load("accuracy")


import numpy as np


def compute_metrics(eval_pred):
    """Score one evaluation pass with the module-level ``accuracy`` metric.

    Args:
        eval_pred: Two-item iterable unpacked as ``(predictions, labels)``.
            ``predictions`` must be array-like with at least two axes; the
            arg-max along axis 1 is taken as the predicted class id.

    Returns:
        Whatever ``accuracy.compute`` returns for the predicted ids against
        the reference labels.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)

In [None]:
## BUILD MODEL

from transformers import AutoModelForImageClassification, TrainingArguments, Trainer, AutoConfig
import json

# Architecture settings come from a local FocalNet config file. The model is
# built from this config (from_config), so no pretrained weights are loaded.
config = AutoConfig.from_pretrained('./configs/backbones/focalnet/config.json')

# Read the ImageNet-1k class-id <-> class-name mappings.
# JSON object keys are always strings, while the config convention is integer
# class ids as id2label keys, so convert them on load. Otherwise lookups such
# as config.id2label[predicted_id] fail on the in-memory config.
with open("configs/datasets/imagenet-1k-id2label.json", 'r', encoding="utf-8") as json_file:
    id2label = {int(class_id): name for class_id, name in json.load(json_file).items()}

with open("configs/datasets/imagenet-1k-label2id.json", 'r', encoding="utf-8") as json_file:
    label2id = json.load(json_file)

# Attach the label maps and size the classification head accordingly.
config.id2label = id2label
config.label2id = label2id
config.num_labels = len(label2id)

model = AutoModelForImageClassification.from_config(config)

In [None]:
import os
# "-1" matches no device index, the usual way to hide every GPU from CUDA.
# NOTE(review): only effective if CUDA has not been initialised yet in this kernel — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [None]:
# Tiny CPU-only run (6 optimiser steps), which looks like a pipeline smoke
# test rather than real training. The same split is used for train and eval.
training_args = TrainingArguments(
    output_dir="my_awesome_food_model",
    # Keep the "image" column: transforms() needs it to build pixel_values.
    remove_unused_columns=False,
    evaluation_strategy="steps",
    save_strategy="steps",
    learning_rate=5e-5,
    per_device_train_batch_size=2,
    # 2 samples x 2 accumulation steps = 4 samples per optimiser step.
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=2,
    save_steps = 3,
    max_steps = 6,
    warmup_ratio=0.1,
    no_cuda=True,
    logging_steps=2,
    # load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    # push_to_hub=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=imagenet_dataset_transformed["validation"],
    eval_dataset=imagenet_dataset_transformed["validation"],
    # The image processor is passed here so it is saved alongside the model.
    tokenizer=image_processor,
    compute_metrics=compute_metrics,
)

train_result = trainer.train()

# train() only writes checkpoint-<step> sub-folders. Save the final model (and
# the image processor) into the root of output_dir so that
# pipeline(..., model="my_awesome_food_model") can load it.
trainer.save_model()

# Keep the training summary as the cell's displayed value.
train_result

In [None]:
#### Inference


# NOTE(review): the sample comes from food101 while the model above uses
# ImageNet-1k labels — confirm this is intended.
ds = load_dataset("food101", split="validation[:10]")
# Index the row first so only that one image is decoded;
# ds["image"][0] would decode the whole column before picking element 0.
image = ds[0]["image"]


from transformers import pipeline

# Build an image-classification pipeline from "my_awesome_food_model" (the
# Trainer's output_dir above) and classify the sample image.
classifier = pipeline("image-classification", model="my_awesome_food_model") # the saved model config must already contain id2label / label2id
classifier(image)