# Settings

In [28]:
# DATASET_NAME = "Vampyrian/buitine-technika-ir-elektronika"
# OUTPUT_MODEL_NAME = "Vampyrian/buitine-technika-ir-elektronika"

DATASET_NAME = "Vampyrian/all-image-dataset"
OUTPUT_MODEL_NAME = "Vampyrian/all-image"

CHECKPOINT = 'google/vit-base-patch16-224-in21k'

# Login to huggin face

In [2]:
from dotenv import load_dotenv
import os

load_dotenv()

hf_token = os.getenv('HF_TOKEN')

In [5]:
from huggingface_hub import login
login(token=hf_token)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


# Loading dataset

In [6]:
from datasets import load_dataset
dataset = load_dataset(DATASET_NAME)

Generating train split: 100%|██████████| 11982/11982 [00:00<00:00, 161569.88 examples/s]


In [7]:
dataset

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 11982
    })
})

# Check if all image is correct

In [8]:
from PIL import Image, ImageFile, UnidentifiedImageError

In [9]:
ImageFile.LOAD_TRUNCATED_IMAGES = True

def is_valid_image(example):
    try:
        # Check if the input is already an image object
        if isinstance(example["image"], Image.Image):
            img = example["image"]  # It's already an image object
        else:
            # Otherwise, open the image from the path
            with Image.open(example["image"]) as img:
                img.verify()  # Verify it is a valid image

        return True  # If all goes well, return True (valid image)
    except Exception as e:
        # Catch exceptions such as invalid image formats
        return False

In [10]:
len(dataset["train"])

11982

In [11]:
dataset["train"] = dataset["train"].filter(lambda row : is_valid_image(row))

Filter: 100%|██████████| 11982/11982 [00:02<00:00, 4744.02 examples/s]


In [12]:
len(dataset["train"])

11982

In [13]:
train_test_split = dataset["train"].train_test_split(test_size=0.2)

In [14]:
train_test_split

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 9585
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 2397
    })
})

In [15]:
train_test_split["train"][0]

{'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=300x276>,
 'label': 84}

In [16]:
labels = train_test_split["train"].features["label"].names
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = str(i)
    id2label[str(i)] = label

In [17]:
id2label[str(2)]

'1243_Džiovyklės'

In [19]:
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained(CHECKPOINT, use_fast=True)

In [20]:
from torchvision.transforms import RandomResizedCrop, Compose, Normalize, ToTensor
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
size = (
    image_processor.size["shortest_edge"]
    if "shortest_edge" in image_processor.size
    else (image_processor.size["height"], image_processor.size["width"])
)
_transforms = Compose([RandomResizedCrop(size), ToTensor(), normalize])

In [21]:
def transforms(examples):
    examples["pixel_values"] = [_transforms(img.convert("RGB")) for img in examples["image"]]
    del examples["image"]
    return examples

In [22]:
train_test_split = train_test_split.with_transform(transforms)

In [23]:
from transformers import DefaultDataCollator

data_collator = DefaultDataCollator()

In [24]:
import evaluate
accuracy = evaluate.load("accuracy")

In [25]:
import numpy as np

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [26]:
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

model = AutoModelForImageClassification.from_pretrained(
    CHECKPOINT,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [29]:
training_args = TrainingArguments(
    output_dir=OUTPUT_MODEL_NAME,
    remove_unused_columns=False,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    num_train_epochs=10,
    warmup_ratio=0.1,
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=True,
)

In [30]:
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_test_split["train"],
    eval_dataset=train_test_split["test"],
    processing_class=image_processor,
    compute_metrics=compute_metrics,
)

In [31]:
trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,3.8398,3.792165,0.449312
2,2.9158,2.8345,0.597413
3,2.1857,2.273845,0.664998
4,1.9099,1.930986,0.69879
5,1.6565,1.642796,0.739675
6,1.4059,1.469514,0.761368
7,1.2304,1.347682,0.778473
8,1.2018,1.282467,0.788486
9,1.1269,1.23184,0.794743
10,1.1302,1.225386,0.795995


TrainOutput(global_step=1500, training_loss=1.9791525211334229, metrics={'train_runtime': 3430.5165, 'train_samples_per_second': 27.94, 'train_steps_per_second': 0.437, 'total_flos': 7.433332335842918e+18, 'train_loss': 1.9791525211334229, 'epoch': 10.0})

# Test on my own image

In [1]:
from transformers import pipeline

classifier = pipeline("image-classification", model="Vampyrian/buitine-technika-ir-elektronika")

  from .autonotebook import tqdm as notebook_tqdm
Device set to use mps:0


In [3]:
from PIL import Image
import requests
from io import BytesIO

In [11]:
image_url = "https://kainoteka-public.s3.eu-central-1.amazonaws.com/products/65045a504fe52667404007/65046084d2f31537879365-sm.webp"

response = requests.get(image_url)
response.raise_for_status()  # Ensure the request was successful

# Open the image with PIL
image = Image.open(BytesIO(response.content))


In [36]:
image_path = "indaplove.jpeg"  # Replace with your image's path
image = Image.open(image_path).convert("RGB")

FileNotFoundError: [Errno 2] No such file or directory: '/Users/zilvinasmacius/PycharmProjects/ml-categories-training/notebooks/indaplove.jpeg'

In [17]:
classifier(image)

[{'label': '1318_Langų valytuvai', 'score': 0.9484434723854065},
 {'label': '3690_Garais valantys prietaisai', 'score': 0.8926181793212891},
 {'label': '1320_Drabužių garintuvai', 'score': 0.8281411528587341},
 {'label': '1321_Dulkių siurblių antgaliai', 'score': 0.7781068682670593},
 {'label': '1300_Plaukų kirpimo mašinėlės', 'score': 0.7266762852668762}]