# Настраиваем окружение

In [None]:
#!pip install transformers datasets evaluate accelerate matplotlib torch torchvision scikit-learn pillow gradio

In [2]:
import transformers

# Seed Python's `random`, NumPy and torch and/or TF (if installed) so that repeated runs
# of the notebook draw the same random numbers.
# NOTE(review): `deterministic=True` presumably also asks torch to use deterministic
# algorithms, which can be slower or unsupported for some ops — confirm against the
# transformers documentation for the installed version.
transformers.set_seed(42, deterministic=True)

# Готовим изображения

https://huggingface.co/datasets/ethz/food101

![image.png](attachment:image.png)

In [None]:
import datasets

# Load the Food-101 training split.
dataset = datasets.load_dataset("food101", split="train")

# Keep only the first five classes (label ids 0-4).
# `input_columns="label"` passes just the label value to the predicate, so the image
# column is never read or decoded while filtering — the predicate only needs the label.
dataset = dataset.filter(lambda label: label < 5, input_columns="label")

dataset

In [None]:
# Index the row first, then the column: this decodes a single image instead of
# materialising the whole image column just to look at one picture.
dataset[1]["image"]

In [None]:
import numpy as np
from IPython.display import display

# Pick three random row indices and show the corresponding images.
random_samples = np.random.randint(low=0, high=len(dataset), size=3)

for index in random_samples:
    # Row-first access decodes only the requested image; `int(...)` turns the NumPy
    # integer into a plain Python int before it is used as a row index.
    display(dataset[int(index)]["image"])

In [None]:
# Label stored for the same sample (index 1) whose image is displayed above.
dataset["label"][1]

In [None]:
# Class names for the subset kept by the `label < 5` filter above: the first five
# entries of the label feature's name list. The `5` here must stay in sync with the
# filter threshold.
labels = dataset.features["label"].names[:5]

labels

In [None]:
# Two lookup tables between class names and their integer ids; the id of a class is
# its position in `labels`.
label2id = {name: idx for idx, name in enumerate(labels)}
id2label = {idx: name for idx, name in enumerate(labels)}

In [None]:
# Sanity check: the class name mapped to id 0.
id2label[0]

In [None]:
# Hold out 20% of the samples for evaluation.
# An explicit seed pins the split: without it the shuffle depends on the current state
# of the global NumPy RNG, i.e. on which earlier cells were run and how many times.
dataset = dataset.train_test_split(test_size=0.2, seed=42)

dataset

# Преобразуем данные

In [None]:
from transformers import AutoImageProcessor

# Alternative pretrained backbones (uncomment exactly one `checkpoint` line to switch):
# checkpoint = "microsoft/swin-tiny-patch4-window7-224"
# checkpoint = "google/vit-base-patch16-224-in21k"
checkpoint = "microsoft/resnet-50"
# Image processor published with the checkpoint; its `size`, `image_mean` and
# `image_std` are read in the next cell to build the torchvision transforms.
image_processor = AutoImageProcessor.from_pretrained(checkpoint)

In [None]:
from torchvision.transforms import Compose, Normalize, RandomResizedCrop, ToTensor

# Target crop size: image processors describe it either as a single "shortest_edge"
# value or as an explicit height/width pair, so handle both layouts.
processor_size = image_processor.size
if "shortest_edge" in processor_size:
    size = processor_size["shortest_edge"]
else:
    size = (processor_size["height"], processor_size["width"])

# Normalise with the mean / std values taken from the image processor.
normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)

# Random resized crop -> tensor -> normalisation.
_transforms = Compose(
    [
        RandomResizedCrop(size),
        ToTensor(),
        normalize,
    ]
)

In [None]:
def transforms(examples):
    """Turn a batch of PIL images into model-ready tensors.

    Every image under ``examples["image"]`` is converted to RGB and passed through
    ``_transforms``; the results are stored under ``"pixel_values"`` and the
    ``"image"`` entry is removed. The batch is modified in place and returned.
    """
    pixel_values = []
    for picture in examples["image"]:
        pixel_values.append(_transforms(picture.convert("RGB")))
    examples["pixel_values"] = pixel_values
    examples.pop("image")
    return examples

# Выбираем метрику для оценки качества

In [None]:
import evaluate
import numpy as np

accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy for one evaluation pass.

    ``eval_pred`` unpacks into a pair (model outputs, reference labels). The predicted
    class of each sample is the arg-max of its outputs along axis 1; the result is
    whatever ``accuracy.compute`` returns for those predictions and references.
    """
    logits, references = eval_pred
    predicted_ids = np.argmax(logits, axis=1)
    return accuracy.compute(predictions=predicted_ids, references=references)

In [None]:
from collections import Counter

import matplotlib.pyplot as plt


def plot_counts(split="train"):
    """Plot and print the number of samples per class in one split of ``dataset``.

    Args:
        split: Key of the split inside ``dataset`` to summarise. Defaults to
            ``"train"``, which is what a call without arguments inspects.

    Classes are listed in ascending class-id order so the chart and the printed
    summary look the same regardless of how the data was shuffled.
    """
    label_counts = Counter(dataset[split]["label"])
    if not label_counts:
        # Nothing to unpack or draw for an empty split.
        print(f"No samples found in split '{split}'")
        return

    # Counter keeps first-seen order, which is arbitrary after shuffling; sort by id.
    label_ids = sorted(label_counts)
    label_names = [id2label[label_id] for label_id in label_ids]
    counts = [label_counts[label_id] for label_id in label_ids]

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(label_names, counts, color="skyblue")
    ax.set_xlabel("Class Labels")
    ax.set_ylabel("Number of Samples")
    ax.set_title("Distribution of Samples per Class")
    plt.xticks(rotation=45, ha="right")
    fig.tight_layout()
    plt.show()

    for name, count in zip(label_names, counts):
        print(f"Class '{name}': {count} samples")


plot_counts()

# Грузим и обучаем модель

In [None]:
from transformers import AutoModelForImageClassification, Trainer, TrainingArguments

# Load the pretrained checkpoint with a classification head sized for our classes and
# with the id <-> name mappings stored in the model config.
# NOTE(review): `ignore_mismatched_sizes=True` is presumably required because the
# checkpoint's original head was trained for a different number of classes, so the
# head weights cannot be reused and are re-initialised — confirm for other checkpoints.
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
)

In [None]:
from torchvision.transforms import CenterCrop, Resize
from transformers import DefaultDataCollator

# Deterministic preprocessing for evaluation. The training pipeline ends in a
# RandomResizedCrop, which is an augmentation: applying it to the held-out split makes
# the reported accuracy (and therefore the "best model" selection) fluctuate from one
# evaluation to the next. Resize + CenterCrop yields the same tensor for the same image.
_eval_transforms = Compose([Resize(size), CenterCrop(size), ToTensor(), normalize])


def eval_transforms(examples):
    """Batch transform for evaluation: RGB conversion plus deterministic resize/crop.

    Stores the tensors under ``"pixel_values"``, removes the ``"image"`` entry and
    returns the (in-place modified) batch — the same contract as ``transforms``.
    """
    examples["pixel_values"] = [_eval_transforms(img.convert("RGB")) for img in examples["image"]]
    del examples["image"]
    return examples


training_args = TrainingArguments(
    report_to="none",
    output_dir="outputs",
    # The on-the-fly transforms read the "image" column, which is not a model input,
    # so the Trainer must not drop it.
    remove_unused_columns=False,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=16,
    # 16 samples per step x 4 accumulation steps = effective batch of 64 per device.
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=16,
    num_train_epochs=12,
    warmup_ratio=0.1,
    logging_steps=10,
    # Evaluate and checkpoint every epoch, then restore the most accurate checkpoint.
    load_best_model_at_end=True,
    metric_for_best_model="eval_accuracy",
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=DefaultDataCollator(),
    # Random augmentation for training, deterministic preprocessing for evaluation.
    train_dataset=dataset["train"].with_transform(transforms),
    eval_dataset=dataset["test"].with_transform(eval_transforms),
    processing_class=image_processor,
    compute_metrics=compute_metrics,
)

trainer.train()

# Тестируем

In [None]:
from transformers import pipeline

# Wrap the fine-tuned model and its image processor in an inference pipeline, so the
# cells below can classify an image with a single call.
classifier = pipeline("image-classification", model=model, image_processor=image_processor)

In [None]:
index = 1

# Fetch the row once: this decodes a single image, instead of reading the whole test
# image column (twice) just to show and classify one picture.
sample = dataset["test"][index]

display(sample["image"])
print(f'True label: {id2label[sample["label"]]}\n')
print("Predictions:")

predictions = classifier(sample["image"])
predictions

In [None]:
import torch

# The image-classification pipeline already applies softmax to the logits of a
# multi-class model, so each "score" is a probability. Running softmax over them a
# second time would squash the values towards a uniform distribution, so the scores
# are used as-is.
scores = torch.tensor([item["score"] for item in predictions])
probabilities = scores
probabilities

# Показываем результаты

In [None]:
# import gradio as gr


# def predict(image):
#     predictions = classifier(image)
#     scores = torch.tensor([item["score"] for item in predictions])
#     probabilities = torch.nn.functional.softmax(scores, dim=0)
#     predicted_label = predictions[int(probabilities.argmax())]["label"]

#     return predicted_label


# interface = gr.Interface(
#     fn=predict,
#     inputs=gr.Image(type="pil"),
#     outputs=gr.Label(num_top_classes=6),
# )

# interface.launch()

In [None]:
# interface.launch(share=True)