Program powstał do **porównania działania 3 modeli języka** ( clip, top_15_anime_characters_image_detection, AnimeCharacterClassifierMark1 ) na zbiorze **50 randomowo wylosowanych** ,ze zbioru pobranych, **obrazów postaci ze znanych anime** tj. Eren Jeager, Naruto, Vegeta, Lelouch Lamperouge, Killua. Postacie zostały dobranie na podstawie postaci na których był uprzednio trenowany model top_15_anime_characters_image_detection - posiadał najmniejszy zestaw danych testowych.

użyte modele :
1. CLIP
2. https://huggingface.co/dima806/top_15_anime_characters_image_detection
3. https://huggingface.co/Abhiram4/AnimeCharacterClassifierMark1

### NA POCZĄTKU NALEŻY POBRAĆ WSZYSTKIE POTRZEBNE BIBLIOTEKI


In [None]:
!pip install transformers datasets huggingface_hub pillow torchvision numpy scikit-learn icrawler



In [None]:
!pip install python-Levenshtein



In [None]:
import os
import requests

SAVE_DIR = "safebooru_images"
CHARACTERS = [
    "Eren Jaeger",
    "Killua",
    "Lelouch Lamperouge",
    "Vegeta",
    "Naruto"
]

BASE_URL = "https://safebooru.org/index.php?page=dapi&s=post&q=index&json=1"
LIMIT_PER_CHARACTER = 50

os.makedirs(SAVE_DIR, exist_ok=True)

# Pobieranie obrazów dla nowych postaci
for character in CHARACTERS:
    print(f"pobieranie obrazów dla : {character}...")
    params = {
        "tags": f"{character.replace(' ', '_')}",
        "limit": LIMIT_PER_CHARACTER
    }
    response = requests.get(BASE_URL, params=params)

    if response.status_code == 200:
        posts = response.json()
        char_dir = os.path.join(SAVE_DIR, character.replace(" ", "_"))
        os.makedirs(char_dir, exist_ok=True)

        for i, post in enumerate(posts):
            if 'file_url' in post:
                try:
                    img_url = post['file_url']  # Pobranie URL do obrazu
                    img_data = requests.get(img_url).content
                    img_path = os.path.join(char_dir, f"{character.replace(' ', '_')}_{i+1}.jpg")

                    with open(img_path, 'wb') as img_file:
                        img_file.write(img_data)
                    print(f"zapisano: {img_path}")
                except Exception as e:
                    print(f"błąd przy pobieraniu : {e}")
    else:
        print(f"nie można pobrać danych dla {character}. kod błędu: {response.status_code}")

print("Pobieranie zakończone")

pobieranie obrazów dla : Eren Jaeger...
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_1.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_2.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_3.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_4.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_5.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_6.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_7.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_8.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_9.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_10.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_11.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_12.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_13.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_14.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_15.jpg
zapisano: safebooru_images/Eren_Jaeger/Eren_Jaeger_16.jpg
zapisano: safebooru_images/Eren_Jaeger/Er

KeyboardInterrupt: 

### **WCZYTYWANIE MODELU CLIP**


In [None]:
from transformers import CLIPProcessor, CLIPModel
import torch

clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
predicted_labels_clip = []

def predict_clip(image, labels):
    inputs = clip_processor(text=labels, images=image, return_tensors="pt", padding=True)
    outputs = clip_model(**inputs)
    logits_per_image = outputs.logits_per_image
    probs = logits_per_image.softmax(dim=1).cpu().detach().numpy()
    return probs

### **WCZYTYWANIE MODELU "top_15_anime_characters_image_detection"**


In [None]:
from transformers import AutoProcessor, AutoModelForImageClassification
from PIL import Image
import torch

model_name = "dima806/top_15_anime_characters_image_detection"
processor = AutoProcessor.from_pretrained(model_name)
model = AutoModelForImageClassification.from_pretrained(model_name)

def predict_anime_character(image_path):
    image = Image.open(image_path)

    if image.mode != 'RGB':
        image = image.convert('RGB')

    inputs = processor(images=image, return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits  # wyjściowe logity - szanse na każdą klasę
        predicted_class_idx = logits.argmax(-1).item()  # klasa z najwyższym prawdopodobieństwem

    print(f"predykcja: {model.config.id2label[predicted_class_idx]}")

In [None]:
import os
import random
from PIL import Image

def normalize_label(label):
    return label.replace("_", " ")

def get_random_images_from_folders(base_dir, num_images=50):
    folders = [folder for folder in os.listdir(base_dir) if os.path.isdir(os.path.join(base_dir, folder))]
    all_images_with_labels = []

    for folder in folders:
        folder_path = os.path.join(base_dir, folder)
        images = [os.path.join(folder_path, img) for img in os.listdir(folder_path) if img.lower().endswith(('.png', '.jpg', '.jpeg'))]

        all_images_with_labels.extend([(img, folder) for img in images])

    selected_images_with_labels = random.sample(all_images_with_labels, num_images)
    return selected_images_with_labels

base_directory = '/content/safebooru_images'

random_images_with_labels = get_random_images_from_folders(base_directory, 50)

# Podział na listy: ścieżki obrazów i etykiety
random_images = [item[0] for item in random_images_with_labels]
true_labels = [normalize_label(img.split('/')[-2]) for img in random_images]  # Normalizowanie etykiet prawdziwych

for img_path, label in zip(random_images[:10], true_labels[:10]):
    img = Image.open(img_path)
    print(f"Obraz: {img_path}, Etykieta: {label}")
    img.show()

Obraz: /content/safebooru_images/Vegeta/Vegeta_7.jpg, Etykieta: Vegeta
Obraz: /content/safebooru_images/Lelouch_Lamperouge/Lelouch_Lamperouge_31.jpg, Etykieta: Lelouch Lamperouge
Obraz: /content/safebooru_images/Eren_Jaeger/Eren_Jaeger_42.jpg, Etykieta: Eren Jaeger
Obraz: /content/safebooru_images/Vegeta/Vegeta_29.jpg, Etykieta: Vegeta
Obraz: /content/safebooru_images/Naruto/Naruto_10.jpg, Etykieta: Naruto
Obraz: /content/safebooru_images/Vegeta/Vegeta_24.jpg, Etykieta: Vegeta
Obraz: /content/safebooru_images/Lelouch_Lamperouge/Lelouch_Lamperouge_2.jpg, Etykieta: Lelouch Lamperouge
Obraz: /content/safebooru_images/Eren_Jaeger/Eren_Jaeger_39.jpg, Etykieta: Eren Jaeger
Obraz: /content/safebooru_images/Vegeta/Vegeta_17.jpg, Etykieta: Vegeta
Obraz: /content/safebooru_images/Vegeta/Vegeta_35.jpg, Etykieta: Vegeta


### **TEST MODELU CLIP**

In [None]:
print(f"Liczba prawdziwych etykiet: {len(true_labels)}")
print(f"Liczba przewidywanych etykiet: {len(predicted_labels_clip)}")
print(f"Liczba przewidywanych etykiet: {len(random_images)}")

Liczba prawdziwych etykiet: 50
Liczba przewidywanych etykiet: 0
Liczba przewidywanych etykiet: 50


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, f1_score
from PIL import Image

labels = [
    "Lelouch Lamperouge", "Eren Yeager", "Vegeta", "Killua", "Naruto"
]

predicted_labels_clip = []
for img_path in random_images:
    img = Image.open(img_path)

    probs = predict_clip(img, labels)
    predicted_class_idx = np.argmax(probs)  # Indeks klasy z najwyższym prawdopodobieństwem
    predicted_label = labels[predicted_class_idx]  # Tekstowa etykieta
    predicted_labels_clip.append(predicted_label)

# Metryki
accuracy_clip = accuracy_score(true_labels, predicted_labels_clip)
precision_clip = precision_score(true_labels, predicted_labels_clip, average='weighted', zero_division=0)
f1_clip = f1_score(true_labels, predicted_labels_clip, average='weighted', zero_division=0)

# Przykłady działania
for img_path, true_label, predicted_label in zip(random_images, true_labels, predicted_labels_clip):
    print(f"Obraz: {img_path}")
    print(f"True label: {true_label}, Predicted label: {predicted_label}")

print(f"Accuracy: {accuracy_clip:.4f}")
print(f"Precision: {precision_clip:.4f}")
print(f"F1 Score: {f1_clip:.4f}")

Obraz: /content/safebooru_images/Vegeta/Vegeta_7.jpg
True label: Vegeta, Predicted label: Vegeta
Obraz: /content/safebooru_images/Lelouch_Lamperouge/Lelouch_Lamperouge_31.jpg
True label: Lelouch Lamperouge, Predicted label: Lelouch Lamperouge
Obraz: /content/safebooru_images/Eren_Jaeger/Eren_Jaeger_42.jpg
True label: Eren Jaeger, Predicted label: Eren Yeager
Obraz: /content/safebooru_images/Vegeta/Vegeta_29.jpg
True label: Vegeta, Predicted label: Vegeta
Obraz: /content/safebooru_images/Naruto/Naruto_10.jpg
True label: Naruto, Predicted label: Naruto
Obraz: /content/safebooru_images/Vegeta/Vegeta_24.jpg
True label: Vegeta, Predicted label: Vegeta
Obraz: /content/safebooru_images/Lelouch_Lamperouge/Lelouch_Lamperouge_2.jpg
True label: Lelouch Lamperouge, Predicted label: Lelouch Lamperouge
Obraz: /content/safebooru_images/Eren_Jaeger/Eren_Jaeger_39.jpg
True label: Eren Jaeger, Predicted label: Eren Yeager
Obraz: /content/safebooru_images/Vegeta/Vegeta_17.jpg
True label: Vegeta, Predicte

### **TEST MODELU "top_15_anime_characters_image_detection"**

In [None]:
from sklearn.metrics import accuracy_score, precision_score, f1_score
import numpy as np
from PIL import Image

true_labels = [normalize_label(img.split('/')[-2]) for img in random_images]

predicted_labels_anime = []

# Iterowanie przez obrazy
for img_path in random_images:
    img = Image.open(img_path).convert("RGB")

    inputs = processor(images=img, return_tensors="pt")
    outputs = model(**inputs)
    logits = outputs.logits
    predicted_class_idx = logits.argmax(-1).item()  # Klasa z najwyższym prawdopodobieństwem
    predicted_label = model.config.id2label[predicted_class_idx]

    normalized_predicted_label = normalize_label(predicted_label)
    predicted_labels_anime.append(normalized_predicted_label)

# Metryki
accuracy_anime = accuracy_score(true_labels, predicted_labels_anime)
precision_anime = precision_score(true_labels, predicted_labels_anime, average='weighted', zero_division=0)
f1_anime = f1_score(true_labels, predicted_labels_anime, average='weighted', zero_division=0)

# Przykłady działania
for img_path, true_label, predicted_label in zip(random_images, true_labels, predicted_labels_anime):
    print(f"Obraz: {img_path}")
    print(f"True label: {true_label}, Predicted label: {predicted_label}")

print(f"Accuracy: {accuracy_anime:.4f}")
print(f"Precision: {precision_anime:.4f}")
print(f"F1 Score: {f1_anime:.4f}")

Obraz: /content/safebooru_images/Vegeta/Vegeta_7.jpg
True label: Vegeta, Predicted label: Vegeta
Obraz: /content/safebooru_images/Lelouch_Lamperouge/Lelouch_Lamperouge_31.jpg
True label: Lelouch Lamperouge, Predicted label: Sasuke
Obraz: /content/safebooru_images/Eren_Jaeger/Eren_Jaeger_42.jpg
True label: Eren Jaeger, Predicted label: Zoro
Obraz: /content/safebooru_images/Vegeta/Vegeta_29.jpg
True label: Vegeta, Predicted label: Vegeta
Obraz: /content/safebooru_images/Naruto/Naruto_10.jpg
True label: Naruto, Predicted label: Sasuke
Obraz: /content/safebooru_images/Vegeta/Vegeta_24.jpg
True label: Vegeta, Predicted label: Vegeta
Obraz: /content/safebooru_images/Lelouch_Lamperouge/Lelouch_Lamperouge_2.jpg
True label: Lelouch Lamperouge, Predicted label: Lelouch Lamperouge
Obraz: /content/safebooru_images/Eren_Jaeger/Eren_Jaeger_39.jpg
True label: Eren Jaeger, Predicted label: Eren Yeager
Obraz: /content/safebooru_images/Vegeta/Vegeta_17.jpg
True label: Vegeta, Predicted label: Vegeta
Obr

### **IMPLEMENTACJA I TEST "AnimeCharacterClassifierMark1"**

In [None]:
from transformers import AutoProcessor, AutoModelForImageClassification
import torch
from sklearn.metrics import accuracy_score, precision_score, f1_score
from PIL import Image
import Levenshtein

allowed_labels = [
    "Lelouch Lamperouge", "Eren Yeager", "Vegeta", "Killua", "Naruto"
]

model_anime = "Abhiram4/AnimeCharacterClassifierMark1"
processor = AutoProcessor.from_pretrained(model_anime)
model = AutoModelForImageClassification.from_pretrained(model_anime)

def predict_anime_character2(image_path):
    image = Image.open(image_path)

    if image.mode != 'RGB':
        image = image.convert('RGB')

    inputs = processor(images=image, return_tensors="pt")

    # Przewidywanie klasy
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predicted_class_idx = logits.argmax(-1).item()  # klasa z najwyższym prawdopodobieństwem

    predicted_label = model.config.id2label[predicted_class_idx]

    if normalize_label(predicted_label) in allowed_labels:
        return normalize_label(predicted_label)
    else:
        # Szukanie najbliższej etykiety - Levenshtein distance
        closest_label = min(allowed_labels, key=lambda label: Levenshtein.distance(normalize_label(predicted_label), label))
        return closest_label

predicted_labels_anime_2 = []

for img_path in random_images:
    predicted_label = predict_anime_character2(img_path)
    predicted_labels_anime_2.append(predicted_label)

# Metryki
accuracy_anime_2 = accuracy_score(true_labels, predicted_labels_anime_2)
precision_anime_2 = precision_score(true_labels, predicted_labels_anime_2, average='weighted', zero_division=0)
f1_anime_2 = f1_score(true_labels, predicted_labels_anime_2, average='weighted', zero_division=0)

# Przykłady działania
for img_path, true_label, predicted_label in zip(random_images, true_labels, predicted_labels_anime_2):
    print(f"Obraz: {img_path}")
    print(f"True label: {true_label}, Predicted label: {predicted_label}")

print(f"Accuracy: {accuracy_anime_2:.4f}")
print(f"Precision: {precision_anime_2:.4f}")
print(f"F1 Score: {f1_anime_2:.4f}")

Obraz: /content/safebooru_images/Vegeta/Vegeta_7.jpg
True label: Vegeta, Predicted label: Eren Yeager
Obraz: /content/safebooru_images/Lelouch_Lamperouge/Lelouch_Lamperouge_31.jpg
True label: Lelouch Lamperouge, Predicted label: Naruto
Obraz: /content/safebooru_images/Eren_Jaeger/Eren_Jaeger_42.jpg
True label: Eren Jaeger, Predicted label: Eren Yeager
Obraz: /content/safebooru_images/Vegeta/Vegeta_29.jpg
True label: Vegeta, Predicted label: Vegeta
Obraz: /content/safebooru_images/Naruto/Naruto_10.jpg
True label: Naruto, Predicted label: Lelouch Lamperouge
Obraz: /content/safebooru_images/Vegeta/Vegeta_24.jpg
True label: Vegeta, Predicted label: Vegeta
Obraz: /content/safebooru_images/Lelouch_Lamperouge/Lelouch_Lamperouge_2.jpg
True label: Lelouch Lamperouge, Predicted label: Vegeta
Obraz: /content/safebooru_images/Eren_Jaeger/Eren_Jaeger_39.jpg
True label: Eren Jaeger, Predicted label: Eren Yeager
Obraz: /content/safebooru_images/Vegeta/Vegeta_17.jpg
True label: Vegeta, Predicted label

# **KOŃCOWE PORÓWNANIE METRYK DLA WSZSYTKICH MODELI**

In [None]:
print(f"WYNIKI MODELU CLIP")
print(f"Accuracy: {accuracy_clip:.4f}")
print(f"Precision: {precision_clip:.4f}")
print(f"F1 Score: {f1_clip:.4f}\n")

print(f"WYNIKI MODELU top_15_anime_characters_image_detection")
print(f"Accuracy: {accuracy_anime:.4f}")
print(f"Precision: {precision_anime:.4f}")
print(f"F1 Score: {f1_anime:.4f}\n")

print(f"WYNIKI MODELU AnimeCharacterClassifierMark1")
print(f"Accuracy: {accuracy_anime_2:.4f}")
print(f"Precision: {precision_anime_2:.4f}")
print(f"F1 Score: {f1_anime_2:.4f}")

WYNIKI MODELU CLIP
Accuracy: 0.6800
Precision: 0.7014
F1 Score: 0.6886

WYNIKI MODELU top_15_anime_characters_image_detection
Accuracy: 0.3400
Precision: 0.6578
F1 Score: 0.4177

WYNIKI MODELU AnimeCharacterClassifierMark1
Accuracy: 0.1800
Precision: 0.1960
F1 Score: 0.1873
