In [None]:
# Environment setup: install PyTorch 1.7.1 with torchvision and CUDA toolkit 11.0 from the pytorch conda channel.
%conda install --yes -c pytorch pytorch=1.7.1 torchvision cudatoolkit=11.0
# Extra packages installed with pip: ftfy, regex and tqdm (no versions pinned).
%pip install ftfy regex tqdm
# Install CLIP straight from the OpenAI GitHub repository (no commit or tag pinned).
# NOTE(review): the import cell also uses openai, scikit-learn, opencv (cv2), seaborn and
# matplotlib, none of which are installed here — confirm the environment provides them.
%pip install git+https://github.com/openai/CLIP.git

In [3]:
import torch 
import clip
from PIL import Image
from openai import OpenAI
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix
import os
import cv2
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

ImportError: DLL load failed while importing _imaging: Não foi possível encontrar o módulo especificado.

In [None]:
import numpy
import sklearn

# Report the installed NumPy and scikit-learn versions, one per line.
print(numpy.__version__, sklearn.__version__, sep="\n")

In [None]:
# Load the CLIP ViT-B/32 model, on the GPU when CUDA is available and on the CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

# OpenAI-compatible client pointed at OpenRouter; used to generate class descriptions.
# The API key is read from the OPENROUTER_API_KEY environment variable so that no
# credential is stored in the notebook.
# NOTE(review): a key was previously committed in this cell; treat it as leaked and revoke it.
if "OPENROUTER_API_KEY" not in os.environ:
    raise RuntimeError(
        "Defina a variável de ambiente OPENROUTER_API_KEY com a sua chave da OpenRouter."
    )
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ["OPENROUTER_API_KEY"],
)

def generate_class_descriptions(classes):
    """Ask the chat model for a visual description of each class name.

    One chat-completion request is sent per entry of ``classes`` through the
    module-level ``client``, using the ``deepseek/deepseek-chat-v3-0324:free``
    model.

    Args:
        classes: Iterable of class names.

    Returns:
        A list with the content of the first choice of each response, in the
        same order as ``classes``.
    """

    def describe(cls):
        # A fresh message list is built for every request.
        conversation = [
            {"role": "system", "content": "Descreva visualmente esta categoria de imagem."},
            {"role": "user", "content": f"Como é uma imagem da categoria {cls}?"},
        ]
        completion = client.chat.completions.create(
            model="deepseek/deepseek-chat-v3-0324:free",
            messages=conversation,
        )
        return completion.choices[0].message.content

    return [describe(cls) for cls in classes]

def load_image_cv2(image_path):
    """Read an image file with OpenCV and return it as an RGB PIL image.

    The file is read into memory with ``np.fromfile`` and decoded with
    ``cv2.imdecode`` instead of being opened with ``cv2.imread``: on Windows,
    ``cv2.imread`` cannot open paths containing non-ASCII characters and
    returns ``None`` for them even though the file exists.

    Args:
        image_path: Path of the image file.

    Returns:
        A ``PIL.Image.Image`` built from the decoded pixels in RGB channel order.

    Raises:
        ValueError: If the file cannot be read, is empty, or cannot be decoded
            as an image.
    """
    try:
        encoded = np.fromfile(image_path, dtype=np.uint8)
    except OSError as exc:
        # Keep ValueError as the single failure type callers have to handle.
        raise ValueError(f"Erro ao carregar a imagem: {image_path}") from exc

    # cv2.imdecode rejects an empty buffer with an exception rather than returning
    # None, so an empty file is mapped to the same "could not load" outcome here.
    image = cv2.imdecode(encoded, cv2.IMREAD_COLOR) if encoded.size else None
    if image is None:
        raise ValueError(f"Erro ao carregar a imagem: {image_path}")

    # OpenCV decodes to BGR channel order; PIL expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    return Image.fromarray(image)

def extract_clip_features(image_path):
    """Compute the CLIP image embedding of a single image file.

    The image is loaded with ``load_image_cv2``, passed through the module-level
    ``preprocess`` transform, given a leading batch dimension, moved to
    ``device`` and encoded with ``model.encode_image`` with gradient tracking
    disabled.

    Args:
        image_path: Path of the image file.

    Returns:
        The tensor returned by ``model.encode_image`` for a batch holding this
        one image (presumably shaped ``(1, embedding_dim)`` — confirm).
    """
    pil_image = load_image_cv2(image_path)
    batch = preprocess(pil_image).unsqueeze(0).to(device)
    with torch.no_grad():
        return model.encode_image(batch)

def load_dataset(dataset_path):
    """Map every class folder under ``dataset_path`` to its image file paths.

    Each sub-directory of ``dataset_path`` is treated as a class. Files inside
    it are kept when their lower-cased name ends with ``.png``, ``.jpg`` or
    ``.jpeg``. Entries of ``dataset_path`` that are not directories are ignored.

    Args:
        dataset_path: Folder containing one sub-folder per class.

    Returns:
        Dict mapping class name to the list of image paths
        (``dataset_path/class_name/file_name``). Classes and files are in
        sorted name order. A class folder without matching files maps to an
        empty list.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    dataset = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the class and
    # image order stable across runs and platforms.
    for class_name in sorted(os.listdir(dataset_path)):
        class_dir = os.path.join(dataset_path, class_name)
        if not os.path.isdir(class_dir):
            continue
        dataset[class_name] = [
            os.path.join(class_dir, file_name)
            for file_name in sorted(os.listdir(class_dir))
            if file_name.lower().endswith(image_extensions)
        ]
    return dataset

def refine_class_descriptions(dataset):
    """Build one prototype embedding per class by averaging its image embeddings.

    Despite the name, no text is involved: for every class, the tensors returned
    by ``extract_clip_features`` for its images are concatenated along dim 0 and
    averaged over that dimension.

    Args:
        dataset: Mapping of class name to a list of image paths.

    Returns:
        Dict mapping class name to the mean feature tensor of that class.
        Classes whose image list is empty are left out, with a warning.
    """
    import warnings

    refined_descriptions = {}
    for class_name, image_paths in dataset.items():
        if not image_paths:
            # torch.cat raises on an empty list, which used to abort the whole run
            # because of a single empty class folder.
            warnings.warn(f"Classe '{class_name}' não possui imagens e foi ignorada.")
            continue
        class_features = torch.cat(
            [extract_clip_features(path) for path in image_paths], dim=0
        )
        refined_descriptions[class_name] = class_features.mean(dim=0)
    return refined_descriptions

def classify_with_clip(image_path, refined_descriptions):
    """Pick the class whose reference tensor is most similar to the image.

    The image embedding from ``extract_clip_features`` is compared, by cosine
    similarity, with every tensor in ``refined_descriptions``.

    Args:
        image_path: Path of the image to classify.
        refined_descriptions: Mapping of class name to a reference feature tensor.

    Returns:
        The class name with the highest cosine similarity; on a tie the class
        that comes first in ``refined_descriptions`` wins.

    Raises:
        ValueError: If ``refined_descriptions`` is empty.
    """
    query = extract_clip_features(image_path)

    scores = {}
    for label, reference in refined_descriptions.items():
        # The reference gets a leading batch dimension to line up with the query.
        scores[label] = torch.cosine_similarity(query, reference.unsqueeze(0)).item()

    best_label, _ = max(scores.items(), key=lambda pair: pair[1])
    return best_label

def _plot_confusion_matrix(cm, labels):
    """Draw ``cm`` as an annotated heatmap with ``labels`` on both axes and show it."""
    fig, ax = plt.subplots(figsize=(10, 7))
    sns.heatmap(cm, annot=True, fmt='g', cmap='Blues',
                xticklabels=labels, yticklabels=labels, ax=ax)
    ax.set_xlabel('Classe Predita')
    ax.set_ylabel('Classe Real')
    ax.set_title('Matriz de Confusão')
    plt.show()


def evaluate_metrics(dataset, refined_descriptions):
    """Classify every image of ``dataset`` and compute classification metrics.

    Each image is classified with ``classify_with_clip`` and the class it is
    stored under in ``dataset`` is used as the ground truth. The confusion
    matrix is also plotted as a heatmap.

    Args:
        dataset: Mapping of class name to a list of image paths.
        refined_descriptions: Mapping of class name to a reference feature
            tensor, passed on to ``classify_with_clip``.

    Returns:
        Tuple ``(accuracy, precision, recall, f1, cm)``. Precision, recall and
        F1 are support-weighted averages over all classes; ``cm`` is the
        confusion matrix with rows and columns in sorted label order.

    Raises:
        ValueError: If ``dataset`` contains no images.
    """
    y_true = []
    y_pred = []
    for class_name, image_paths in dataset.items():
        for img_path in image_paths:
            y_true.append(class_name)
            y_pred.append(classify_with_clip(img_path, refined_descriptions))

    if not y_true:
        raise ValueError("Nenhuma imagem para avaliar: o dataset está vazio.")

    # One label list for the matrix and the plot ticks, so that the ticks always
    # line up with the matrix rows and columns.
    labels = sorted(set(y_true) | set(y_pred))

    accuracy = accuracy_score(y_true, y_pred)
    # All classes take part in the weighted averages. Restricting them to the
    # predicted labels drops every class that was never predicted and inflates
    # the scores; zero_division=0 scores such classes as 0 without a warning.
    precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    _plot_confusion_matrix(cm, labels)

    return accuracy, precision, recall, f1, cm

# Pipeline entry point
def main(dataset_path="dataset"):
    """Load the dataset, build the class prototypes, evaluate and print the metrics.

    Args:
        dataset_path: Folder containing the images organised in one sub-folder
            per class. Defaults to ``"dataset"``.
    """
    dataset = load_dataset(dataset_path)
    refined_descriptions = refine_class_descriptions(dataset)

    # NOTE(review): the prototypes above are built from the very same images that
    # are evaluated below, so the reported metrics are optimistic; use a held-out
    # split to measure generalisation.
    accuracy, precision, recall, f1, _ = evaluate_metrics(dataset, refined_descriptions)

    print(f"Acurácia: {accuracy:.2f}")
    print(f"Precisão: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1-Score: {f1:.2f}")
    
# Run the pipeline only when this code is executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()