In [1]:
import requests
from pathlib import Path
import io
import zipfile
import os
import pandas as pd
import shutil
from pydantic import BaseModel
from typing import Dict, List, Optional, Any

In [2]:
flicker_dir = Path("../data/flicker")
raw_data_dir = Path("../data/raw_dataset")
flicker_dir.mkdir(parents=True, exist_ok=True)
raw_data_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# '''Get dataset from flicker'''
# url = "https://www.lirmm.fr/~poncelet/Ressources/flickr_subset2.zip"
# print("Requesting...\n")
# response = requests.get(url)
# if response.status_code == 200:
#     print("TÃ©lÃ©chargement rÃ©ussi. Extraction...")
#     with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
#         # Extraire sans ajouter de sous-dossier supplÃ©mentaire
#         for member in zip_ref.namelist():
#             # Corrige les chemins pour ignorer un Ã©ventuel prefixe flickr_subset2/
#             member_path = member
#             if member.startswith("flickr_subset2/"):
#                 member_path = member[len("flickr_subset2/"):]
#             target_path = flicker_dir / member_path

#             # Si c'est un rÃ©pertoire, on le crÃ©e
#             if member.endswith("/"):
#                 target_path.mkdir(exist_ok=True, parents=True)
#             else:
#                 os.makedirs(os.path.dirname(target_path), exist_ok=True)
#                 with zip_ref.open(member) as source, open(target_path, "wb") as target:
#                     target.write(source.read())
#     print(f"DonnÃ©es extraites dans : {flicker_dir}")
# else:
#     print("Ã‰chec du tÃ©lÃ©chargement. Code HTTP :", response.status_code)


Requesting...

TÃ©lÃ©chargement rÃ©ussi. Extraction...
DonnÃ©es extraites dans : ../data/flicker


In [78]:
caption_csv_path = flicker_dir / "captions.csv"
df = pd.read_csv(caption_csv_path)
print(df.columns)
print(df.iloc[0])

Index(['image_path', 'label', 'caption'], dtype='object')
image_path                        images/dog/dog_001.jpg
label                                                dog
caption       A black dog and a spotted dog are fighting
Name: 0, dtype: object


In [115]:
import torch
from torch.utils.data import Dataset, DataLoader
from typing import Tuple
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

transform = transforms.Compose(
    [transforms.Resize((300, 500)),
        transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

class ImageCLIPDataset(Dataset):
    def __init__(self, imgs, labels, flicker_dir, transform):
        self.img_paths = [Path(flicker_dir / img) for img in imgs]
        self.labels = labels
        self.transform = transform
        self.classes = sorted(set(labels))                  # toutes les classes uniques
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}  # mapping texte -> int



    def __getitem__(self, idx) -> Tuple[Image.Image, str]:
        img = Image.open(self.img_paths[idx]).convert("RGB")
        label_str = self.labels[idx]
        label = self.class_to_idx[label_str]   # âœ… convertit en entier

        if self.transform:
            img = self.transform(img)
        return idx, img, label
    
    def __len__(self) -> int:
        return len(self.img_paths)
    
    def _get_img_path_from_idx(self, idx: int) -> Path:
        return self.img_paths[idx]
    
    def _get_label_from_idx(self, idx: int) -> str :
        return self.labels[idx]
    
    def _get_img_size(self, idx: int) -> Tuple[int, int]:
        img = Image.open(self.img_paths[idx]).convert("RGB")
        if self.transform:
            for t in self.transform.transforms:
                if isinstance(t, transforms.Resize):
                    img = t(img)
        return img.height, img.width

    
    
    

        


In [116]:
from sklearn.model_selection import train_test_split


X = df["image_path"]
y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

X_train = X_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

train_dataset = ImageCLIPDataset(X_train, y_train, flicker_dir, transform)
test_dataset = ImageCLIPDataset(X_test, y_test, flicker_dir, transform)




In [117]:
print(train_dataset._get_img_size(3))
print(train_dataset._get_img_size(13))

(300, 500)
(300, 500)


In [118]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1)


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNBasic(nn.Module):
    def __init__(self, num_classes=4):
        super(CNNBasic, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.flattened_size = self._get_flattened_size()

        self.fc1 = nn.Linear(self.flattened_size, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def _get_flattened_size(self):
        # on simule une image dâ€™entrÃ©e (300x500 comme ton transform)
        with torch.no_grad():
            x = torch.randn(1, 3, 300, 500)
            x = self.pool(F.relu(self.conv1(x)))
            return x.view(1, -1).shape[1]

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [120]:
model = CNNBasic(4)

In [121]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [125]:
print(len(train_dataset))
print(len(test_dataset))

480
120


In [126]:
import numpy as np
from tqdm import tqdm
for epoch in range(2):

    running_loss = 0.0
    for i, data in tqdm(enumerate(train_loader, 0)):
        idx, inputs, labels = data

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

480it [03:50,  2.08it/s]
480it [05:43,  1.40it/s]

Finished Training





tensor([[ 0.4340, -0.2184, -0.0767, -0.3644]], grad_fn=<AddmmBackward0>)


In [131]:
import torch

# Met le modÃ¨le en mode Ã©valuation
model.eval()

correct = 0
total = 0
losses = []

# DÃ©sactive le calcul du gradient pour lâ€™Ã©valuation
with torch.no_grad():
    for idx, inputs, labels in tqdm(train_loader, desc="Evaluation"):
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        losses.append(loss.item())

        # prÃ©dictions
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Moyenne des pertes et prÃ©cision
avg_loss = np.mean(losses)
accuracy = 100 * correct / total

print("\nðŸ“Š --- RÃ©sultats finaux ---")
print(f"Loss moyenne : {avg_loss:.4f}")
print(f"PrÃ©cision (accuracy) : {accuracy:.2f}%")
print("-----------------------------")


Evaluation: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 480/480 [00:25<00:00, 18.79it/s]


ðŸ“Š --- RÃ©sultats finaux ---
Loss moyenne : 1.1340
PrÃ©cision (accuracy) : 56.04%
-----------------------------



