In [None]:
import requests
from pathlib import Path
import io
import zipfile
import os
import pandas as pd
import shutil
from pydantic import BaseModel
from typing import Dict, List, Optional, Any
import torch
from torch.utils.data import Dataset, DataLoader
from typing import Tuple
from PIL import Image
import torch
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
from tqdm import tqdm
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

In [5]:
flicker_dir = Path("../data/flicker")
augmented_dir = Path("../data/augmented")

flicker_dir.mkdir(parents=True, exist_ok=True)
augmented_dir.mkdir(parents=True, exist_ok=True)

In [6]:
# '''Get dataset from flicker'''
# url = "https://www.lirmm.fr/~poncelet/Ressources/flickr_subset2.zip"
# print("Requesting...\n")
# response = requests.get(url)
# if response.status_code == 200:
#     print("TÃ©lÃ©chargement rÃ©ussi. Extraction...")
#     with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
#         # Extraire sans ajouter de sous-dossier supplÃ©mentaire
#         for member in zip_ref.namelist():
#             # Corrige les chemins pour ignorer un Ã©ventuel prefixe flickr_subset2/
#             member_path = member
#             if member.startswith("flickr_subset2/"):
#                 member_path = member[len("flickr_subset2/"):]
#             target_path = flicker_dir / member_path

#             # Si c'est un rÃ©pertoire, on le crÃ©e
#             if member.endswith("/"):
#                 target_path.mkdir(exist_ok=True, parents=True)
#             else:
#                 os.makedirs(os.path.dirname(target_path), exist_ok=True)
#                 with zip_ref.open(member) as source, open(target_path, "wb") as target:
#                     target.write(source.read())
#     print(f"DonnÃ©es extraites dans : {flicker_dir}")
# else:
#     print("Ã‰chec du tÃ©lÃ©chargement. Code HTTP :", response.status_code)


In [4]:
!ls ..

README.md  data  main.py  notebooks  pyproject.toml  uv.lock


In [7]:
metadata_path = Path("../data/augmented/metadata.csv")
df = pd.read_csv(metadata_path)
print(df.columns)
print(df.iloc[0])

Index(['image_path', 'label', 'caption'], dtype='object')
image_path                                        water_070.jpg
label                                               Label.WATER
caption       A kayaker wearing a blue wetsuit and black hel...
Name: 0, dtype: object


In [None]:
def get_label(filename: str):
    return filename.split("_")[0]


def get_uuid(filename: str):
    name = Path(filename).stem          
    parts = name.split("_")
    return "_".join(parts[:2])          


def build_augmented_path(img_path: Path, base_dir: Path):
    img_path = Path(img_path)
    filename = img_path.name
    label = get_label(filename)
    uuid = get_uuid(filename)
    print(uuid)

    return base_dir / label / uuid / filename

get_label(df["image_path"].iloc[1])
get_uuid(df["image_path"].iloc[1])

print(build_augmented_path(df["image_path"].iloc[1], augmented_dir))

water_070
../data/augmented/water/water_070/water_070_spatial.jpg


In [28]:

transform = transforms.Compose(
    [transforms.Resize((300, 500)),
        transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

class ImageCLIPDataset(Dataset):
    def __init__(self, imgs, labels, base_dir: Path, transform):
        self.img_paths = [Path(build_augmented_path(img, base_dir)) for img in imgs]
        self.labels = labels
        self.transform = transform
        self.classes = sorted(set(labels))                  
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}  



    def __getitem__(self, idx) -> Tuple[Image.Image, str]:
        img = Image.open(self.img_paths[idx]).convert("RGB")
        label_str = self.labels[idx]
        label = self.class_to_idx[label_str]   

        if self.transform:
            img = self.transform(img)
        return idx, img, label
    
    def __len__(self) -> int:
        return len(self.img_paths)
    
    def _get_img_path_from_idx(self, idx: int) -> Path:
        return self.img_paths[idx]
    
    def _get_label_from_idx(self, idx: int) -> str :
        return self.labels[idx]
    
    def _get_img_size(self, idx: int) -> Tuple[int, int]:
        img = Image.open(self.img_paths[idx]).convert("RGB")
        if self.transform:
            for t in self.transform.transforms:
                if isinstance(t, transforms.Resize):
                    img = t(img)
        return img.height, img.width

    
    
    

        


In [29]:
X =  df["image_path"]
print(X)

0               water_070.jpg
1       water_070_spatial.jpg
2         water_070_color.jpg
3               water_084.jpg
4       water_084_spatial.jpg
                ...          
1795     ball_008_spatial.jpg
1796       ball_008_color.jpg
1797             ball_030.jpg
1798     ball_030_spatial.jpg
1799       ball_030_color.jpg
Name: image_path, Length: 1800, dtype: object


In [32]:

X = df["image_path"]
y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)

X_train = X_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

train_dataset = ImageCLIPDataset(X_train, y_train, augmented_dir, transform)
test_dataset = ImageCLIPDataset(X_test, y_test, augmented_dir, transform)




bike_079
ball_052
dog_027
ball_042
dog_025
dog_049
water_016
dog_104
water_126
dog_150
ball_077
ball_145
water_061
bike_043
dog_065
ball_128
ball_144
dog_107
water_061
dog_145
bike_060
water_136
water_088
ball_072
bike_135
dog_030
dog_079
bike_061
water_057
dog_102
bike_009
dog_080
dog_028
water_037
water_014
bike_018
water_051
dog_015
ball_027
bike_095
water_102
water_060
ball_013
bike_070
ball_146
ball_143
ball_124
ball_036
dog_130
water_004
bike_116
dog_100
bike_062
ball_088
dog_027
ball_075
dog_105
water_070
dog_029
ball_059
ball_110
bike_104
bike_066
ball_004
bike_115
ball_004
water_057
water_004
ball_088
ball_095
water_047
water_056
water_034
dog_141
bike_008
ball_025
water_077
ball_008
ball_098
dog_136
ball_005
dog_066
water_005
ball_102
dog_144
ball_070
water_130
dog_109
dog_134
bike_044
water_113
water_124
bike_086
dog_149
water_131
dog_100
dog_149
dog_060
dog_090
dog_001
bike_069
water_133
ball_057
dog_046
ball_035
water_066
dog_108
dog_116
dog_016
water_124
ball_131
bike_016

In [33]:
print(train_dataset._get_img_size(3))
print(train_dataset._get_img_size(13))

(300, 500)
(300, 500)


In [34]:
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1)


In [35]:

class CNNBasic(nn.Module):
    def __init__(self, num_classes=4):
        super(CNNBasic, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.flattened_size = self._get_flattened_size()

        self.fc1 = nn.Linear(self.flattened_size, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def _get_flattened_size(self):
        # on simule une image dâ€™entrÃ©e (300x500 comme ton transform)
        with torch.no_grad():
            x = torch.randn(1, 3, 300, 500)
            x = self.pool(F.relu(self.conv1(x)))
            return x.view(1, -1).shape[1]

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [36]:
model = CNNBasic(4)

In [37]:
model

CNNBasic(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=600000, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=4, bias=True)
)

In [38]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [39]:
print(len(train_dataset))
print(len(test_dataset))

1440
360


In [42]:
import torch
print("CUDA available:", torch.cuda.is_available())
print("GPU count:", torch.cuda.device_count())


CUDA available: False
GPU count: 0




In [41]:
device = torch.device("cuda")
model = model.to(device)
for epoch in range(2):

    running_loss = 0.0
    for i, data in tqdm(enumerate(train_loader, 0)):
        idx, inputs, labels = data
        inputs.to(device)
        labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 2000 == 1999:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

RuntimeError: No CUDA GPUs are available

tensor([[ 0.4340, -0.2184, -0.0767, -0.3644]], grad_fn=<AddmmBackward0>)


In [None]:

# Met le modÃ¨le en mode Ã©valuation
model.eval()

correct = 0
total = 0
losses = []

# DÃ©sactive le calcul du gradient pour lâ€™Ã©valuation
with torch.no_grad():
    for idx, inputs, labels in tqdm(train_loader, desc="Evaluation"):
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        losses.append(loss.item())

        # prÃ©dictions
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Moyenne des pertes et prÃ©cision
avg_loss = np.mean(losses)
accuracy = 100 * correct / total

print("\nðŸ“Š --- RÃ©sultats finaux ---")
print(f"Loss moyenne : {avg_loss:.4f}")
print(f"PrÃ©cision (accuracy) : {accuracy:.2f}%")
print("-----------------------------")


Evaluation: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 480/480 [00:25<00:00, 18.79it/s]


ðŸ“Š --- RÃ©sultats finaux ---
Loss moyenne : 1.1340
PrÃ©cision (accuracy) : 56.04%
-----------------------------



