In [1]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import csv

In [2]:
DATASET_PATH = 'dataset/Flicker8k_Dataset/'

In [3]:
#
url_flicker_train = 'flickr_8k_train_dataset.txt';
def flicker_source(fName):
    """
    Une fonction qui charge un jeu de données, et renvoie un générateur
    permettant de le parcourir.

    Résultat:
        un générateur renvoyant des couples de string "nom d'image", "label de l'image"
    """
    images = []
    descriptions = []
    with open(fName, encoding='utf-8') as f:
        reader = csv.reader(f, delimiter='\t')
        next(reader) # skip the first row (column headers)
        for row in reader:
            image, texte = row
            images.append(image)
            descriptions.append(texte)
        return images, descriptions

def flicker_train():
    return flicker_source(url_flicker_train)

In [4]:
training_images, training_descriptions = flicker_train()
# print(training_images)

In [16]:
model = models.resnet18(pretrained=True)
model.to('cuda')

def get_image_feature(path):
    img = Image.open(path)
    preprocess = transforms.Compose([
       transforms.Resize(256),
       transforms.CenterCrop(224),
       transforms.ToTensor(),
       transforms.Normalize(mean=[0.485, 0.456, 0.406],
                            std=[0.229, 0.224, 0.225])
    ])
    img_tensor = preprocess(img).to('cuda')
    img_tensor.unsqueeze_(0)

    # Utilisez le modèle pour extraire les features
    with torch.no_grad():
        features = model.conv1(img_tensor)
        features = model.bn1(features)
        features = model.relu(features)
        features = model.maxpool(features)

        features = model.layer1(features)
        features = model.layer2(features)
        features = model.layer3(features)
        features = model.layer4(features)
        features = model.avgpool(features)
        
    features = features.cpu()
    return features

In [17]:
features = []
for training_image in training_images:
    features.append(get_image_feature(DATASET_PATH + training_image))

In [23]:
print(features[0].flatten())

tensor([0.9722, 0.8841, 0.9340, 1.0257, 0.9139, 0.8516, 0.9013, 1.0148, 0.9846,
        0.9426, 0.8553, 0.9190, 0.9353, 0.9135, 0.8150, 0.9910, 0.9255, 1.4319,
        0.9269, 0.8835, 0.8881, 1.1041, 0.8608, 0.9571, 1.0026, 1.0096, 0.9476,
        0.9345, 0.9438, 0.8964, 0.9022, 0.8389, 0.8829, 0.8714, 0.8951, 0.9042,
        0.9066, 0.9351, 0.8707, 0.8929, 0.8106, 0.7925, 0.8989, 1.0016, 0.8498,
        0.9081, 0.9235, 1.2183, 0.8512, 0.8289, 0.9634, 0.8882, 0.8392, 0.9174,
        0.9274, 0.8887, 1.0424, 0.9828, 0.9878, 0.9217, 0.8705, 0.9429, 0.9389,
        0.8982, 0.8296, 1.0515, 0.8120, 0.9025, 1.0130, 0.8970, 0.9613, 0.8315,
        0.8995, 0.8600, 0.8843, 0.9041, 0.7945, 0.9432, 0.8639, 0.8708, 0.9373,
        1.0031, 1.1006, 1.2632, 0.8905, 0.9953, 0.8358, 0.7906, 0.9489, 0.8521,
        0.8101, 0.9444, 0.8710, 0.9337, 0.8639, 0.8936, 0.8992, 0.8745, 0.8793,
        0.8137, 0.9525, 1.0496, 0.9615, 1.0044, 0.9081, 0.8660, 0.9636, 0.8178,
        0.8459, 0.9114, 0.9518, 0.9341, 