In [8]:
import pandas as pd
import os
import shutil
from PIL import Image
from torchvision import transforms
from tqdm import tqdm



In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Dossier source et destination pour les images à augmentées
source_augmented_image_dir = '/content/drive/MyDrive/projet/train_dataset'
destination_augmented_image_dir = ('/content/drive/MyDrive/projet/train_dataset_aug')

# Copier le dossier des images augmentées
shutil.copytree(source_augmented_image_dir, destination_augmented_image_dir)
print(f"Dossier des images augmentées copié vers {destination_augmented_image_dir}")

Dossier des images augmentées copié vers /content/drive/MyDrive/projet/train_dataset_aug


In [7]:
# Path to trainset
PATH_TO_LEGENDS = '/content/drive/MyDrive/projet/train_dataset/df_training.xlsx'
df = pd.read_excel(PATH_TO_LEGENDS)
# not considering long legends
df = df[df["legend"].apply(len)<77]


In [9]:
# Définir les transformations d'augmentation
augmentation_transforms = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomResizedCrop(size=(256, 256), scale=(0.8, 1.0)),
    transforms.ToTensor()
])
# Fonction pour sauvegarder l'image
def save_image(tensor, path):
    image = transforms.ToPILImage()(tensor)
    image.save(path)

# Ajouter les images augmentées et mettre à jour le DataFrame
new_rows = []

for idx, row in tqdm(df.iterrows(), total=len(df)):
    image_path = row['image_name']
    image_legend = row['legend']
    image_number = row['Unnamed: 0']

    # Combiner le chemin relatif avec le répertoire de base
    full_image_path = os.path.join(destination_augmented_image_dir, image_path)

    # Vérifier si le fichier existe
    if not os.path.exists(full_image_path):
        print(f"File not found: {full_image_path}")
        continue

    # Charger l'image
    image = Image.open(full_image_path).convert('RGB')

    # Appliquer les augmentations
    for i in range(5):  # Nombre d'augmentations par image
        augmented_image = augmentation_transforms(image)
        augmented_image_path = os.path.join(destination_augmented_image_dir, f'aug_{image_number}_{i}.jpg')

        # Sauvegarder l'image augmentée
        save_image(augmented_image, augmented_image_path)

        # Ajouter la nouvelle ligne au DataFrame
        new_rows.append({
            'Unnamed: 0': image_number,
            'image_name': augmented_image_path,
            'legend': image_legend
        })

# Ajouter les nouvelles lignes au DataFrame original
df_augmented = pd.DataFrame(new_rows)
df_final = pd.concat([df, df_augmented], ignore_index=True)

# Sauvegarder le DataFrame mis à jour dans un nouveau fichier Excel sur Google Drive
output_excel_path = destination_augmented_image_dir + '/df_training_aug.xlsx'
df_final.to_excel(output_excel_path, index=False)

print("Augmentation terminée et fichier Excel mis à jour.")


100%|██████████| 2109/2109 [1:28:34<00:00,  2.52s/it]


Augmentation terminée et fichier Excel mis à jour.


In [10]:
df_final

Unnamed: 0.1,Unnamed: 0,image_name,legend
0,0,pho_2K24711_02_01.jpg,durban (port nalcef) water police
1,1,pho_2K24711_02_02.jpg,durban (port nalcef) water police
2,2,pho_2K24711_05_01.jpg,Zulu policemen
3,3,pho_2K24711_05_02.jpg,Zulu policemen
4,4,pho_2K24711_06_01.jpg,native police polce indigène
...,...,...,...
12649,3735,/content/drive/MyDrive/projet/train_dataset_au...,the first bateau-mouche bringing sugar to Pari...
12650,3735,/content/drive/MyDrive/projet/train_dataset_au...,the first bateau-mouche bringing sugar to Pari...
12651,3735,/content/drive/MyDrive/projet/train_dataset_au...,the first bateau-mouche bringing sugar to Pari...
12652,3735,/content/drive/MyDrive/projet/train_dataset_au...,the first bateau-mouche bringing sugar to Pari...


In [13]:
image_column = 'image_name'

# Nettoyer la colonne pour conserver uniquement les noms de fichiers
df_final[image_column] = df_final[image_column].apply(lambda x: os.path.basename(x))
df_final.to_excel(output_excel_path, index=False)

print(f"Le fichier Excel nettoyé a été sauvegardé sous {output_excel_path}")


Le fichier Excel nettoyé a été sauvegardé sous /content/drive/MyDrive/projet/train_dataset_aug/df_training_aug.xlsx


In [14]:
df_final

Unnamed: 0.1,Unnamed: 0,image_name,legend
0,0,pho_2K24711_02_01.jpg,durban (port nalcef) water police
1,1,pho_2K24711_02_02.jpg,durban (port nalcef) water police
2,2,pho_2K24711_05_01.jpg,Zulu policemen
3,3,pho_2K24711_05_02.jpg,Zulu policemen
4,4,pho_2K24711_06_01.jpg,native police polce indigène
...,...,...,...
12649,3735,aug_3735_0.jpg,the first bateau-mouche bringing sugar to Pari...
12650,3735,aug_3735_1.jpg,the first bateau-mouche bringing sugar to Pari...
12651,3735,aug_3735_2.jpg,the first bateau-mouche bringing sugar to Pari...
12652,3735,aug_3735_3.jpg,the first bateau-mouche bringing sugar to Pari...
