# 1. Reading images

In [2]:
import pandas as pd
import os
from pathlib import Path

In [3]:
# Root of the raw image set: one sub-directory per class, named after that class.
raw_img_location = "../data/raw-img/"
base_dir = Path(raw_img_location)

In [4]:
# One entry per class: every sub-directory directly under base_dir.
# Path.iterdir() yields entries in arbitrary order, so the result is sorted to
# keep the class order (and everything derived from it) the same on every
# machine and on every run.
sub_dirs = sorted(d for d in base_dir.iterdir() if d.is_dir())
sub_dirs

[WindowsPath('../data/raw-img/cane'),
 WindowsPath('../data/raw-img/cavallo'),
 WindowsPath('../data/raw-img/elefante'),
 WindowsPath('../data/raw-img/farfalla'),
 WindowsPath('../data/raw-img/gallina'),
 WindowsPath('../data/raw-img/gatto'),
 WindowsPath('../data/raw-img/mucca'),
 WindowsPath('../data/raw-img/pecora'),
 WindowsPath('../data/raw-img/ragno'),
 WindowsPath('../data/raw-img/scoiattolo')]

In [7]:
# Walk every class directory and record, for each image file found directly
# inside it, the file's absolute path and its class (the directory name).
# NOTE(review): absolute paths tie the table to this machine's folder layout;
# paths relative to base_dir may be preferable - confirm what downstream
# consumers expect before changing.
image_suffixes = {'.jpg', '.jpeg', '.png'}  # compared against the lower-cased file suffix
image_paths = []
image_classes = []
for sub_dir in sub_dirs:
    # The class label is the name of the directory holding the images
    class_name = sub_dir.name

    # iterdir() yields files in arbitrary order; sorting keeps the row order
    # of the resulting table reproducible across filesystems and runs.
    for img_file in sorted(sub_dir.iterdir()):
        if img_file.is_file() and img_file.suffix.lower() in image_suffixes:
            image_paths.append(img_file.absolute())
            image_classes.append(class_name)

# Two aligned columns: one row per image
df = pd.DataFrame({
    'image_path': image_paths,
    'class_name': image_classes
})


In [8]:
# Show the table of image paths and class labels (one row per image);
# pandas abbreviates long frames to their first and last rows.
df

Unnamed: 0,image_path,class_name
0,c:\Users\jfsju\Projetos\simple_cnn\notebooks\....,cane
1,c:\Users\jfsju\Projetos\simple_cnn\notebooks\....,cane
2,c:\Users\jfsju\Projetos\simple_cnn\notebooks\....,cane
3,c:\Users\jfsju\Projetos\simple_cnn\notebooks\....,cane
4,c:\Users\jfsju\Projetos\simple_cnn\notebooks\....,cane
...,...,...
26174,c:\Users\jfsju\Projetos\simple_cnn\notebooks\....,scoiattolo
26175,c:\Users\jfsju\Projetos\simple_cnn\notebooks\....,scoiattolo
26176,c:\Users\jfsju\Projetos\simple_cnn\notebooks\....,scoiattolo
26177,c:\Users\jfsju\Projetos\simple_cnn\notebooks\....,scoiattolo


## Translating class names to English

In [10]:
# Italian folder name -> English class label, one entry per class directory.
# Only the Italian -> English direction is kept: the cell that rewrites
# df.class_name looks every label up in this same dict, so English -> Italian
# entries would turn already-translated labels back into Italian whenever that
# cell is run a second time.
translate = {
    "cane": "dog",
    "cavallo": "horse",
    "elefante": "elephant",
    "farfalla": "butterfly",
    "gallina": "chicken",
    "gatto": "cat",
    "mucca": "cow",
    "pecora": "sheep",
    # Was only present reversed ("spider": "ragno"), which left the spider
    # images with their Italian label.
    "ragno": "spider",
    "scoiattolo": "squirrel",
}

In [12]:
# Replace each label by its entry in `translate`; labels without an entry are kept unchanged.
df["class_name"] = df["class_name"].map(lambda label: translate.get(label, label))

In [14]:
# Write the labelled table to disk; index=False keeps the row index out of the file.
labels_csv = "../data/img_labels.csv"
df.to_csv(labels_csv, index=False)