# 1. Reading images

In [1]:
import pandas as pd
import os
from pathlib import Path

In [2]:
# Base directory for images (they are in sub-directories with the name of each class)
base_dir = Path("../data/raw-img/")

In [3]:
# List of all sub-directories (classes)
sub_dirs = [d for d in base_dir.iterdir() if d.is_dir()]
sub_dirs

[PosixPath('../data/raw-img/gallina'),
 PosixPath('../data/raw-img/ragno'),
 PosixPath('../data/raw-img/gatto'),
 PosixPath('../data/raw-img/farfalla'),
 PosixPath('../data/raw-img/mucca'),
 PosixPath('../data/raw-img/cavallo'),
 PosixPath('../data/raw-img/cane'),
 PosixPath('../data/raw-img/pecora'),
 PosixPath('../data/raw-img/scoiattolo'),
 PosixPath('../data/raw-img/elefante')]

In [4]:
# Now for each of the sub-directories, we will create a list of all the images in that directory with their paths and classes (name of the directories they are in)
image_paths = []
image_classes = []
for sub_dir in sub_dirs:
    # Get the class name from the directory name
    class_name = sub_dir.name
    
    # List all image files in the directory
    for img_file in sub_dir.iterdir():
        if img_file.is_file() and img_file.suffix.lower() in ['.jpg', '.jpeg', '.png']:
            image_paths.append(img_file.absolute())
            image_classes.append(class_name)
# Create a DataFrame with the image paths and classes
df = pd.DataFrame({
    'image_path': image_paths,
    'class_name': image_classes
})


In [5]:
df

Unnamed: 0,image_path,class_name
0,/Users/joaojunqueira/Projetos/simple_cnn/noteb...,gallina
1,/Users/joaojunqueira/Projetos/simple_cnn/noteb...,gallina
2,/Users/joaojunqueira/Projetos/simple_cnn/noteb...,gallina
3,/Users/joaojunqueira/Projetos/simple_cnn/noteb...,gallina
4,/Users/joaojunqueira/Projetos/simple_cnn/noteb...,gallina
...,...,...
26174,/Users/joaojunqueira/Projetos/simple_cnn/noteb...,elefante
26175,/Users/joaojunqueira/Projetos/simple_cnn/noteb...,elefante
26176,/Users/joaojunqueira/Projetos/simple_cnn/noteb...,elefante
26177,/Users/joaojunqueira/Projetos/simple_cnn/noteb...,elefante


## Translating class names to english

In [10]:
translate = {
    "cane": "dog",
    "cavallo": "horse",
    "elefante": "elephant",
    "farfalla": "butterfly",
    "gallina": "chicken",
    "gatto": "cat",
    "mucca": "cow",
    "pecora": "sheep",
    "scoiattolo": "squirrel",
    "ragno": "spider",
}

In [11]:
df.class_name = df.class_name.apply(lambda x: translate[x] if x in translate else x)

In [12]:
df.class_name.value_counts()

class_name
dog          4863
spider       4821
chicken      3098
horse        2623
butterfly    2112
cow          1866
squirrel     1862
sheep        1820
cat          1668
elephant     1446
Name: count, dtype: int64

In [13]:
df.to_csv("../data/img_labels.csv", index=False)