In [None]:
import os

import pandas as pd
from itables import init_notebook_mode

import src.graphics as graphics

# Switch itables on for the whole notebook: with all_interactive=True,
# pandas objects shown as cell output are rendered as interactive tables
# (per the itables documentation) instead of the default static repr.
init_notebook_mode(all_interactive=True)

In [None]:
# Absolute location of the raw CSV. The relative path is resolved against
# the current working directory of the kernel.
RAW_DATASET = os.path.abspath(
    "./data/raw/emptyNonEmptyDataset/10000Images.csv"
)

# The file uses ';' as its field separator rather than the default ','.
dataset_original = pd.read_csv(
    RAW_DATASET,
    sep=";",
)

In [None]:
# Headline figures: number of entries in the "file_name" column and number
# of distinct (non-null) values in the "label" column.
num_images = dataset_original["file_name"].size
num_classes = dataset_original["label"].nunique()

# Occurrences of each label, most frequent first (value_counts default order).
class_distribution = dataset_original["label"].value_counts()

# Text summary: the two totals, then the per-label counts.
print(
    f"Number of images in dataset: {num_images}",
    f"Number of different classes: {num_classes}",
    sep="\n",
)
print(class_distribution)

In [None]:
# Directory that receives the exported figures. The relative path is
# resolved against the current working directory of the kernel.
FASE_1a_FOLDER = os.path.abspath("./reports/fase_1/fase_1a")

# Create the directory together with any missing parents; exist_ok=True
# keeps the cell safe to re-run when the folder is already there.
os.makedirs(name=FASE_1a_FOLDER, exist_ok=True)

In [None]:
# Pie chart built from the per-label counts: the index (label names) and the
# values (counts) are passed to the project helper along with the title.
fig_pie = graphics.create_pie_chart(
    class_distribution.index,
    class_distribution.values,
    "Class Distribution in Original Dataset",
)
fig_pie.show()

# Also keep a copy of the figure on disk inside the report folder.
fig_pie_path = os.path.join(FASE_1a_FOLDER, "class_distribution.png")
fig_pie.write_image(fig_pie_path)

In [None]:
# Bar chart of the same per-label counts. The two short strings are
# presumably the axis labels and the last one the title (confirm against
# src.graphics.create_bar_chart).
fig_bar = graphics.create_bar_chart(
    class_distribution.index,
    class_distribution.values,
    "Class",
    "Number of Images",
    "Number of Images in Original Dataset by Class",
    show_values=True,
)
fig_bar.show()

# Also keep a copy of the figure on disk inside the report folder.
fig_bar_path = os.path.join(FASE_1a_FOLDER, "class_barchart.png")
fig_bar.write_image(fig_bar_path)

In [None]:
# Collapse the per-label counts into two buckets: "vacia" keeps its own
# count, and every other label is summed into a single "animal" entry.
# Work on a copy so class_distribution itself is left untouched.
binary_distribution = class_distribution.copy()
non_empty_total = binary_distribution.drop("vacia").sum()
binary_distribution.loc["animal"] = non_empty_total

# Keep only the two buckets, in this fixed order.
binary_distribution = binary_distribution.loc[["vacia", "animal"]]

# Pie chart of the two-bucket ("vacia" vs "animal") distribution, with
# show_values enabled on the project helper.
fig_pie_binary = graphics.create_pie_chart(
    binary_distribution.index,
    binary_distribution.values,
    "Class Distribution (Empty VS Animal) in Original Dataset",
    show_values=True,
)
fig_pie_binary.show()

# Also keep a copy of the figure on disk inside the report folder.
fig_pie_binary_path = os.path.join(
    FASE_1a_FOLDER, "class_binary_distribution.png"
)
fig_pie_binary.write_image(fig_pie_binary_path)