In [1]:
import pandas as pd

In [2]:
def count_rows_csv(path_csv):
    """
    Counts the number of rows (data points) in a CSV file.
    """
    df = pd.read_csv(path_csv)
    return len(df)

In [3]:
def count_rows_parquet(path_parquet):
    """
    Counts the number of rows (data points) in a Parquet file.
    """
    df = pd.read_parquet(path_parquet)
    return len(df)

In [4]:
def average_time_per_image(path_csv):
    """
    Calculates the average processing time per image from a CSV file.
    Assumes the file contains 'image_count' and 'duration_seconds' columns.
    """
    df = pd.read_csv(path_csv)

    if "image_count" not in df.columns or "duration_seconds" not in df.columns:
        raise ValueError("CSV must contain 'image_count' and 'duration_seconds' columns.")

    total_images = df["image_count"].sum()
    total_duration = df["duration_seconds"].sum()

    if total_images == 0:
        return 0  # Avoid division by zero

    average = total_duration / total_images
    return average

In [6]:
# first iteration
print('Cantidad de imagenes procesadas')
count_rows_csv("data/processed_paths.csv")

Cantidad de imagenes procesadas


88

In [9]:
# the available images
print('Cantidad de imagenes disponibles')
count_rows_csv("data/image_paths.csv")

Cantidad de imagenes disponibles


230

In [11]:
# the performance of the first iteration
print('Promedio de segundos para procesar cada imagen en la primera iteracion')
average_time_per_image('performance_log.csv')

Promedio de segundos para procesar cada imagen en la primera iteracion


np.float64(63.62727272727273)

In [12]:
# first + second iteration
print('Cantidad de imagenes procesadas')
count_rows_csv("data/processed_paths.csv")

Cantidad de imagenes procesadas


230

In [13]:
# the performance of the first iteration + second
print('Promedio de segundos para procesar cada imagen en la primera iteracion')
average_time_per_image('performance_log.csv')

Promedio de segundos para procesar cada imagen en la primera iteracion


np.float64(61.70652173913044)