In [1]:
import os
import re

import numpy as np
import pandas as pd
from video_and_webcam_inference import SyringeVolumeEstimator

In [2]:
# Empty results table: one column per metadata field / summary statistic
# that is reported for each processed video.
final_dataframe = pd.DataFrame(
    columns=[
        'video_path',
        'actual_volume (ml)',
        'min',
        'max',
        'mean',
        'std',
        'median',
        'SEM',
        'CV',
        'diameter (cm)',
        'zoom',
    ]
)

In [None]:
# Recursively collect every ".mov" file under the "videos" directory.
# os.walk yields entries in an arbitrary, filesystem-dependent order, so the
# list is sorted to make the processing order (and therefore the row order of
# the results) reproducible across machines and re-runs.
video_paths = sorted(
    os.path.join(root, file)
    for root, _dirs, files in os.walk("videos")
    for file in files
    if file.endswith(".mov")
)

print(f"Found {len(video_paths)} video files.")

In [4]:
# File-name convention encoded by the patterns below:
#   videos/<zoom>x/<diameter>-<volume>ml.mov      e.g. videos/2x/1.25-5ml.mov
# Both "/" and "\" are accepted as path separators: the paths are built with
# os.path.join, which emits "\" on Windows, where a hard-coded "/" never matches.

# Diameter: the decimal number (digits.digits) right before the dash in the file name.
pattern_diameter: re.Pattern = re.compile(r'[\\/](\d+\.\d+)-\d+ml\.mov$')

# Volume: the integer between the dash and the "ml" suffix in the file name.
pattern_volume: re.Pattern = re.compile(r'[\\/]\d+\.\d+-(\d+)ml\.mov$')

# Zoom: the "<digits>x" directory directly below the top-level "videos" folder.
pattern_zoom: re.Pattern = re.compile(r'^videos[\\/](\d+x)[\\/]')

In [None]:
# Batch-process every discovered video: run the estimator, load the CSV of
# per-row volume estimates, and summarise the column that belongs to the
# syringe diameter encoded in the file name.

# Only the first MAX_ROWS rows of each per-video CSV are summarised.
MAX_ROWS = 150

# One summary dict per successfully processed video.
new_rows = []
# (video_path, reason) for every video that could not be summarised.
skipped = []

for video_path in video_paths:
    print(f"Processing {video_path}...")

    # Parse the file name first: without a diameter there is no column to
    # summarise, so reject the video before running inference instead of
    # failing with a KeyError afterwards and aborting the whole batch.
    match_diameter = pattern_diameter.search(video_path)
    if match_diameter is None:
        reason = "could not parse the syringe diameter from the file name"
        print(f"Skipping {video_path}: {reason}.\n\n")
        skipped.append((video_path, reason))
        continue
    diameter: str = match_diameter.group(1)

    # Extract volume
    match_volume = pattern_volume.search(video_path)
    volume: str = match_volume.group(1) if match_volume else "Unknown"

    # Extract zoom
    match_zoom = pattern_zoom.search(video_path)
    zoom: str = match_zoom.group(1) if match_zoom else "Unknown"

    # Create an instance of the SyringeVolumeEstimator for this video
    estimator = SyringeVolumeEstimator()
    estimator.run(input_source='video', video_path=video_path)

    # NOTE(review): presumably estimator.run() (re)writes 'syringe_data.csv'
    # for the video it just processed — confirm, otherwise stale data is read.
    data = pd.read_csv('syringe_data.csv').iloc[0:MAX_ROWS]

    column = f"volume_D{diameter}"
    if column not in data.columns:
        reason = f"column '{column}' not found in syringe_data.csv"
        print(f"Skipping {video_path}: {reason}.\n\n")
        skipped.append((video_path, reason))
        continue

    volumes = data[column]

    # Calculate statistics
    min_val = volumes.min()
    max_val = volumes.max()
    mean_val = volumes.mean()
    std_val = volumes.std()
    median_val = volumes.median()
    # std() ignores missing values, so the SEM must divide by the number of
    # non-missing samples; len() would also count NaN rows and understate it.
    sem_val = std_val / np.sqrt(volumes.count())
    # Coefficient of variation, expressed as a percentage of the mean.
    cv_val = (std_val / mean_val) * 100

    print(f"Min: {min_val:.3f}")
    print(f"Max: {max_val:.3f}")
    print(f"Mean: {mean_val:.3f}")
    print(f"Std: {std_val:.3f}")
    print(f"Median: {median_val:.3f}")
    print(f"SEM: {sem_val:.3f}")
    print(f"CV: {cv_val:.3f}")

    # Append the new row as a dictionary to the list
    new_rows.append({
        'video_path': video_path,
        'actual_volume (ml)': volume,
        'diameter (cm)': diameter,
        'min': min_val,
        'max': max_val,
        'mean': mean_val,
        'std': std_val,
        'median': median_val,
        'SEM': sem_val,
        'CV': cv_val,
        'zoom': zoom
    })

    print(f"Finished processing {video_path}.\n\n")

if skipped:
    print(f"Skipped {len(skipped)} video(s):")
    for skipped_path, skipped_reason in skipped:
        print(f"  {skipped_path}: {skipped_reason}")

# Build the results table once from the collected rows, keeping the column
# order of the predefined (empty) table. Rebuilding instead of concatenating
# onto the existing frame keeps this cell idempotent (re-running it does not
# duplicate rows) and avoids the FutureWarning recent pandas versions emit
# when concatenating with an empty DataFrame.
final_dataframe = pd.DataFrame(new_rows, columns=list(final_dataframe.columns))

# Display the last few rows of the updated DataFrame
display(final_dataframe.tail(30))

In [6]:
# Persist the per-video summary table to disk; the row index is omitted
# from the written CSV.
final_dataframe.to_csv(
    path_or_buf='syringe_volume_estimations_and_metrics.csv',
    index=False,
)