# Training YOLO with Ultralytics pipeline

In [3]:
# Instantiate ColabCode on port 10000.
# NOTE(review): the recorded output of this cell is a FileNotFoundError for 'wget',
# so this presumably needs the `wget` executable on PATH — confirm before relying on it.
from colabcode import ColabCode
ColabCode(port=10000)

FileNotFoundError: [Errno 2] No such file or directory: 'wget'

In [2]:
from ultralytics import YOLO
import os, sys
from tqdm.notebook import tqdm
import ultralytics

ultralytics.checks()

import torch
import math

# this ensures that the current MacOS version is at least 12.3+
print(torch.backends.mps.is_available())
# this ensures that the current current PyTorch installation was built with MPS activated.
print(torch.backends.mps.is_built())

%matplotlib inline

# assert sys.version_info >= (3, 8), "Python 3.8  is required."
%load_ext lab_black

Ultralytics YOLOv8.0.147 🚀 Python-3.11.3 torch-2.1.0.dev20230515 CPU (Apple M2 Pro)
Setup complete ✅ (10 CPUs, 32.0 GB RAM, 485.9/926.4 GB disk)


True
True


ModuleNotFoundError: No module named 'lab_black'

In [None]:
# Load a model
# Training starts from the pretrained `yolov8m.pt` weights; the commented-out
# alternative below builds an untrained model from a YAML spec instead.
#model = YOLO('yolov8n.yaml')  # build a new model from scratch
model = YOLO('yolov8m.pt')  # load a pretrained model (recommended for training)

# # If using MPS on Apple Silicon
# if torch.backends.mps.is_built():
#     mps_device = torch.device("mps")
#     model.to(mps_device)

# # OPTIONAL for memory errors
# # Set the value of PYTORCH_MPS_HIGH_WATERMARK_RATIO
# os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] =  "0.3"

## Train

In [None]:
import wandb

# Open a Weights & Biases run for this training session; the notes record what
# changed in the dataset and model compared with earlier runs.
run_notes = (
    "MIT Fishery robo analytics Outputs per training example: 2 Blur: Up to 4px. "
    "Now added ~50 more images. Also training on yolov8m vs yolov8n"
)
wandb.init(project="MITFisheryHerring", notes=run_notes)

In [None]:
# Train the loaded model on the dataset described by river_herring.yaml:
# 15 epochs, dropout 0.4, on the 'mps' device.
results = model.train(data='river_herring.yaml', epochs=15,dropout=0.4,device='mps')  # device is already 'mps' here

In [None]:
# Validate the model
metrics = model.val()  # no arguments needed, dataset and settings remembered

# A notebook cell only renders its last bare expression, so listing the metrics
# on separate lines showed `maps` alone. Collect them in one mapping instead so
# every value is displayed.
{
    "map50-95": metrics.box.map,
    "map50": metrics.box.map50,
    "map75": metrics.box.map75,
    "map50-95 per category": metrics.box.maps,  # a list with one entry per category
}

In [None]:
# from ultralytics.yolo.utils.benchmarks import benchmark

# # Benchmark on GPU
# benchmark(model=model, imgsz=640, half=False, device='cpu')

### Export Model

In [None]:
# Export the model using the 'saved_model' format.
# NOTE(review): presumably the TensorFlow SavedModel target (the model record in the
# "Load Model" cell mentions a 'best_saved_model' directory) — confirm against the Ultralytics export docs.
model.export(format='saved_model')

### Load Model

In [None]:
## Record of models:
##
# 'runs/detect/train36/weights/best_saved_model' Wandb:  https://wandb.ai/aus10powell/MITFisheryHerring/runs/txn0c9x3
from ultralytics import YOLO

# Checkpoints that have been evaluated so far. Every one of them used to be
# loaded in turn and immediately overwritten, so only the last was ever used;
# keep them as a lookup table and load just the selected one.
CANDIDATE_WEIGHTS = {
    "train196_best": "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/runs/detect/train196/weights/best.pt",
    "train30_best": "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/runs/detect/train30/weights/best.pt",
    "train79_last": "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/runs/detect/train79/weights/last.pt",
    "colab_best7": "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/runs/colab_runs/best7.pt",
}
SELECTED_WEIGHTS = "colab_best7"  # the checkpoint the original cell ended up with

model_best = YOLO(CANDIDATE_WEIGHTS[SELECTED_WEIGHTS])

In [None]:
# Score the selected checkpoint on the validation split, then on the test split
# (the test run explicitly targets the 'mps' device).
val_metrics = model_best.val(split='val')
display(val_metrics)

separator = "*" * 50
print(separator)

test_metrics = model_best.val(split='test', device='mps')
display(test_metrics)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import cv2

# Candidate images for a quick visual check of the detector. Only `image`
# (the last assignment) is passed to the model by the code that follows;
# the other names are kept as ready-made alternatives.
herring_example = '/Users/aus10powell/Downloads/RiverHerring/River Herring JRWA/2013 Johne RIver Wapping Rd Dam/vlcsnap-2016-12-20-11h40m56s911.jpg'
white_sucker_example = '/Users/aus10powell/Downloads/RiverHerring/River Herring JRWA/2013 Underwater fish Jones River/WhiteSuckerWapping (6).JPG'
small_image = '/Users/aus10powell/Downloads/RiverHerring/IRWA 2017 (Large+annotations)/IRWA Small Jpgs/Batch 1/2_2017-04-14_19-03-04.jpg'
v2_test_image = '/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/data/datasets/MITFisheryHerringv2/test/images/1_2016-04-21_21-50-1804298_jpg.rf.011911b9c681d5b4af70a308cdd98b93.jpg' # 2 fish
v2_valid_image = '/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/data/datasets/MITFisheryHerringv2/valid/images/1_2016-04-21_21-50-1803927_jpg.rf.6b36f83367e966f41544ba7e229f4d05.jpg'

random_example = "/Users/aus10powell/Downloads/RiverHerring/River Herring IRWA/1_2016-05-13_12-34-57_large.jpg"

# Validation images from the MITRiverHerring2w dataset folder
v2_valid_imageb = "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/data/datasets/MITRiverHerring2w/valid/images/1_2016-04-21_21-50-1804987_jpg.rf.bd983d16bbaf541a0f08db030018a540.jpg"
v2_valid_imagec = "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/data/datasets/MITRiverHerring2w/valid/images/1_2016-04-21_21-50-1805011_jpg.rf.27d6d6392af4e7f3f8cac5c5bde0c0d1.jpg"

# The image actually used for the prediction and the plot
image = "/Users/aus10powell/Downloads/RiverHerring/River Herring IRWA/1_2016-04-23_16-06-05_large.jpg"

# Run the detector on the chosen image. The plot title is the bare file name and
# the fish count is the number of predicted boxes for that image.
results = model_best(image)
first_result = results[0]
num_fish = len(first_result.boxes.data)
title = image.rsplit("/", 1)[-1]

# Left panel input: the untouched image, converted from BGR to RGB for matplotlib.
original_image = cv2.imread(image)
original_image_rgb = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

# Right panel input: the annotated frame, rendered in grayscale.
annotated_frame = first_result.plot()
gray = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2GRAY)

# Draw both panels side by side using the explicit figure/axes interface.
fig, (ax_original, ax_annotated) = plt.subplots(1, 2)
ax_original.imshow(original_image_rgb)
ax_original.set_title("Original Image")
ax_annotated.imshow(gray, cmap="gray")
ax_annotated.set_title(title + f' ---- num_fish: {num_fish}')

fig.tight_layout()
plt.show()

### Images with biggest loss

## Tracking

In [None]:
# from ultralytics import YOLO

# model = model_best
# video_path = "/Users/aus10powell/Downloads/RiverHerring/IRWA 2017 Videos/Fish Sightings 2017/2_2017-04-13_14-10-29.mp4"
# results = model.track(
#     source=video_path,
#     tracker="/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/bytetrack.yaml",
#     show=True,
#    #stream=True #Uncomment to use r in results as loop
# )  # OPTIONAL: , tracker='custom_tracker.yaml'

### Experimenting

In [None]:
import os
import cv2
import numpy as np

# Directory path containing the JPEG images
# NOTE(review): this path ends in ".mp4", i.e. it looks like a single video file
# rather than a folder of images — point it at the real image directory.
directory = '/Users/aus10powell/Downloads/RiverHerring/IRWA 2017 Videos/2018 Fish Sightings/2_2018-04-14_09-57-26.mp4'

# Fail early with a readable message instead of an OS error from os.listdir.
if not os.path.isdir(directory):
    raise NotADirectoryError(f"Expected a directory of JPEG images, got: {directory}")

# Accept the common JPEG spellings regardless of case (the data contains '.JPG' files).
IMAGE_EXTENSIONS = ('.jpg', '.jpeg')

# Initialize variables
widths = []
heights = []
unreadable_files = []

# Iterate through the directory
for filename in os.listdir(directory):
    if not filename.lower().endswith(IMAGE_EXTENSIONS):
        continue

    # Load the image; cv2.imread signals failure by returning None, not by raising.
    image_path = os.path.join(directory, filename)
    loaded_image = cv2.imread(image_path)
    if loaded_image is None:
        unreadable_files.append(filename)
        continue

    # The first two shape entries are (height, width), whatever the channel count.
    height, width = loaded_image.shape[:2]
    widths.append(width)
    heights.append(height)

if unreadable_files:
    print(f"Skipped {len(unreadable_files)} unreadable file(s): {unreadable_files}")

# The statistics below are undefined for an empty sample.
if not widths:
    raise ValueError(f"No readable JPEG images found in: {directory}")

# Convert lists to numpy arrays
widths = np.array(widths)
heights = np.array(heights)

# Calculate statistics
max_width = np.max(widths)
max_height = np.max(heights)
min_width = np.min(widths)
min_height = np.min(heights)
avg_width = np.mean(widths)
avg_height = np.mean(heights)
std_width = np.std(widths)
std_height = np.std(heights)

# Print the results
print(f"Maximum Width: {max_width}px")
print(f"Maximum Height: {max_height}px")
print(f"Minimum Width: {min_width}px")
print(f"Minimum Height: {min_height}px")
print(f"Average Width: {avg_width}px")
print(f"Average Height: {avg_height}px")
print(f"Standard Deviation of Widths: {std_width:.2f}px")
print(f"Standard Deviation of Heights: {std_height:.2f}px")


In [None]:
## Get frame size of single video


import cv2

def get_frame_size(mp4_file):
    """Return the frame dimensions OpenCV reports for a video file.

    Args:
        mp4_file: Path of the video to inspect.

    Returns:
        tuple: ``(frame_width, frame_height)`` as integers.

    Raises:
        ValueError: If OpenCV cannot open the file.
    """
    capture = cv2.VideoCapture(mp4_file)
    if not capture.isOpened():
        raise ValueError(f"Error opening video file: {mp4_file}")

    # Query width first, then height, and truncate each reported value to int.
    size_properties = (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
    frame_size = tuple(int(capture.get(prop)) for prop in size_properties)

    capture.release()
    return frame_size

# Example usage: report the frame dimensions of one recorded video.
mp4_file = '/Users/aus10powell/Downloads/RiverHerring/IRWA 2017 Videos/2018 Fish Sightings/2_2018-04-14_09-57-26.mp4'
frame_size = get_frame_size(mp4_file)
frame_width, frame_height = frame_size

print(f"Frame width: {frame_width}\nFrame height: {frame_height}")


## Assess videos in folder

In [None]:
import logging
%config Application.log_level='DEBUG'

import os, glob
from tqdm import tqdm
from main_inference import main
video_folder_path ="/Users/aus10powell/Downloads/RiverHerring/IRWA 2017 Videos/Fish Sightings 2017"

mp4_files = glob.glob(video_folder_path + "/*.mp4")

print("Num .mp4 files:",len(mp4_files))

pred_net_counts = []

total_duration_seconds = []

for idx in range(len(mp4_files)):
    print(f"Num {idx+1} of {len(mp4_files)}")
    video_path = mp4_files[idx]
    # Get counts
    frame_rate, annotated_frames, out_count, in_count,duration_seconds,_ = main(
        video_path=video_path,device='mps',stream=True,show=False      
    )
    pred_net_counts.append(out_count-in_count)
    total_duration_seconds.append(duration_seconds)
print('pred net counts:',pred_net_counts)
print(f'total video minutes: {total_duration_seconds/60:.1f}')

In [None]:
print('pred net counts:',pred_net_counts)
# total_duration_seconds is a list of per-video durations: sum it before
# converting to minutes (dividing the list itself by 60 raises TypeError).
print(f'total video minutes: {sum(total_duration_seconds)/60:.1f}')

print()

### Video Gold Standard Count Verification

In [None]:
# %%capture
# import glob
# import os
# PROJECT_DIR = os.path.join(
#     "/", "Users", "aus10powell", "Documents", "Projects", "MIT-Fishery-Counter"
# )
# DATA_DIR = os.path.join(PROJECT_DIR, "data")
# GOLD_DIR = os.path.join(DATA_DIR, "gold_dataset")
# mp4_files = glob.glob(os.path.join(GOLD_DIR, "videos") + "/*.mp4")
# #[f.split("/")[-1].split(".")[0] for f in mp4_files]


# from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
# from main_inference import main

# import cv2
# import time
# from ultralytics import YOLO
# import supervision as sv
# import numpy as np
# import json
# import io
# import time
# import logging
# import os
# from tqdm import tqdm
# import resource

# site = "IRWA"

# # True data


# data = [
#     {"file": "2_2018-05-10_06-39-30", "true_herring_counts": 4},
#     {"file": "2_2018-04-14_10-06-19","true_herring_counts": 1 },
#     {"file": "2_2018-04-14_13-18-51", "true_herring_counts": 1},
#     {"file": "2_2018-04-28_10-54-38", "true_herring_counts": 3},
#     {"file": "2_2017-06-04_06-09-56", "true_herring_counts": 0}, # Comes from left returns right
#     {"file": "2_2017-04-15_11-23-36", "true_herring_counts": 1},
#     {"file": "2_2017-04-13_14-10-29", "true_herring_counts": 1},  # 2_2017-04-13_13-10-00
#     {"file": "2_2017-04-13_13-10-00", "true_herring_counts": 1}, # 2_2018-04-14_17-12-42
#     {"file": "2_2018-04-14_17-12-42", "true_herring_counts": 1}, # 2_2018-04-27_13-07-38
#     {"file": "2_2018-04-27_13-07-38", "true_herring_counts": 1}, # 2_2018-04-27_15-23-03
#     {"file": "2_2018-04-27_15-23-03", "true_herring_counts": 3}, 
#     {"file": "2_2018-04-29_08-28-10", "true_herring_counts": 1},
#     {"file": "2_2018-04-29_09-14-03", "true_herring_counts": 1},
#     {"file": "2_2018-04-29_16-28-35", "true_herring_counts": 2},
#     {"file": "2_2018-04-29_15-55-24", "true_herring_counts": 2},
#     {"file": "2_2018-04-28_11-25-56", "true_herring_counts": 2},
#     {"file": "2_2018-04-29_15-39-37", "true_herring_counts": 3},
#     {"file": "2_2018-04-29_16-54-05", "true_herring_counts": 1}, 
#     {"file": "2_2018-05-04_11-32-10", "true_herring_counts": 1}, 
#     {"file": "2_2018-05-22_05-58-08", "true_herring_counts": 2}, 
#     {"file": "2_2018-05-04_09-24-42", "true_herring_counts": 2}, # 2_2018-05-04_09-24-42 
#     {"file": "2_2018-05-05_10-50-59", "true_herring_counts": 2}, # 2_2018-05-05_10-50-59
#     {"file": "2_2018-05-05_13-31-05", "true_herring_counts": 1}, # 2_2018-05-05_13-31-05
#     {"file": "2_2018-05-04_18-57-00", "true_herring_counts": 3}, # 2_2018-05-04_18-57-00
#     {"file": "2_2018-05-05_13-49-11", "true_herring_counts": 1}, # 2_2018-05-05_13-49-11
#     {"file": "2_2018-05-05_18-10-49", "true_herring_counts": 1}, # 2_2018-05-05_18-10-49
#     {"file": "2_2018-05-05_18-47-03", "true_herring_counts": 2}, # 2_2018-05-05_18-47-03
#     {"file": "2_2018-05-06_08-25-36", "true_herring_counts": 2}, # 2_2018-05-06_08-25-36
#     {"file": "2_2018-04-27_12-01-34", "true_herring_counts": 1},#  
#     {"file": "2_2018-04-29_18-31-06", "true_herring_counts": 1},# 
#     {"file": "2_2018-04-29_17-33-32", "true_herring_counts": 2}, # 
#     {"file": "2_2018-05-02_10-09-38", "true_herring_counts": 2},# 
#     {"file": "2_2018-05-06_17-32-24", "true_herring_counts": 1}, # 2_2018-05-06_16-35-26
#     {"file": "2_2018-05-06_16-35-26", "true_herring_counts": 2}, # 
#     {"file": "2_2018-05-06_17-08-27", "true_herring_counts": 1}, # 
#     {"file": "2_2018-05-06_18-09-56", "true_herring_counts": 3},# 
#     {"file": "2_2018-04-29_16-17-14", "true_herring_counts": 1}, # 
#     {"file": "2_2018-05-13_11-44-28", "true_herring_counts": 1}, # 
#     {"file": "2_2018-05-13_07-46-27", "true_herring_counts": 1}, # 
#     {"file": "2_2018-05-11_19-06-07", "true_herring_counts": 2}, # 
#     {"file": "2_2018-05-10_17-43-39", "true_herring_counts": 1}, # 
#     {"file": "2_2018-05-10_17-50-04", "true_herring_counts": 2}, # 
#     {"file": "2_2018-05-05_13-15-07", "true_herring_counts": 1}, # 
#     {"file": "2_2018-05-05_13-00-52", "true_herring_counts": 2},#  
#     {"file": "2_2018-05-05_13-36-08", "true_herring_counts": 2}, # 
#     {"file": "2_2018-05-05_12-04-58", "true_herring_counts": 3},# 
# ]

# pred_net_counts = []
# true_net_counts = []
# total_duration_seconds = []
# videos_missed = []
# for idx in tqdm(range(len(data))):
#     video_path = [path for path in mp4_files if data[idx]["file"] in path][0]
#     print(video_path)
#     frame_rate, annotated_frames, out_count, in_count,duration_seconds,_ = main(
#         video_path=video_path,device='mps',stream=False,show=False      
#     )
#     pred_net_counts.append(out_count-in_count)
#     if (out_count-in_count) != data[idx]["true_herring_counts"]:
#         videos_missed.append({'file':data[idx],'pred':out_count-in_count,'true':data[idx]["true_herring_counts"]})
#     true_net_counts.append(data[idx]["true_herring_counts"])
#     total_duration_seconds.append(duration_seconds)



In [None]:
# import pandas as pd
# import numpy as np
# from scipy import stats
# def calculate_confidence_interval(count_data, confidence_level=0.95):
#     # Calculate the mean and standard deviation of the count data
#     mean = np.mean(count_data)
#     std = np.std(count_data)

#     # Calculate the confidence interval using the t-distribution
#     n = len(count_data)
#     t_value = stats.t.ppf((1 + confidence_level) / 2, df=n-1)
#     margin_of_error = t_value * std / np.sqrt(n)
#     confidence_interval = (mean - margin_of_error, mean + margin_of_error)

#     return confidence_interval

# def MAPE(y_true, y_pred):
#     """
#     Calculate the Mean Absolute Percentage Error (MAPE) between the true values and predicted values.
    
#     Parameters:
#         y_true (array-like): Array or list of true values.
#         y_pred (array-like): Array or list of predicted values.

#     Returns:
#         float: The calculated MAPE value.
#     """
#     y_true = np.array(y_true)
#     y_pred = np.array(y_pred)

#     # Avoid division by zero
#     epsilon = 1e-10

#     total_percentage_error = 0
#     total_samples = len(y_true)

#     for i in range(total_samples):
#         # Calculate the absolute percentage error for each data point
#         if y_true[i] == 0:
#             absolute_percentage_error = np.abs((y_true[i] - y_pred[i]))
#         else:
#             absolute_percentage_error = np.abs((y_true[i] - y_pred[i]) / (y_true[i] + epsilon))

#         # Add it to the total percentage error
#         total_percentage_error += absolute_percentage_error

#     # Calculate the mean of the absolute percentage errors
#     mape = (total_percentage_error / total_samples) * 100.0

#     return mape

# df_results = pd.DataFrame(data={"pred_net_counts":pred_net_counts,"true_net_counts":true_net_counts},)

In [None]:
# print("num videos predicted wrong: ",len(videos_missed))
# print(f"Total video processed: {sum(total_duration_seconds)} (sec), {sum(total_duration_seconds)/60:.1f} (min)")
# print(f"Total number of videos process: {len(data)}")

# diffs = [np.abs(x - y) for x, y in zip(true_net_counts, pred_net_counts)]
# print(f"Did not count {sum(diffs)} out of {sum(true_net_counts)} herring")
# print(f"MAE (Mean Absolute Error e.g. counts): {mean_absolute_error(y_pred=pred_net_counts,y_true = true_net_counts):.2f}",)
# print(f"MAPE (Mean Absolute Percentage Error of Counts): {MAPE(true_net_counts,pred_net_counts):.1f}%",)

# print(f"Average number of missed counts per second {sum(diffs)/sum(total_duration_seconds):.2f}")



# absolute_error = np.abs(df_results["pred_net_counts"] - df_results["true_net_counts"]).values
# std_absolute_error = np.std(absolute_error)
# ci = calculate_confidence_interval(count_data=diffs)
# print(f"Estimates off between {ci[0]:.1f} and {ci[1]:.1f} fish at an average of {np.mean(diffs):.1f} per video")

# # Bootstrapped

# def bootstrap_mean_ci(data, num_iterations=10000, ci_level=0.95):
#     # Create an array to store bootstrapped means
#     bootstrapped_means = np.empty(num_iterations)

#     # Perform bootstrapping
#     for i in range(num_iterations):
#         bootstrap_sample = np.random.choice(data, size=len(data), replace=True)
#         bootstrapped_means[i] = np.mean(bootstrap_sample)

#     # Calculate confidence interval
#     lower_ci = np.percentile(bootstrapped_means, (1 - ci_level) * 100 / 2)
#     upper_ci = np.percentile(bootstrapped_means, ci_level * 100 + (1 - ci_level) * 100 / 2)

#     return np.mean(bootstrapped_means), lower_ci, upper_ci

# # # Example usage
# # mean, lower_ci, upper_ci = bootstrap_mean_ci(diffs)

# # print(f"Bootstrapped mean: {mean:.2f}")
# # print(f"Bootstrapped CI of how many counts on average the model is off: [{lower_ci:.2f}, {upper_ci:.2f}]")

num videos predicted wrong:  8
Total video processed: 417.93 (sec), 7.0 (min)
Total number of videos process: 32
Did not count 10 out of 52 herring
MAE (Mean Absolute Error e.g. counts): 0.31
MAPE (Mean Absolute Percentage Error of Counts): 19.8%
Average number of missed counts per second 0.02
Estimates off between 0.1 and 0.5 fish at an average of 0.3 per vide

In [None]:
"""

 "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/runs/colab_runs/last3.pt" # best12.pt
num videos predicted wrong:  9
Total video processed: 598.044 (sec), 10.0 (min)
Total number of videos process: 46
Did not count 11 out of 75 herring
MAE (Mean Absolute Error e.g. counts): 0.24
MAPE (Mean Absolute Percentage Error of Counts): 11.6%
Average number of missed counts per second 0.02
Estimates off between 0.1 and 0.4 fish at an average of 0.2 per video


"/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/runs/colab_runs/last2.pt" (medium model)
num videos predicted wrong:  9
Total video processed: 598.044 (sec), 10.0 (min)
Total number of videos process: 46
Did not count 11 out of 75 herring
MAE (Mean Absolute Error e.g. counts): 0.24
MAPE (Mean Absolute Percentage Error of Counts): 13.0%
Average number of missed counts per second 0.02
Estimates off between 0.1 and 0.4 fish at an average of 0.2 per video

"/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/runs/colab_runs/best5.pt"
Total video processed: 370.959 (sec), 6.2 (min)
Total number of videos process: 28
Did not count 8 out of 46 herring
MAE (Mean Absolute Error e.g. counts): 0.29
MAPE (Mean Absolute Percentage Error of Counts): 15.5%
Average number of missed counts per second 0.02
Estimates off between 0.1 and 0.5 fish at an average of 0.3 per video


"/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/runs/colab_runs/best3.pt"

Total video processed: 347.549 (sec), 5.8 (min)
Total number of videos process: 26
Did not count 7 out of 42 herring
MAE (Mean Absolute Error e.g. counts): 0.27
MAPE (Mean Absolute Percentage Error of Counts): 16.7%
Average number of missed counts per second 0.02
Estimates off between 0.0 and 0.5 fish at an average of 0.3 per video


 "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/runs/colab_runs/best2.pt"
 
Total video processed: 311.58 (sec), 5.2 (min)
Total number of videos process: 23
Did not count 6 out of 37 herring
MAE (Mean Absolute Error e.g. counts): 0.26
MAPE (Mean Absolute Percentage Error of Counts): 17.4%
Average number of missed counts per second 0.02
Estimates off between -0.0 and 0.5 fish at an average of 0.3 per video


/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/runs/detect/train79/weights/last.pt

Total video processed: 311.58 (sec), 5.2 (min)
Total number of videos process: 23
Did not count 10 out of 37 herring
MAE (Mean Absolute Error e.g. counts): 0.43
MAPE (Mean Absolute Percentage Error of Counts): 21.0%
Average number of missed counts per second 0.03
Estimates off between 0.2 and 0.7 fish at an average of 0.4 per video


# /Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/runs/detect/train196/weights/last.pt

Total video processed: 284.347 (sec), 4.7 (min)
Total number of videos process: 21
Did not count 14 out of 34 herring
MAE (Mean Absolute Error e.g. counts): 0.67
MAPE (Mean Absolute Percentage Error of Counts): 38.1%
Average number of missed counts per second 0.05
Estimates off between 0.3 and 1.0 fish at an average of 0.7 per video
"""

## Estimate True Counts using Bayes

In [None]:
# Display the installed SciPy version.
import scipy 
scipy.__version__

In [None]:
import pymc3 as pm
import numpy as np

# Observed herring counts per video, read from the gold-standard records.
# NOTE(review): `data` is only assigned in the gold-standard verification cell,
# which is currently commented out — restore and run it before this cell.
# Stored as an array (not a list) so element-wise arithmetic on it works.
observed_data = np.array([s["true_herring_counts"] for s in data])
# e.g. np.array([4, 1, 1, 3, 0, 1, 1, 1, 1, 1, 3, 1, 1, 2, 2, 2, 3, 1, 1, 2, 2, 2, 1, 3, 1])

# Number of videos
n_videos = len(observed_data)

# Prior belief (Poisson with mean 1)
prior_lambda = 1

# Model definition. The context gets its own name so that it does not overwrite
# the YOLO `model` object created by the training cells.
with pm.Model() as herring_count_model:
    # Prior distribution
    theta = pm.Poisson('theta', mu=prior_lambda)

    # Likelihood (Binomial) with success probability theta / n_videos
    likelihood = pm.Binomial('likelihood', n=n_videos, p=theta/n_videos, observed=observed_data)

    # Sample from the posterior using MCMC
    trace = pm.sample(10000, tune=1000, cores=2)  # Adjust tune and cores as needed

# Plot posterior distribution
pm.plot_posterior(trace, var_names=['theta'])

# Print posterior summary
print(pm.summary(trace, var_names=['theta']))


In [None]:
import numpy as np
from scipy.special import comb

# Given values
p = 0.88
# The element-wise maths below needs an array: with a plain list,
# `p ** observed_data` and `n - observed_data` raise TypeError.
# np.asarray is a no-op when observed_data is already a float array.
observed_data = np.asarray(observed_data, dtype=float)
n = len(observed_data)

# Example value of theta
# NOTE(review): theta is not used in the calculation below, which relies on the
# fixed p above — confirm whether p was meant to be theta / n.
theta = 1.9

# Calculate the likelihood: binomial probability of each observed count
likelihood = comb(n, observed_data) * (p ** observed_data) * ((1 - p) ** (n - observed_data))
# Joint likelihood of all observations (product over videos)
likelihood_for_theta = likelihood.prod()

#print("Likelihood for theta =", theta, ":", likelihood_for_theta)
likelihood

In [None]:
import numpy as np

# np.power accepts a list or an array of counts; the bare `p ** observed_data`
# raises TypeError when observed_data is a plain list.
np.power(p, observed_data)

Above is calculation for Z

$$\text{Posterior}(\theta \mid \text{data}) = \frac{1}{Z} \times \text{Prior}(\theta) \times \text{Likelihood}(\text{data} \mid \theta)$$


The likelihood function represents the probability of observing the given data (observed fish detection counts) given a specific value of the parameter $\theta$. In your case, the likelihood is modeled as a Binomial distribution because you're dealing with counts of successes (fish detections) in a fixed number of trials (videos). The likelihood function is given by:

$$\text{Likelihood}(\text{data} \mid \theta) = \text{Binomial}(\text{data} \mid n, p)$$

Where:

- $\text{data}$ is the observed fish detection counts.
- $n$ is the number of trials (number of videos in this case).
- $p$ is the probability of success (probability of detecting a fish in a video), which is $p = \theta / n$.

Given your observed data and a specific value of $\theta$, you can plug the values into the Binomial distribution formula to calculate the likelihood of observing the data:

$$\text{Likelihood}(\text{data} \mid \theta) = \binom{n}{\text{data}} \cdot p^{\text{data}} \cdot (1 - p)^{\,n - \text{data}}$$

In your example, you can calculate the likelihood for each observed fish detection count using the formula above and the corresponding value of $\theta$.

### Run Benchmark

In [None]:
from ultralytics.utils.benchmarks import benchmark

# Benchmark the train184 checkpoint against the dataset in river_herring.yaml,
# at a 320x240 image size, without half precision, on the 'mps' device.
benchmark(
    model='/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/code/notebooks/runs/detect/train184/weights/best.pt',
    data='river_herring.yaml',
    imgsz=(320,240),
    half=False,
    device='mps',
)

In [38]:
import re
from datetime import timedelta

def extract_datetime_from_filename(filename):
  """
  Parse the date and time encoded in a file name.

  The name must contain ``<digits>_<YYYY-MM-DD>_<HH-MM-SS>.`` (for example
  ``2_2018-04-14_10-06-19.mp4``). The pattern is searched for anywhere in the
  string, so a full path with leading directories is accepted as well.

  Args:
      filename (str): File name or path to parse.

  Returns:
      datetime.datetime: The date and time found in the name.

  Raises:
      ValueError: If the pattern is absent, or if the matched digits do not
          form a valid date/time.
  """
  from datetime import datetime

  found = re.search(r"(\d+)_(\d{4}-\d{2}-\d{2})_(\d{2}-\d{2}-\d{2})\.(.+)?", filename)
  if found is None:
    raise ValueError("Filename format doesn't match expected pattern")

  # Groups 2 and 3 carry the date and the time; the prefix and extension are ignored.
  date_part, time_part = found.group(2), found.group(3)
  try:
    return datetime.strptime(f"{date_part} {time_part}", "%Y-%m-%d %H-%M-%S")
  except ValueError:
    # The digits matched the layout but are not a real date/time (e.g. month 13).
    raise ValueError("Invalid date or time format in filename")


# Example usage. `filename` is assigned here because the assignment further down
# this cell comes too late for this call when the notebook runs on a fresh kernel.
filename = "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/data/gold_dataset/videos/2_2018-04-14_10-06-19.mp4"

try:
  extracted_datetime = extract_datetime_from_filename(filename)
  print(f"Extracted datetime: {extracted_datetime}")
except ValueError as e:
  # Both failure modes of the parser are reported as ValueError.
  print(f"Error: {e}")

def create_timestamps(relative_frame_times, reference_datetime, timestamp_format="%Y-%m-%d %H:%M:%S.%f"):
  """
  Turn frame offsets into formatted absolute timestamps.

  Args:
      relative_frame_times (iterable): Frame times in seconds, relative to the
          start of the video (any values accepted by ``float()``).
      reference_datetime (datetime.datetime): The datetime the offsets are
          added to.
      timestamp_format (str): ``strftime`` format used for the output. The
          default includes microseconds, e.g. ``2018-04-14 10:06:19.100000``.

  Returns:
      list: One formatted timestamp string per frame time, in input order.
  """
  timestamps = []
  for frame_time in relative_frame_times:
    # float() lets numpy/pandas scalar types through; timedelta rejects some of them.
    time_delta = timedelta(seconds=float(frame_time))
    timestamp = reference_datetime + time_delta
    # strftime takes a format string; passing the datetime itself raised TypeError.
    timestamps.append(timestamp.strftime(timestamp_format))
  return timestamps


# Sample inputs for the filename parser: a name without the expected pattern
# (kept commented out) and one that contains it.
#invalide_filename = "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/data/gold_dataset/videos/invalid_name.mp4"
valid_filename = "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/data/gold_dataset/videos/2_2018-04-14_10-06-19.mp4"
# Example usage
filename = "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/data/gold_dataset/videos/2_2018-04-14_10-06-19.mp4"




import pandas as pd
# Annotated detections exported for the video named above.
df = pd.read_csv("/Users/aus10powell/Downloads/2_2018-04-14_10-06-19_annotated_detections.csv")


# for fname  in [invalide_filename,valid_filename]:
#     f = extract_datetime_from_filename(filename=fname)
#     print(f)

#create_timestamps(df["Relative Time"],reference_datetime=f)
# f + timedelta(seconds=17)
# Show the per-row relative time column.
df["Relative Time"]

Extracted datetime: 2018-04-14 10:06:19


0       0.0
1       0.1
2       0.2
3       0.3
4       0.4
       ... 
132    13.2
133    13.3
134    13.4
135    13.5
136    13.6
Name: Relative Time, Length: 137, dtype: float64

In [35]:
import re
from datetime import timedelta
import pandas as pd



# Video whose file name encodes the reference date and time.
valid_filename = "/Users/aus10powell/Documents/Projects/MIT-Fishery-Counter/data/gold_dataset/videos/2_2018-04-14_10-06-19.mp4"

# Parse that reference datetime out of the file name.
reference_datetime = extract_datetime_from_filename(valid_filename)

# Load the annotated detections exported for the same video.
df = pd.read_csv("/Users/aus10powell/Downloads/2_2018-04-14_10-06-19_annotated_detections.csv")

# Anchor every relative frame time to the reference datetime.
relative_times = df["Relative Time"]
formatted_timestamps = create_timestamps(relative_times, reference_datetime)

# The resulting list can be added to the DataFrame as a new column or used for
# further analysis; display it as the cell output.
formatted_timestamps

['2018-04-14 10:06:19.000000',
 '2018-04-14 10:06:19.100000',
 '2018-04-14 10:06:19.200000',
 '2018-04-14 10:06:19.300000',
 '2018-04-14 10:06:19.400000',
 '2018-04-14 10:06:19.500000',
 '2018-04-14 10:06:19.600000',
 '2018-04-14 10:06:19.700000',
 '2018-04-14 10:06:19.800000',
 '2018-04-14 10:06:19.900000',
 '2018-04-14 10:06:20.000000',
 '2018-04-14 10:06:20.100000',
 '2018-04-14 10:06:20.200000',
 '2018-04-14 10:06:20.300000',
 '2018-04-14 10:06:20.400000',
 '2018-04-14 10:06:20.500000',
 '2018-04-14 10:06:20.600000',
 '2018-04-14 10:06:20.700000',
 '2018-04-14 10:06:20.800000',
 '2018-04-14 10:06:20.900000',
 '2018-04-14 10:06:21.000000',
 '2018-04-14 10:06:21.100000',
 '2018-04-14 10:06:21.200000',
 '2018-04-14 10:06:21.300000',
 '2018-04-14 10:06:21.400000',
 '2018-04-14 10:06:21.500000',
 '2018-04-14 10:06:21.600000',
 '2018-04-14 10:06:21.700000',
 '2018-04-14 10:06:21.800000',
 '2018-04-14 10:06:21.900000',
 '2018-04-14 10:06:22.000000',
 '2018-04-14 10:06:22.100000',
 '2018-0

In [5]:
def get_processesor_type():
    """
    Report whether PyTorch can see a CUDA device.

    Returns:
        str: "gpu" when ``torch.cuda.is_available()`` is true, otherwise "cpu".
    """
    import torch

    return "gpu" if torch.cuda.is_available() else "cpu"

if get_processesor_type()

device(type='cpu')