In [1]:
import sys
import os
import pandas as pd
import time
notebook_dir = os.getcwd()
sys.path.append(os.path.join(notebook_dir, '../src'))
from BeeMonitor import VideoAnalyzer

In [2]:
# Initialize the VideoAnalyzer with the paths to the models and the video dimensions
nest_model = '../models/nest_detection_model.pt'
tracking_model = '../models/bee_tracking_model.pt'
analyzer = VideoAnalyzer(nest_model, tracking_model, 720, 1280)

In [3]:
#### load and process csv data from processed videos ###

In [4]:

output_folder = "output"
input_folder = "CVPR_Evaluation_Video_Data"

videos = os.listdir(input_folder)
videos = [os.path.join(input_folder, f) for f in videos if f.endswith('.mp4')]
videos

['CVPR_Evaluation_Video_Data/mendels_2024-05-08_15_00_00.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_10_01.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-05-23_12_40_00.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_00_00.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_20_00.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-05-08_15_30_00.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_30_00.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-05-23_12_00_00.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-05-08_15_50_00.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_40_01.mp4',
 'CVPR_Evaluation_Video_Data/mendels_2024-05-23_18_20_01.mp4']

In [5]:
## Process Videos ###

df = pd.DataFrame()
import traceback

files_not_processed = []
for video_path in videos:
    try:
        print(f"Processing file {video_path}")
        # record start time
        start_time = time.time()

        # get the nest coordinates
        nest_ids = analyzer.getNestDetection(video_path)
        nest = analyzer.processNestDetection(nest_ids)

        # get the motion tracking data
        df_temp = analyzer.getMotionTracking(video_path, nest['hotel'], output_folder, False)
        df_temp.to_csv(os.path.join(output_folder, os.path.basename(video_path).split(".")[0] + "_v10_motion_tracking.csv"), index=False)

        # process the motion tracking data to get the foraging events witht the nest coordinates
        events = analyzer.processMotionTracking(df_temp, nest)
        events['video'] = os.path.basename(video_path)
        processed_events = analyzer.synthesizeCSV(events, os.path.basename(video_path))

        # end the time
        end_time = time.time()
        processing_time = end_time - start_time

        # set the processing time for the video file
        events['processing_time'] = [processing_time for i in range(len(events))]
        
        # append the processed data to the dataframe
        df = pd.concat([df, events])
    except Exception as e:
        print(f"Error processing file {video_path}: {e}")
        traceback.print_exc()
        files_not_processed.append(video_path)
        continue


Processing file CVPR_Evaluation_Video_Data/mendels_2024-05-08_15_00_00.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_10_01.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-05-23_12_40_00.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_00_00.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_20_00.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-05-08_15_30_00.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_30_00.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-05-23_12_00_00.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-05-08_15_50_00.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-04-30_09_40_01.mp4
Processing file CVPR_Evaluation_Video_Data/mendels_2024-05-23_18_20_01.mp4


In [7]:
measured = pd.read_csv('../Manual_Foraging_Events_Observation.csv')

In [8]:
# load from final csv
predicted = df[['action', 'nest', 'frame_number',"video","timestamp","filename"]].copy()

In [9]:
#predicted = 
predicted.reset_index(drop=True, inplace=True)

In [10]:
predicted['video'] = predicted['video'].apply(lambda x: x.replace('.mp4', ''))

In [11]:
predicted['nest'] = predicted['nest'].apply(lambda x: x.split('_')[1])

In [12]:
predicted['timestamp'] = predicted['timestamp'].astype(str)
predicted['timestamp'] = predicted['timestamp'].apply(lambda x: x.split(' ')[1])

In [13]:
measured = measured[['video','action','nest','timestamp']].dropna()

In [14]:
from datetime import time
def getTimestamp1(txt):
    hr, mn, s = txt.split(':')
    #return timedelta(hours=int(hr), minutes=int(mn), seconds=int(s))
    return time(int(hr), int(mn), int(s))

In [15]:
measured['timestamp'] = measured['timestamp'].apply(lambda x: getTimestamp1(x))
predicted['timestamp'] = predicted['timestamp'].apply(lambda x: getTimestamp1(x))

In [16]:
measured['site'] = measured['video'].apply(lambda x: x.split('_')[0])
measured['hour'] = measured['timestamp'].apply(lambda x: x.hour)

predicted['site'] = predicted['video'].apply(lambda x: x.split('_')[0])
predicted['hour'] = predicted['timestamp'].apply(lambda x: x.hour)

In [17]:
# filter measured based on videos in predicted
videos = predicted.video.unique().tolist()
videos = [v.replace('.mp4', '') for v in videos]
measured_temp = measured[measured['video'].isin(videos)]

In [18]:
len(measured_temp)

300

In [19]:
measured_temp.reset_index(drop=True, inplace=True)
predicted.reset_index(drop=True, inplace=True)

In [20]:
# filter site
# site = 'mendels'
# site = 'natalies'
# measured_temp = measured_temp[measured_temp['site'] == site]
# measured_temp = measured_temp.reset_index(drop=True)
# predicted = predicted[predicted['site'] == site]
# predicted = predicted.reset_index(drop=True)

In [23]:
class Action:
    def __init__(self, action, timestamp, nest, video):
        self.action = action
        self.timestamp = timestamp
        self.nest = int(nest)
        self.video = video

    def getAction(self):
        return self.action
    
    def getTimestamp(self):
        return self.timestamp
    
    def getNest(self):
        return self.nest
    
    def getVideo(self):
        return self.video

def getActions(df):
    actions = []
    for i in range(len(df)):
        action = Action(df['action'][i], df['timestamp'][i], df['nest'][i], df['video'][i])
        actions.append(action)
    return actions

from datetime import datetime

def time_difference(time1, time2):
    # Convert the time strings to datetime objects
    date_today = datetime.today().date()
    datetime1 = datetime.combine(date_today, time1)
    datetime2 = datetime.combine(date_today, time2)

    # Calculate the difference
    time_difference = datetime1 - datetime2

    # Get the difference in seconds
    difference_in_seconds = time_difference.total_seconds()

    return abs(difference_in_seconds)

def isActionInActions(action, actions):
    for act in actions:

        if action.action == act.action and time_difference(action.timestamp, act.timestamp) < 3 and action.video == act.video and action.nest == act.nest:
            return True
        
    return False

In [24]:
measured_actions = getActions(measured_temp)

In [25]:
predicted_actions = getActions(predicted)


In [26]:
def calculateTruePositives(measured_actions, predicted_actions):
    tp = 0
    objs = []
    for action in predicted_actions:
        if isActionInActions(action, measured_actions):
            tp += 1
            objs.append(action)
    return tp, objs

tp, tp_obj = calculateTruePositives(measured_actions, predicted_actions)
print(tp)

277


In [27]:
tp_df = pd.DataFrame([obj.__dict__ for obj in tp_obj])
tp_df.groupby('action').count()

Unnamed: 0_level_0,timestamp,nest,video
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Entry,143,143,143
Exit,134,134,134


In [28]:
def calculateFalsePositives(measured_actions, predicted_actions):
    fp = 0
    fp_obj = []
    for action in predicted_actions:
        if not isActionInActions(action, measured_actions):
            fp += 1
            fp_obj.append(action)
    return fp, fp_obj

fp, fp_obj = calculateFalsePositives(measured_actions, predicted_actions)
print(fp)

79


In [29]:
fp_df = pd.DataFrame([obj.__dict__ for obj in fp_obj])
fp_df.groupby('action').count()

Unnamed: 0_level_0,timestamp,nest,video
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Entry,39,39,39
Exit,40,40,40


In [30]:
def calculateFalseNegatives(measured_actions, predicted_actions):
    fn = 0
    fn_obj = []
    for action in measured_actions:
        if not isActionInActions(action, predicted_actions):
            fn += 1
            fn_obj.append(action)
    return fn, fn_obj

fn, fn_obj = calculateFalseNegatives(measured_actions, predicted_actions)
print(fn)

26


In [31]:
fn_df = pd.DataFrame([obj.__dict__ for obj in fn_obj])
fn_df.groupby('action').count()

Unnamed: 0_level_0,timestamp,nest,video
action,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Entry,7,7,7
Exit,19,19,19


In [37]:
import numpy as np

In [41]:
# overall precision
np.mean(tp_df.groupby('video').size() / predicted.groupby('video').size()).tolist()

0.8067084535638877

In [43]:
# precision per video
tp_df.groupby('video').size() / predicted.groupby('video').size()

video
mendels_2024-04-30_09_00_00    0.714286
mendels_2024-04-30_09_10_01    1.000000
mendels_2024-04-30_09_20_00    0.538462
mendels_2024-04-30_09_30_00    0.894737
mendels_2024-04-30_09_40_01    1.000000
mendels_2024-05-08_15_00_00    0.685714
mendels_2024-05-08_15_30_00    0.896552
mendels_2024-05-08_15_50_00    0.628571
mendels_2024-05-23_12_00_00    0.964286
mendels_2024-05-23_12_40_00    0.863014
mendels_2024-05-23_18_20_01    0.688172
dtype: float64

In [44]:
# recall per video
tp_df.groupby('video').size() / measured_temp.groupby('video').size()

video
mendels_2024-04-30_09_00_00    1.000000
mendels_2024-04-30_09_10_01    0.750000
mendels_2024-04-30_09_20_00    0.777778
mendels_2024-04-30_09_30_00    0.944444
mendels_2024-04-30_09_40_01    1.000000
mendels_2024-05-08_15_00_00    1.043478
mendels_2024-05-08_15_30_00    1.000000
mendels_2024-05-08_15_50_00    0.956522
mendels_2024-05-23_12_00_00    1.000000
mendels_2024-05-23_12_40_00    0.954545
mendels_2024-05-23_18_20_01    0.831169
dtype: float64

In [42]:
# overall recall
np.mean(tp_df.groupby('video').size() / measured_temp.groupby('video').size()).tolist()

0.9325396825396826