In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy import stats
import warnings
import sys
import seaborn as sns
sys.path.append('../')
pd.options.mode.chained_assignment = None 



In [2]:
#combine all CSV's of the same data
files = open("bees/files.txt")
filenames = []
while True:
    line = files.readline().strip()
    if not line:
        break
    filenames.append("bees/" + line)

vdf = pd.concat(map(pd.read_csv, filenames), ignore_index=True)

In [3]:
vdf = vdf.drop_duplicates(subset=['track_starttime','track_tagid'], keep='last')

In [4]:
#SECONDS THRESHOLD
#used to classify multiple detections into part of a single event
#also used to tell when two detections are part of different events
#by checking the time distance between them
t = 15

#DISTANCE THRESHOLD
#used to classify an event as entering or exiting
#when two consecutive detections of the same event 
#have this distance in y position, they are utilized to predict
#the trajectory. If not then it checks the detection prior for
#the distance threshold, and continues doing so until it finds
#the last detection in the event or until it finds a distance
#of more than the threshold

t2 = 300

#ANGLE THRESHOLD
#used to generate angle ranges for classifying as exiting
#or entering

angle = 10


In [5]:
vdf['track_endtime'] = vdf['track_endtime'].apply(lambda x: pd.to_datetime(x))
vdf['track_starttime'] = vdf['track_starttime'].apply(lambda x: pd.to_datetime(x))
vdf['track_tagid'] = vdf['track_tagid'].apply(lambda x: str(x))
vdf = vdf.sort_values(by=['track_tagid','track_starttime']).reset_index()
vdf['next_t'] = vdf['track_starttime'].shift(periods=-1)
vdf['timedelta'] = (vdf['next_t'] - vdf['track_endtime']).apply(lambda x: x.total_seconds())
vdf['separate_event'] = (vdf['timedelta'] >= t) | (vdf['timedelta'] < -50)
break_indexes = vdf.separate_event[vdf.separate_event == True].index.tolist()

### GRAPH GENERATION

In [None]:
index = 1
first_detection = 0
for i in range(len(break_indexes)):
    detections = vdf.iloc[first_detection:break_indexes[i]+1]
    
    coordinates = detections[['track_endy','track_starty']]
    coordinates.reset_index(drop=True, inplace=True)
    
    first_detection = break_indexes[i]+1

    positions = []
    for j in range(len(coordinates)):
        positions.append(coordinates['track_starty'].iloc[j])
        positions.append(coordinates['track_endy'].iloc[j])
    x = np.arange(len(positions))
    plot = sns.lineplot(x=x,y=positions)
    plot.set_ylim(0, 1400)
    plt = plot.get_figure()
    plt.savefig(f"graphs/{index}.png")
    plt.clf()
    index += 1

## Look at events prior and classify based on y displacement

In [7]:
def beeCleanPrior(bee):

    new_event = []
    datetime = []
    ids = []
    first_detection = 0
    for i in range(len(break_indexes)):

        ids.append(vdf['track_tagid'].iloc[break_indexes[i]])
        datetime.append(vdf['track_endtime'].iloc[break_indexes[i]])
        
        detections = vdf.iloc[first_detection:break_indexes[i]+1]
    
        coordinates = detections[['track_endy','track_starty']]
        coordinates.reset_index(drop=True, inplace=True)
        
        final = coordinates['track_endy'].iloc[-1]
        
        for k in range(len(coordinates)):
            prev = coordinates['track_starty'].iloc[len(coordinates)-k-1]
            dif = final - prev
            if abs(dif) >= t2:
                if dif < 0:
                    new_event.append('exiting')
                elif dif > 0:
                    new_event.append('entering')
                else:
                    new_event.append('unknown')
                break
            elif k == len(coordinates) - 1:
                if dif < 0:
                    new_event.append('exiting')
                elif dif > 0:
                    new_event.append('entering')
                else:
                    new_event.append('unknown')
        first_detection = break_indexes[i]+1
        

    datadict ={'tagID':ids,'datetime':datetime,'event':new_event}
    return pd.DataFrame.from_dict(datadict)
            
        
 

In [8]:
prior = beeCleanPrior(vdf)
prior.to_csv("bee_prior.csv",index=False)

## Classify events based on summed vector angle of all detections corresponding to an event

Obtained from BeeCam-AprilTag
https://github.com/AERS-Lab/BeeCam-AprilTag

In [9]:
def beeCleanAngle(bee):

    ids = []
    new_event = []
    datetime = []
    first_detection = 0
    
    exit_min = 180 + angle
    exit_max = 360 - angle
    enter_min = angle
    enter_max = 180 - angle
        
    for i in range(len(break_indexes)):

        ids.append(vdf['track_tagid'].iloc[break_indexes[i]])
        datetime.append(vdf['track_endtime'].iloc[break_indexes[i]])
        
        detections = vdf.iloc[first_detection:break_indexes[i]+1]
    
        coordinates = detections['angle'].to_numpy()

        unit_dx = np.cos(np.deg2rad(coordinates))
        unit_dy = np.sin(np.deg2rad(coordinates))
        avg_x = np.average(unit_dx)
        avg_y = np.average(unit_dy)
        if avg_x == 0 and avg_y == 0:
                    deg = 0
        elif avg_x == 0 and avg_y != 0:
            if avg_y > 0:
                deg = 270
            elif avg_y < 0:
                deg = 90
        else:
            # determine direction angle using arctan
            deg = np.rad2deg(np.arctan(avg_y/avg_x))
                    
            # since arctan limits are (-90,90), use coordinate directions to 
            # correct the angle to be within standard [0,360) range
            if avg_x > 0 and avg_y >= 0:
                deg = deg
            elif avg_x < 0 and avg_y >= 0:
                deg = 180 + deg
            elif avg_x < 0 and avg_y < 0:
                deg = deg + 180
            elif avg_x > 0 and avg_y < 0:
                deg = 360 + deg

        if deg >= exit_min and deg <= exit_max:
            new_event.append('exiting')
        elif deg >= enter_min and deg <= enter_max:
            new_event.append('entering')
        else:
            new_event.append('unknown')
        first_detection = break_indexes[i]+1
                    
    datadict ={'tagID':ids,'datetime':datetime,'event':new_event}
    return pd.DataFrame.from_dict(datadict)

In [10]:
summed = beeCleanAngle(vdf)
summed.to_csv("bee_angle.csv",index=False)

## Classify events based on last angle corresponding to an event

Modified from BeeCam-AprilTag
https://github.com/AERS-Lab/BeeCam-AprilTag

In [20]:
def beeCleanSingleAngle(bee):

    ids = []
    new_event = []
    datetime = []
    first_detection = 0
    
    exit_min = 180 + angle
    exit_max = 360 - angle
    enter_min = angle
    enter_max = 180 - angle
        
    for i in range(len(break_indexes)):

        ids.append(vdf['track_tagid'].iloc[break_indexes[i]])
        datetime.append(vdf['track_endtime'].iloc[break_indexes[i]])
        
        detections = vdf.iloc[first_detection:break_indexes[i]+1]
    
        coordinates = detections['angle'].iloc[-1]
        
        angle_rad = np.deg2rad(coordinates)
        avg_x = np.cos(angle_rad)
        avg_y = np.sin(angle_rad)
        if avg_x == 0 and avg_y == 0:
                deg = 0
        elif avg_x == 0 and avg_y != 0:
            if avg_y > 0:
                deg = 270
            elif avg_y < 0:
                deg = 90
        else:
            # determine direction angle using arctan
            deg = np.rad2deg(np.arctan(avg_y/avg_x))
                    
            # since arctan limits are (-90,90), use coordinate directions to 
            # correct the angle to be within standard [0,360) range
            if avg_x > 0 and avg_y >= 0:
                deg = deg
            elif avg_x < 0 and avg_y >= 0:
                deg = 180 + deg
            elif avg_x < 0 and avg_y < 0:
                deg = deg + 180
            elif avg_x > 0 and avg_y < 0:
                deg = 360 + deg

        if deg >= exit_min and deg <= exit_max:
            new_event.append('exiting')
        elif deg >= enter_min and deg <= enter_max:
            new_event.append('entering')
        else:
            new_event.append('unknown')
        first_detection = break_indexes[i]+1
                    
    datadict ={'tagID':ids,'datetime':datetime,'event':new_event}
    return pd.DataFrame.from_dict(datadict)

In [21]:
single = beeCleanSingleAngle(vdf)
single.to_csv("bee_singleangle.csv",index=False)

In [22]:
correct = {'prior':0,'summedvector':0,'singleangle':0}
count = 0
exiting = ['inside_outside']
entering = ['outside_inside']
for index, row in prior.iterrows():
    datetime = row['datetime']
    tagID = row['tagID']
    track_shape = vdf[(vdf['track_tagid'] == tagID) & (vdf['track_endtime'] == datetime)]['track_shape'].iloc[0]
    if track_shape in exiting:
        event = 'exiting'
    elif track_shape in entering:
        event = 'entering'
    else:
        event = 'unknown'

    if event != 'unknown':
        if row['event'] == event:
            correct['prior'] += 1
        if summed['event'].iloc[index] == event:
            correct['summedvector'] += 1
        if single['event'].iloc[index] == event:
            correct['singleangle'] += 1
        count += 1

results = {k: v/count for k, v in correct.items()}
results


{'prior': 1.0,
 'summedvector': 0.8530508609061335,
 'singleangle': 0.8986749186701579}

In [14]:
enter = {'prior':0,'summedvector':0}
exit = {'prior':0,'summedvector':0}

entercount = 0
exitcount = 0
exiting = ['inside_outside']
entering = ['outside_inside']
for index, row in prior.iterrows():
    datetime = row['datetime']
    tagID = row['tagID']
    track_shape = vdf[(vdf['track_tagid'] == tagID) & (vdf['track_endtime'] == datetime)]['track_shape'].iloc[0]
    if track_shape in exiting:
        event = 'exiting'
    elif track_shape in entering:
        event = 'entering'
    else:
        event = 'unknown'

    if event == 'entering':
        if row['event'] == event:
            enter['prior'] += 1
        if summed['event'].iloc[index] == event:
            enter['summedvector'] += 1
        entercount += 1
    if event == 'exiting':
        if row['event'] == event:
            exit['prior'] += 1
        if summed['event'].iloc[index] == event:
            exit['summedvector'] += 1
        exitcount += 1



KeyboardInterrupt: 

In [None]:
results = {k: v/entercount for k, v in enter.items()}
results

In [None]:
results = {k: v/exitcount for k, v in exit.items()}
results

In [None]:
len(prior[prior['event'] == summed['event']])/len(prior)

In [None]:
cheatsheet = pd.read_csv("cheatsheet.csv")

In [None]:
correct = {'prior':0,'summedvector':0, 'single':0}
for index, row in cheatsheet.iterrows():
    if prior['event'].iloc[int(row['line'])-1] == row['event']:
        correct['prior'] += 1
    if summed['event'].iloc[int(row['line'])-1] == row['event']:
        correct['summedvector'] += 1
    if single['event'].iloc[int(row['line'])-1] == row['event']:
        correct['single'] += 1

results = {k: v/len(cheatsheet) for k, v in correct.items()}
results