In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy import stats
import warnings
import sys
import seaborn as sns
sys.path.append('../')
pd.options.mode.chained_assignment = None 



In [2]:
# Read the list of detection CSVs (one file name per line, relative to
# "bees/"). Reading stops at the first blank line or at end of file.
# The context manager guarantees the listing file is closed afterwards.
filenames = []
with open("bees/files.txt") as files:
    for raw_line in files:
        line = raw_line.strip()
        if not line:
            break
        filenames.append("bees/" + line)

# Stack every CSV into one frame, then keep only the most recently loaded row
# for each (track_starttime, track_tagid) pair.
vdf = pd.concat(map(pd.read_csv, filenames), ignore_index=True)
vdf = vdf.drop_duplicates(subset=['track_starttime','track_tagid'], keep='last')

In [3]:
#SECONDS THRESHOLD
#Gap, in seconds, between the end of one detection and the start of the
#next one. A gap of at least t marks the boundary between two separate
#events; detections closer together than that are grouped into the
#same event.
t = 15

#DISTANCE THRESHOLD
#Minimum difference in y position used by the backwards-displacement rule
#to classify an event as entering or exiting. The rule compares the end y
#of the event's last detection with the start y of that same detection; if
#the difference is smaller than t2 it steps back to the start y of the
#previous detection, and so on, until the difference reaches t2 or the
#first detection of the event is reached.
#NOTE(review): units are presumably pixels - confirm against the tracker.

t2 = 150

#ANGLE THRESHOLD
#Margin, in degrees, trimmed from both ends of the two half-circles used by
#the angle rules: headings in [angle, 180 - angle] count as entering and
#headings in [180 + angle, 360 - angle] count as exiting.

angle = 10

In [4]:
# Parse both timestamp columns value by value and force tag ids to strings.
for time_col in ('track_endtime', 'track_starttime'):
    vdf[time_col] = vdf[time_col].map(pd.to_datetime)
vdf['track_tagid'] = vdf['track_tagid'].map(str)

# Order detections per tag and chronologically; the old index is kept as a column.
vdf = vdf.sort_values(by=['track_tagid','track_starttime']).reset_index()

# Seconds between the end of each detection and the start of the following row.
# NOTE(review): the shift is not grouped by tag, so the last detection of one
# tag is compared with the first detection of the next tag.
vdf['next_t'] = vdf['track_starttime'].shift(periods=-1)
vdf['timedelta'] = (vdf['next_t'] - vdf['track_endtime']).dt.total_seconds()

# A row closes its event when the following detection starts at least t seconds
# later, or more than 50 seconds before this one ended.
# NOTE(review): the final row has no successor (NaN gap), so it is never flagged.
gap_too_long = vdf['timedelta'] >= t
gap_runs_backwards = vdf['timedelta'] < -50
vdf['separate_event'] = gap_too_long | gap_runs_backwards
break_indexes = vdf.index[vdf['separate_event'].to_numpy()].tolist()

## RULES

In [5]:
#RULE 1 - INSIDE-OUTSIDE OR OUTSIDE-INSIDE
def in_out(data):
    """Classify an event from the `track_shape` of its last detection.

    Returns "exiting" for "inside_outside", "entering" for "outside_inside"
    and "unknown" for any other value.
    """
    last_shape = data['track_shape'].iloc[-1]
    if last_shape == "inside_outside":
        return "exiting"
    if last_shape == "outside_inside":
        return "entering"
    return "unknown"
        

#RULE 2 ITERATE BACKWARDS DISPLACEMENT
def displacement(data, threshold=None):
    """Classify an event by searching backwards for a large enough y displacement.

    The end y of the last detection is compared with the start y of the last
    detection, then of the one before it, and so on. The search stops at the
    first detection whose difference reaches `threshold` in absolute value, or
    at the first detection of the event if none does.

    Parameters
    ----------
    data : DataFrame
        Detections of one event, in order, with `track_starty` and
        `track_endy` columns. Must contain at least one row.
    threshold : number, optional
        Minimum absolute y difference that ends the search. Defaults to the
        notebook-level distance threshold `t2`.

    Returns
    -------
    str
        'exiting' if the selected difference is negative, 'entering' if it is
        positive, 'unknown' if it is zero or not comparable (NaN).
    """
    if threshold is None:
        threshold = t2

    final = data['track_endy'].iloc[-1]
    start_ys = data['track_starty'].to_numpy()

    # Walk from the last detection towards the first; `dif` keeps the value
    # from the iteration where the search stopped.
    for prev in start_ys[::-1]:
        dif = final - prev
        if abs(dif) >= threshold:
            break

    if dif < 0:
        return 'exiting'
    if dif > 0:
        return 'entering'
    return 'unknown'
            

#RULE 3 DIRECTIONAL ANGLE

def _classify_heading(dx, dy, angle_threshold=None):
    """Turn a direction vector (dx, dy) into 'entering', 'exiting' or 'unknown'.

    The heading is measured in degrees in [0, 360]. np.arctan2 resolves the
    quadrant directly, so vectors lying exactly on the y axis use the same
    convention as every other vector (+y is 90 degrees, -y is 270 degrees),
    and a zero vector maps to 0 degrees.

    Headings in [threshold, 180 - threshold] are 'entering', headings in
    [180 + threshold, 360 - threshold] are 'exiting', everything else
    (including NaN) is 'unknown'. `angle_threshold` defaults to the
    notebook-level `angle` setting.
    """
    if angle_threshold is None:
        angle_threshold = angle

    deg = np.rad2deg(np.arctan2(dy, dx)) % 360

    if 180 + angle_threshold <= deg <= 360 - angle_threshold:
        return 'exiting'
    if angle_threshold <= deg <= 180 - angle_threshold:
        return 'entering'
    return 'unknown'


def angles(data, angle_threshold=None):
    """Classify an event from the mean heading of all its detections.

    Each value of the `angle` column (degrees) is converted to a unit vector;
    the vectors are averaged component-wise and the heading of the resulting
    vector is classified.

    Parameters
    ----------
    data : DataFrame
        Detections of one event with an `angle` column in degrees.
    angle_threshold : number, optional
        Margin in degrees around the 0/180 axis that is treated as 'unknown'.
        Defaults to the notebook-level `angle` setting.

    Returns
    -------
    str
        'entering', 'exiting' or 'unknown'.
    """
    headings = np.deg2rad(data['angle'].to_numpy())
    avg_x = np.average(np.cos(headings))
    avg_y = np.average(np.sin(headings))
    return _classify_heading(avg_x, avg_y, angle_threshold)

#RULE 4 LAST DIRECTIONAL ANGLE

def lastangle(data, angle_threshold=None):
    """Classify an event from the heading of its last detection only.

    Parameters
    ----------
    data : DataFrame
        Detections of one event, in order, with an `angle` column in degrees.
        Must contain at least one row.
    angle_threshold : number, optional
        Margin in degrees around the 0/180 axis that is treated as 'unknown'.
        Defaults to the notebook-level `angle` setting.

    Returns
    -------
    str
        'entering', 'exiting' or 'unknown'.
    """
    heading = np.deg2rad(data['angle'].iloc[-1])
    return _classify_heading(np.cos(heading), np.sin(heading), angle_threshold)


#RULE 5 INIT - FINAL DISPLACEMENT
def displacementinit(data):
    """Classify an event from its overall y displacement.

    Compares the start y of the first detection with the end y of the last
    detection: 'exiting' when the event ends at a smaller y than it started,
    'entering' when it ends at a larger y, 'unknown' otherwise.
    """
    y_columns = data[['track_endy','track_starty']]
    last_end_y = y_columns['track_endy'].iloc[-1]
    first_start_y = y_columns['track_starty'].iloc[0]

    if first_start_y > last_end_y:
        return 'exiting'
    if last_end_y > first_start_y:
        return 'entering'
    return 'unknown'



In [6]:
# Number the events: every run of rows up to and including a break index gets
# the ordinal of that break (0, 1, 2, ...).
# The write goes through a single .iloc call on the frame itself. Assigning via
# vdf['event_id'].iloc[...] is chained assignment, which may write to a
# temporary copy and leave vdf unchanged.
# break_indexes holds index labels of a freshly reset index, so they are
# also valid row positions.
# NOTE(review): rows after the last break index keep event_id = NaN.
vdf['event_id'] = np.nan
event_col = vdf.columns.get_loc('event_id')
first_detection = 0
for i, last_detection in enumerate(break_indexes):
    vdf.iloc[first_detection:last_detection + 1, event_col] = i
    first_detection = last_detection + 1

In [7]:
# Reference labels used to score the rules. Later cells read two columns:
# `event` (compared against each rule's verdict) and `line` (matched
# against event_id after subtracting one).
cheatsheet = pd.read_csv("cheatsheet.csv")

## ACCURACY

In [17]:
# Rule label -> classifier implementing that rule.
functions = {
    'rule1': in_out,
    'rule2': displacement,
    'rule3': angles,
    'rule4': lastangle,
    'rule5': displacementinit,
}

# Count of cheatsheet events each rule labelled correctly.
accuracy = dict.fromkeys(functions, 0)

for _, sheet_row in cheatsheet.iterrows():
    event = sheet_row['event']
    # the cheatsheet line number is one higher than the matching event_id
    detections = vdf[vdf['event_id'] == sheet_row['line'] - 1].copy().reset_index()

    for rule, func in functions.items():
        accuracy[rule] += int(func(detections) == event)
        

In [18]:
# Fraction of cheatsheet events each rule classified correctly.
results = {
    rule: hits / len(cheatsheet)
    for rule, hits in accuracy.items()
}
results

{'rule1': 0.69, 'rule2': 0.89, 'rule3': 0.71, 'rule4': 0.76, 'rule5': 0.8}

## IMPLICATIONS

In [9]:
rule_names = ['rule1', 'rule2', 'rule3', 'rule4', 'rule5']

# implications[a][b]: number of cheatsheet events that rule a classified
# correctly and on which rule b returned the same verdict.
# implications[a][a] is therefore rule a's count of correct events.
implications = {rule: dict.fromkeys(rule_names, 0) for rule in rule_names}

for _, sheet_row in cheatsheet.iterrows():
    event = sheet_row['event']
    # the cheatsheet line number is one higher than the matching event_id
    detections = vdf[vdf['event_id'] == sheet_row['line'] - 1].copy().reset_index()

    # verdict of every rule for this event
    rules = {
        'rule1': in_out(detections),
        'rule2': displacement(detections),
        'rule3': angles(detections),
        'rule4': lastangle(detections),
        'rule5': displacementinit(detections),
    }

    for key, value in rules.items():
        if value == event:
            # the rule itself always counts; every other rule counts when it agrees
            agreeing = [
                name for name, verdict in rules.items()
                if name == key or verdict == value
            ]
            for name in agreeing:
                implications[key][name] += 1
   


In [10]:
# Of the events rule1 classified correctly, the share on which each rule
# returned the same verdict.
results = {
    other: agreed / implications['rule1']['rule1']
    for other, agreed in implications['rule1'].items()
}
results

{'rule1': 1.0,
 'rule2': 1.0,
 'rule3': 0.7536231884057971,
 'rule4': 0.8405797101449275,
 'rule5': 0.927536231884058}

In [11]:
# Of the events rule2 classified correctly, the share on which each rule
# returned the same verdict.
results = {
    other: agreed / implications['rule2']['rule2']
    for other, agreed in implications['rule2'].items()
}
results

{'rule1': 0.7752808988764045,
 'rule2': 1.0,
 'rule3': 0.7303370786516854,
 'rule4': 0.7865168539325843,
 'rule5': 0.898876404494382}

In [12]:
# Of the events rule3 classified correctly, the share on which each rule
# returned the same verdict.
results = {
    other: agreed / implications['rule3']['rule3']
    for other, agreed in implications['rule3'].items()
}
results

{'rule1': 0.7323943661971831,
 'rule2': 0.9154929577464789,
 'rule3': 1.0,
 'rule4': 0.9859154929577465,
 'rule5': 0.8450704225352113}

In [13]:
# Of the events rule4 classified correctly, the share on which each rule
# returned the same verdict.
results = {
    other: agreed / implications['rule4']['rule4']
    for other, agreed in implications['rule4'].items()
}
results

{'rule1': 0.7631578947368421,
 'rule2': 0.9210526315789473,
 'rule3': 0.9210526315789473,
 'rule4': 1.0,
 'rule5': 0.8026315789473685}

In [14]:
# Of the events rule5 classified correctly, the share on which each rule
# returned the same verdict.
results = {
    other: agreed / implications['rule5']['rule5']
    for other, agreed in implications['rule5'].items()
}
results

{'rule1': 0.8, 'rule2': 1.0, 'rule3': 0.75, 'rule4': 0.7625, 'rule5': 1.0}