## For each bee ID, provide the number of detections in the hive within a given time slot.

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
import numpy as np
import pandas as pd
import psycopg2
import psycopg2.extras
from datetime import timedelta, datetime
import time

import bb_utils
import bb_utils.meta
import bb_utils.ids
import bb_backend
from bb_backend.api import FramePlotter, VideoPlotter
from bb_backend.api import get_plot_coordinates, transform_axis_coordinates, get_image_origin

# Point the bb_backend API module at a server on localhost, port 8000.
# NOTE(review): the attribute is spelled `server_adress` here — confirm this matches the name bb_backend.api reads.
bb_backend.api.server_adress = 'localhost:8000'
# Key/value connection parameters for the 'beesbook' database (user 'reader', empty password).
# NOTE(review): no visible cell uses connect_str; presumably intended for psycopg2.connect — confirm or remove.
connect_str = """dbname='beesbook' user='reader' host='tonic.imp.fu-berlin.de' 
                 password='' application_name='mehmed'"""

In [2]:
# Observation window: begins at 12:00 on 2016-08-23 and covers `num_hours` hours,
# each hour being divided into `num_intervals_per_hour` intervals.
# (First detections are on 20.07.2016, last are on 19.09.2016, i.e. the data spans July-September 2016.)
datetime_start = datetime(year=2016, month=8, day=23, hour=12)
num_hours = 12
num_intervals_per_hour = 120

In [3]:
# Define the bee IDs of interest by looking up a known forager group.
meta = bb_utils.meta.BeeMetaInfo()

group_id = 20
# Look the group up once and reuse it for both the ID list and the date printout.
group = meta.get_foragergroup(group_id)

# Each `dec12` entry of the group is parsed with BeesbookID.from_dec_12 and converted
# with as_ferwar(); presumably this is the representation used by the `bee_id`
# column of the detection tables — TODO confirm.
bee_ids_from_group = [
    bb_utils.ids.BeesbookID.from_dec_12(dec12_id).as_ferwar()
    for dec12_id in group.dec12
]
print(group.date)

2016-08-23 00:00:00


### Reading from saved CSV files

In [4]:
#for testing: a single csv with hardcoded name
#sample_df = pd.read_csv('2016-08-23_00:00:00.csv', parse_dates=['timestamp'])

# Load the first hour before the loop; file names are the hour's start time
# formatted as "%Y-%m-%d_%H:%M:%S" plus ".csv", resolved against the working directory.
start_csv_name = (datetime_start).strftime("%Y-%m-%d_%H:%M:%S")+".csv"
print('Processing '+start_csv_name+' before the loop')
hourly_frames = [pd.read_csv(start_csv_name, parse_dates=['timestamp'])]
num_rows_loaded = hourly_frames[0].shape[0]

#TODO: Don't rerun CSV table with every interval length change
#read a number of hour-long csvs (note: thekla memory crashes if >16)
for i in range(1, num_hours):
    csv_name = (datetime_start + timedelta(hours=i)).strftime("%Y-%m-%d_%H:%M:%S")+".csv"
    print('Processing '+csv_name)
    new_data = pd.read_csv(csv_name, parse_dates=['timestamp'])
    # Collect the frames and concatenate once after the loop: calling pd.concat on
    # every iteration re-copies all previously loaded rows each time.
    hourly_frames.append(new_data)
    num_rows_loaded += new_data.shape[0]
    print('Num. rows after appending: '+str(num_rows_loaded))

# Single concatenation. The index is deliberately left as pd.concat produces it
# (each file's own row labels, so labels repeat across files), matching what
# repeated pairwise concatenation would yield.
detections_df = pd.concat(hourly_frames)
# Drop the list so the per-hour frames are not kept alive alongside the combined table.
del hourly_frames

Processing 2016-08-23_12:00:00.csv before the loop
Processing 2016-08-23_13:00:00.csv
Num. rows after appending: 6474164
Processing 2016-08-23_14:00:00.csv
Num. rows after appending: 9981923
Processing 2016-08-23_15:00:00.csv
Num. rows after appending: 13537412
Processing 2016-08-23_16:00:00.csv
Num. rows after appending: 17134071
Processing 2016-08-23_17:00:00.csv
Num. rows after appending: 20759708
Processing 2016-08-23_18:00:00.csv
Num. rows after appending: 24165008
Processing 2016-08-23_19:00:00.csv
Num. rows after appending: 27375729
Processing 2016-08-23_20:00:00.csv
Num. rows after appending: 30671785
Processing 2016-08-23_21:00:00.csv
Num. rows after appending: 34009006
Processing 2016-08-23_22:00:00.csv
Num. rows after appending: 37211008
Processing 2016-08-23_23:00:00.csv
Num. rows after appending: 40508773


In [5]:
# Sanity check: (rows, columns) of the combined detections table.
detections_shape = detections_df.shape
print(detections_shape)

(40508773, 14)


In [6]:
# The observation period is split evenly: interval length = total duration / number of intervals.
total_num_intervals = num_hours * num_intervals_per_hour
interval_length = timedelta(hours=num_hours) // total_num_intervals

# Presence table: one row per bee, with an 'id' column on the left followed by one
# zero-initialised column per interval (columns labelled 0 .. total_num_intervals-1).
bee_ids = pd.DataFrame({'id': bee_ids_from_group})
intervals = pd.DataFrame(np.zeros((len(bee_ids_from_group), total_num_intervals)))
presence_df = pd.concat([bee_ids, intervals], axis=1)

In [7]:
#Iterate over intervals and mark presence:
#if a bee has at least one detection with interval_starttime <= timestamp < interval_endtime,
#set the cell for that bee (row) and interval (column) to 1.

# Only detections of bees listed in presence_df can ever set a cell, so narrow the
# detections table once instead of re-scanning every detection for each interval.
tracked_detections = detections_df.loc[
    detections_df['bee_id'].isin(presence_df['id']), ['timestamp', 'bee_id']
]

interval_starttime = datetime_start
for interval in range(total_num_intervals):
    #choose detections for interval (half-open: start inclusive, end exclusive)
    interval_endtime = interval_starttime + interval_length
    in_interval = (
        (tracked_detections['timestamp'] >= interval_starttime)
        & (tracked_detections['timestamp'] < interval_endtime)
    )
    interval_detections = tracked_detections[in_interval]
    # Compute the set of detected IDs once per interval rather than once per bee.
    detected_ids = interval_detections['bee_id'].unique()
    # DataFrame.set_value no longer exists in pandas >= 1.0; a boolean-mask .loc
    # assignment marks all detected bees of this interval in one step.
    presence_df.loc[presence_df['id'].isin(detected_ids), interval] = 1
    interval_starttime = interval_endtime

  


In [15]:
#Apply rolling median to filter noise
#use diff to identify entries (with 1) and exits (with -1)
#(sum_of_abs / 2) gives us the presumed number of trips a bee takes

rolling_window = 9  # width (in intervals) of the centred median filter

# Roll along the interval axis. The frame is transposed so the window runs over rows,
# then transposed back; this avoids rolling(axis=1), which recent pandas deprecates.
# With the default min_periods, cells without a full centred window come out as NaN.
presence_values = presence_df.iloc[:, 1:]
rolled = presence_values.T.rolling(window=rolling_window, center=True).median().T
diffed = rolled.diff(axis=1)
# NaN cells are skipped by sum(); every entry/exit pair contributes 2 to the absolute sum.
summed = diffed.abs().sum(axis=1) / 2

# Work on a real copy: a plain assignment would only alias presence_df, so writing the
# smoothed values would overwrite the raw presence table and make re-running this cell
# smooth already-smoothed data.
pres_copy = presence_df.copy()
pres_copy.iloc[:, 1:] = rolled
sumtab = pd.concat([pres_copy['id'], summed], axis=1)
print(datetime_start)
sumtab

# sumtab

2016-08-23 12:00:00


Unnamed: 0,id,0
0,1799,25.5
1,1593,15.0
2,2106,4.0
3,1662,11.5
4,1180,23.5
5,2984,27.0
6,1197,12.5
7,1714,22.0
8,1471,16.5
9,1232,19.0
