## For each bee id, be able to provide the number of detections in the hive in the given timeslot.

In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import matplotlib
import math
import seaborn as sns
import numpy as np
import pandas as pd
import psycopg2
import psycopg2.extras
from datetime import timedelta, datetime
from bee_helpers import detections_to_presence
import time

import bb_utils
import bb_utils.meta
import bb_utils.ids
import bb_backend
from bb_backend.api import FramePlotter, VideoPlotter
from bb_backend.api import get_plot_coordinates, transform_axis_coordinates, get_image_origin

bb_backend.api.server_adress = 'localhost:8000'
connect_str = """dbname='beesbook' user='reader' host='tonic.imp.fu-berlin.de' 
                 password='' application_name='mehmed'"""

meta = bb_utils.meta.BeeMetaInfo()

  """)


### Define constant parameters 

In [2]:
#Parameters for loading data, currently using known date of 23th, august 2016)
num_hours = 1
datetime_start = datetime(2016, 8, 23)


#Parameters for presenting data
bin_size_in_hours = 24

#Hyperparameters for the data wrangling process
num_intervals_per_hour = 60
rolling_window_size = 5

print("Starting from", datetime_start, "with number of hours:", num_hours)
print("\nBin size for the trip lengths plot:", bin_size_in_hours, "\nNumber of intervals per hour:", num_intervals_per_hour, "\nRolling win size:", rolling_window_size)

#(NOTE: First detections are on 20.07.2016, last are 19.09.2016 (3 months duration))

Starting from 2016-08-23 00:00:00 with number of hours: 1

Bin size for the trip lengths plot: 24 
Number of intervals per hour: 60 
Rolling win size: 5


### Get a group of bees to work on and calculate their ages

In [3]:
#Getting a known forager group from manual labeling experiments
meta = bb_utils.meta.BeeMetaInfo()

group_id = 20

bee_ids_as_beesbookid_format = list(map(bb_utils.ids.BeesbookID.from_dec_12, meta.get_foragergroup(group_id).dec12))
bee_ids_from_group = map(lambda i: i.as_ferwar(), bee_ids_as_beesbookid_format) #as ferwar

bee_ids_from_group = list(bee_ids_from_group)

In [4]:
#Calculate the ages for each bee of the forager group
bee_days_since_birth = [] 

for id in bee_ids_as_beesbookid_format:
    bee_days_since_birth.append((datetime_start - meta.get_hatchdate(id)).days)

### Process and save detections to presence form

In [6]:
csv_name = detections_to_presence(num_hours, datetime_start, num_intervals_per_hour, bee_ids_from_group)
#NOTE: consider removing from this file altogether
#TODO: tqdm

Processing /mnt/storage/janek/2016-08-23_00:00:00.csv before the loop


  presence_df.set_value(bee_row_number, interval, 1)


0 , SAVED /mnt/storage/janek/PRESENCE-2016-08-23_00_num_hours_1_int_size_60.csv


In [7]:
#Loading the intermediate result (saved from prevoius cell)
print('Loading '+csv_name)
presence_df = pd.read_csv(location_prefix+csv_name)
print('Num. rows after appending: '+str(presence_df.shape))
#Note that this adds an unwanted column in pres_df that causes trouble later - remove it here
#presence_df

TypeError: Can't convert 'NoneType' object to str implicitly

### Computing presence table

In [None]:
#Get lengths of bee trip intervals 
presence_temp2 = presence_df.iloc[:, 1:]
presence_temp2

In [None]:
# presence_temp2=presence_temp2.iloc[:, :-1]
presence_temp2

In [None]:
# presence_temp2.get_value(1799, 10)
# pd.concat([presence_df[]
# presence_temp2.iat[0, 10]

presence_temp2.iloc[:,1]

In [None]:
trip_lengths = []

for bee in range(0, presence_temp2.shape[0]):
    curr_trip_length = 0
    curr_bee_trip_lenghts = []
    #fill with trip lengths
    for interval in range(total_num_intervals): #t: 2880
        #get the 0/1 value from presence_df at the given (bee, interval)
        bool_is_present = presence_temp2.iat[bee, interval]
        if bool_is_present == 0.0: #bee not present in this interval
            if curr_trip_length != 0: #if we had a value for a trip length -> means trip ends here -> add it to trips and reset the counter
                curr_bee_trip_lenghts.append(curr_trip_length)
                curr_trip_length = 0
        if bool_is_present == 1.0: #bee present in this interval, means trip is underway -> increment the length counter 
            curr_trip_length += 1
    trip_lengths.append(curr_bee_trip_lenghts)


In [None]:
pd.DataFrame(trip_lengths)

In [None]:
# np.save('trip_lenghts_23_08.npy', trip_lengths)    # .npy extension is added if not given
# test = np.load('trip_lenghts_23_08.npy')

trip_lengths


flat_list = [item for sublist in trip_lengths for item in sublist]
flat_series = pd.Series(flat_list)


plt.figure(figsize=(30,10))


flat_series_filtered = flat_series[flat_series<20]
# flat_series.hist(bins=680)
plt.title('Histogram of trip lengths, num_intvs = '+str(num_intervals_per_hour)+', unrolled')
flat_series

In [None]:
#Preparing for rolling median
num_nans_to_clean = math.floor(rolling_window_size/2)

#apply copies of the first and last column as offset for the rolling window
first_col = presence_df.iloc[:, 1:2]
last_col = presence_df.iloc[:, -1:]

presence_temp = presence_df.iloc[:, 1:]

presence = presence_df.iloc[:, 1:]
for i in range(0,num_nans_to_clean):
    presence_temp = pd.concat([first_col, presence_temp, last_col] ,axis=1)
    
presence_df

In [None]:
# Applying rolling median window, to filter out noise in the dataframe
rolled = presence_temp.rolling(window=rolling_window_size,center=True,axis=1).median()

#clean up to get rid of the NaNs
rolled = rolled.iloc[:, num_nans_to_clean:-num_nans_to_clean]

In [None]:
#trip lengths again
rolled_trip_lenghs = []

for bee in range(0, rolled.shape[0]):
    curr_trip_length = 0
    curr_bee_trip_lenghts = []
    #fill with trip lengths
    for interval in range(total_num_intervals):
        
        #get the 0/1 value from presence_df at the given (bee, interval)
        bool_is_present = rolled.iat[bee, interval]

        if bool_is_present == 0.0: #bee not present in this interval
            if curr_trip_length != 0: #if we had a value for a trip length, add it to trips and reset the counter
                curr_bee_trip_lenghts.append(curr_trip_length)
                curr_trip_length = 0
        if bool_is_present == 1.0: #bee present in this interval, make the trip longer
            curr_trip_length += 1
    rolled_trip_lenghs.append(curr_bee_trip_lenghts)

 
rolled.head()

In [None]:
flat_list = [item for sublist in rolled_trip_lenghs for item in sublist]
flat_series = pd.Series(flat_list)

plt.figure(figsize=(30,10))


flat_series_filtered = flat_series[flat_series<20]
flat_series.hist(bins=340)
plt.title('Histogram of trip lengths, num_intvs = '+str(num_intervals_per_hour)+', roll_winsize = '+str(rolling_window_size)+'')

In [None]:
#use diff to identify entries (with 1) and exits (with -1)
#(sum_of_abs / 2) gives us the presumed number of trips a bee takes 
diffed = rolled.diff(axis=1)
diffed.iloc[:,0] = np.zeros([11,1]) #clean out a column of NaNs 
diffed.head()

#a copy to be used to preview rolled + diffed presence 
#not actually used in further calculations (they are just based on 'diffed')
presence_df_copy = presence_df
presence_df_copy.iloc[:, 1:] = diffed 

trips_df = presence_df['id'] #for combining results in one table

In [None]:
presence_df_copy2 = presence_df_copy
presence_df_copy2.iloc[:,0] = bee_days_since_birth
presence_df_copy2.head()

In [None]:
#for loop config
bin_starttime = datetime_start
num_intervals_per_bin = num_intervals_per_hour*bin_size_in_hours
total_num_bins = int(num_hours / bin_size_in_hours)

print("num_intervals_per_bin: ", num_intervals_per_bin, "total_num_bins: ", total_num_bins)
for bin_nr in range(total_num_bins): 
    
    start_index = bin_nr*num_intervals_per_bin
    end_index = start_index + num_intervals_per_bin
    
    new_bin = diffed.iloc[:, start_index:end_index]
    
    #limit down to the right bin:
    #read num_intervals_per_hour*bin_size_per_hour columns (as each column represents one interval)
    
    summed = new_bin.abs().sum(axis=1) / 2
    summed.name = bin_nr
    trips_df = pd.concat([trips_df,summed],axis=1) #add this interval to the trips table
    #update loop index
    

# TODO:use a new variable instead of reusing it
# Change values to amount per hour instead of per interval
trips_df = trips_df * 3600 / num_intervals_per_hour

In [None]:
trips_ser = pd.Series(np.array(trips_df.mean(axis=0))[1:])
trips_ser.plot(kind='bar')
# trips_df

In [None]:
#saving (name still incomplete)
date_string = (datetime_start).strftime("%Y-%m-%d_%H:%M:%S")+".csv"
trips_df.to_csv('/mnt/storage/janek/'+'TRIPS-'+date_string+'-'+'h'+'.csv')

In [None]:
#Create dataframe with age and amount of trips
summed_age=pd.concat([pd.Series(bee_days_since_birth),summed],axis=1)
summed_age.columns=['age','amount']
summed_age

In [None]:
#Plot amount of trips relative to age of bee
summed_age.plot(x='age',y='amount',style='o')