In [None]:
#basic package
import tqdm
import os
import numpy as np
import glob
import pandas as pd
import sys
import time
import datetime as dt
import itertools
import re
import math
import pickle #to download MLP vectors
from scipy.stats import entropy

#plot
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Make the parent folder importable so that the project modules (UTILS, config_origins) can be loaded.
PACKAGE_PARENT = '../'
# '__file__' is passed as a literal string here, so the path below is <cwd>/__file__ and
# SCRIPT_DIR ends up being the resolved current working directory.
_pseudo_script_path = os.path.join(os.getcwd(), os.path.expanduser('__file__'))
SCRIPT_DIR = os.path.dirname(os.path.realpath(_pseudo_script_path))
_package_root = os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))
sys.path.append(_package_root)
# Project imports: must come after the sys.path update above.
from UTILS import preprocessing_Origins, nbrh_WG_open
import config_origins as config

# Define parameters

In [None]:
# Reminder: every value below is read from the configuration module.
print('change the configuration file if not done yet!')
# Run identifier and data folders.
id_run = config.id_run
path_initial_data = config.path_initial_data
path_extracted_data = config.path_extracted_data
# Lookup dictionaries defined in the configuration.
dico_matching = config.dico_matching
dico_garden_opening_hour = config.dico_garden_opening_hour
print(id_run)

# Download data

In [None]:
# Cleaned movement data: file <id_run>_CLEANEDDATA.csv in the extracted-data folder.
_cleaned_data_file = os.path.join(path_extracted_data, id_run+'_CLEANEDDATA.csv')
df_init = pd.read_csv(
    _cleaned_data_file,
    sep=';',
    parse_dates=['Timestamp', 'date'],
)
print(df_init.shape)
df_init.head(3)

In [None]:
# Daily variables: file <id_run>_daily_ALL_variables.csv in the extracted-data folder.
_daily_file = os.path.join(path_extracted_data, id_run+'_daily_ALL_variables.csv')
# Columns parsed as dates (day-first format).
_daily_date_cols = [
    'level',
    'FirstTimestamp_1_Zone',
    'FirstTimestamp_2_Zone',
    'FirstTimestamp_3_Zone',
    'FirstTimestamp_4_Zone',
    'FirstTimestamp_5_Zone',
]
df_daily = pd.read_csv(_daily_file, sep=';', parse_dates=_daily_date_cols, dayfirst=True)
print(df_daily.shape)
df_daily.head(3)

# Transformation - Could be added in UTILS if another run

In [None]:
# Add the tracking system ID of each row, looked up from its pen in config.dico_pen_ts.
def _tracking_system_of(pen_id):
    """Return the tracking system ID registered for `pen_id` in config.dico_pen_ts."""
    return config.dico_pen_ts[pen_id]

df_daily['TrackingSystemID'] = df_daily['PenID'].map(_tracking_system_of)
_tracking_system_counts = df_daily['TrackingSystemID'].value_counts()
display(_tracking_system_counts)

In [None]:
# Flag whether the animal spent most of the night in zone 5.
df_daily['Is_Sleeping_UP'] = df_daily['night_Max_duration_zones'].isin(['5_Zone'])
display(df_daily['Is_Sleeping_UP'].value_counts(normalize=True))
# Better than a binary flag: height of the sleeping zone.
print(df_daily.shape)
# Drop the rows without a night zone. The explicit .copy() makes the filtered frame independent,
# so the column assignment below does not raise a SettingWithCopyWarning.
df_daily = df_daily[df_daily['night_Max_duration_zones'].notna()].copy()
# -2: litter (zone 2: equal to 0 tiers underneath).
df_daily['SleepingHeight'] = df_daily['night_Max_duration_zones'].map(lambda x: int(x.split('_')[0])-2)
display(df_daily['SleepingHeight'].value_counts(normalize=True))

In [None]:
# Number of transitions / stays per hour.
# Hours during which the winter garden was open on each day (computed by UTILS.nbrh_WG_open).
df_daily['nbr_h_WGopen'] = df_daily['level'].map(lambda x: nbrh_WG_open(config.dico_garden_opening_hour,x))
li_ = ['vertical_travel_distance','nbr_stays_2_Zone','nbr_stays_3_Zone','nbr_stays_4_Zone','nbr_stays_5_Zone']
# Vectorised column division instead of a row-wise apply: same values, no Python-level loop over rows.
# A zero denominator yields inf/NaN rather than an exception.
for v in li_:
    df_daily[v+'_perh'] = df_daily[v] / df_daily['nbr_h_per_day']
# Zone 1 stays are normalised by the winter-garden opening hours, not by the hours of the day.
df_daily['nbr_stays_1_Zone_perh'] = df_daily['nbr_stays_1_Zone'] / df_daily['nbr_h_WGopen']
df_daily[['vertical_travel_distance_perh','vertical_travel_distance', 'nbr_stays_3_Zone', 'nbr_stays_3_Zone_perh',
        'nbr_h_per_day']].tail(3)

In [None]:
# Add the overall share of chaotic movements.
li_chaotic = [
    'chaoticmvt_Middle1_Zone_nbr_180mn',
    'chaoticmvt_Middle2_Zone_nbr_180mn',
    'chaoticmvt_Middle3_Zone_nbr_180mn',
    'chaoticmvt_Middle4_Zone_nbr_180mn',
]

def _perc_chaotic_transition(row):
    """Percentage of chaotic transitions among all transitions of the row, rounded to the nearest integer.

    Returns NaN when the row does not have a strictly positive number of transitions.
    """
    total = row['Total_number_transition']
    if total > 0:
        return int(round(row[li_chaotic].sum() / total * 100, 0))
    return np.nan

df_daily['percChaoticTransition'] = df_daily.apply(_perc_chaotic_transition, axis=1)
# Days without any transition get 0% instead of NaN.
_no_transition = df_daily['Total_number_transition']==0
df_daily.loc[_no_transition, 'percChaoticTransition'] = 0
df_daily[li_chaotic+['percChaoticTransition','Total_number_transition']]

In [None]:
# Show every column name currently in the daily table.
display(df_daily.columns.tolist())

# Dates tracked and name

In [None]:
# Verify that hen_129 is absent: it was a focal bird but had 0 working tracked days.
_hen_129_rows = df_daily[df_daily['HenID']=='hen_129']
if _hen_129_rows.shape[0]>0:
    # Raise an explicit, descriptive error rather than print('ERROR') + sys.exit(),
    # so the failure explains itself in the cell output.
    raise ValueError('hen_129 has %d row(s) in df_daily although it had 0 working tracked days'
                     % _hen_129_rows.shape[0])
df_daily[df_daily['HenID']=='hen_129']

In [None]:
# Hours-per-day information for the rows of 2020-09-30.
_day_to_check = dt.datetime(2020,9,30)
_cols_hours = ['PenID','nbr_h_per_day','correct_amount_of_hour','level']
df_daily.loc[df_daily['level']==_day_to_check, _cols_hours].head(5)

In [None]:
# Quick look at the hours, transition count and total-duration check columns.
_cols_overview = [
    'PenID',
    'nbr_h_per_day',
    'correct_amount_of_hour',
    'level',
    'Total_number_transition',
    'verification_daily_total_duration',
]
df_daily[_cols_overview].head(5)

In [None]:
# Quick look at the DIB / WIB / DOA / WOA columns next to the date.
_cols_age = ['level','DIB','WIB','DOA','WOA']
df_daily.loc[:, _cols_age].head(5)

# Daily Variables

In [None]:
# All columns of the daily table.
li_var = list(df_daily.columns)
# Identifier columns shown alongside every verification.
li_general = ['HenID', 'level','PenID']
# Columns that still need to be verified. This is an independent copy, so later changes to the
# to-do list can never alter li_var (the original bound both names to the same list object).
li_var_TODO = list(li_var)
len(li_var_TODO)
#display(list(li_var))

In [None]:
#print all column with nan
#df_daily.columns[df_daily.isna().any()].tolist()

## Hens variables + disturbances days

In [None]:
# Hen-level columns verified in this section.
li_hens = [
    'CLASS',
    'TagID',
    'FocalLegringName',
    'R-Pen',
    'InitialStartDate',
]
print(li_hens)

In [None]:
# Specific bird (in pen 9): it should not be here on the 3rd, 4th, 5th, 8th and 9th of October,
# i.e. it should have NO data at all on those days.
h = 'hen_90'
d = dt.datetime(2020,10,10)
_cols_to_show = li_general+li_hens+['duration_5_Zone','night_Max_duration_zones']
_hen_until_d = (df_daily['HenID']==h)&(df_daily['level']<=d)
df_daily.loc[_hen_until_d, _cols_to_show].sort_values(['level'])

In [None]:
# Verify that the days with a faulty device (listed in *_df_alldeviceinfo) are removed.
# hen_147 - tag 82: 25.01-03.02 not working (due to the RFID installation and then the tag bugging).
h = 'hen_147'
dmin = dt.datetime(2021,1,24)
dmax = dt.datetime(2021,2,10)
_hen_in_window = (df_daily['HenID']==h)&(df_daily['level']<=dmax)&(df_daily['level']>=dmin)
df_daily.loc[_hen_in_window, li_general+li_hens].sort_values(['level'])
# TO CHECK: is the pen ID present even when the daily variables are not?
# TO DO AT THE END OF THE STUDY: check each individual case of GAP, e.g. tag 171 (hen 133) had big gaps on 11.01.2021.
# Sometimes it is NaN because the student wrote +-30/70 (weight 04-01-2021, pen 12).
# CORRECT!

In [None]:
# Remove the hen-level columns from the list of variables still to verify.
_verified = set(li_hens)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
len(li_var_TODO)

# sample entropy

In [None]:
# What was done in UTILS (kept for reference):
# sample entropy computed without accounting for duration
#df_daily['SampleEntropy'] = df_daily['list_of_zones'].map(lambda x: sampen([config.dico_zone_order[i] for i in x],
#                                                                               r=0,m=2) if len(x)>5 else np.nan)
_cols_entropy = ['distribution_entropy','list_of_zones']
df_daily.loc[:, _cols_entropy]

In [None]:
# Maximum possible entropy with 5 zones (uniform distribution over the zones).
nbr_zone = 5
_entropy_tol = 1e-8  # tolerance for floating-point comparisons (same precision as the original 8-decimal rounding)
_max_entropy = math.log(nbr_zone, 2)
_uniform_entropy = entropy([1]*nbr_zone, base=2)
print(_max_entropy, _uniform_entropy)
# Sanity check 1: scipy's entropy of a uniform distribution must equal log2(nbr_zone).
if not math.isclose(_max_entropy, _uniform_entropy, rel_tol=0, abs_tol=_entropy_tol):
    raise ValueError('ERROR 1: entropy of the uniform distribution (%r) differs from log2(nbr_zone) (%r)'
                     % (_uniform_entropy, _max_entropy))
# Sanity check 2: no daily entropy may exceed the theoretical maximum. The tolerance avoids a
# false alarm when an exactly uniform day is off by one floating-point rounding step.
_observed_max = df_daily['distribution_entropy'].max()
if _observed_max > _max_entropy + _entropy_tol:
    raise ValueError('ERROR 2: distribution_entropy reaches %r, above the maximum %r'
                     % (_observed_max, _max_entropy))

In [None]:
# Histogram of the daily distribution entropy.
_entropy_values = df_daily['distribution_entropy']
plt.hist(_entropy_values);

## Night

In [None]:
# Night-related columns still to verify: names containing 'night' or 'nbr_transition_at_',
# plus the transitions in the first hour after light off.
li_night = []
for _name in li_var_TODO:
    if ('night' in _name) or ('nbr_transition_at_' in _name):
        li_night.append(_name)
li_night.append('nbr_transition_next1hafterlightoff')
li_night

In [None]:
# Another bird, picked with:
#df_daily[(df_daily['nbr_transition_at_h8']>3)&(df_daily['level']==dt.datetime(2020,10,7))].head(3)
h = 'hen_24'
# Day of a change in the light schedule: on the 7th the light turned off at 18h and on the 8th it
# turned on at 8h (not 9h as it was on the 7th).
d = dt.datetime(2020,10,7)
_next_day = d+dt.timedelta(days=1)
# Raw records of the bird on day d and the following day.
_raw_two_days = (df_init['date']==d)|(df_init['date']==_next_day)
df = df_init[(df_init['HenID']==h)&_raw_two_days]
# Daily variables of the bird for the same two days.
_daily_two_days = (df_daily['level']==d)|(df_daily['level']==_next_day)
_daily_rows = df_daily[(df_daily['HenID']==h)&_daily_two_days]
display(_daily_rows[li_general+li_night].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
#--> correct

In [None]:
# Remove the night columns from the list of variables still to verify.
_verified = set(li_night)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
print(len(li_var_TODO))

## Transitions in aviary (zone 1, zone 2, zone 3, zone 4)

#### First timestamp in each zone and latency

In [None]:
# Latency columns: every column whose name contains 'latency_'.
li_latency = list(filter(lambda name: 'latency_' in name, li_var))
# First-timestamp columns, plus the total number of zones visited.
li_first_timestamp = list(filter(lambda name: 'FirstTimestamp' in name, li_var))
li_first_timestamp.append('Total_number_zone')

In [None]:
# Random bird.
h = 'hen_89'
d = dt.datetime(2020,11,18) # until 17h
# Raw records and daily variables of that bird on that day.
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
X = df_daily.loc[_daily_mask]
display(X[li_general+li_first_timestamp+li_latency])
_records = tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist()))
display(_records)
#--> correct!

In [None]:
# Specific bird.
#df_daily[li_general+li_first_timestamp].head(3)
h = 'hen_130'
d = dt.datetime(2020,10,6)
# Everything recorded for that bird up to and including day d.
_raw_mask = (df_init['HenID']==h)&(df_init['date']<=d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']<=d)
X = df_daily.loc[_daily_mask]
_cols_to_show = li_general+li_first_timestamp+li_latency+['verification_daily_total_duration']
display(X[_cols_to_show])
_records = tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist()))
display(_records)
#--> correct!

In [None]:
# Specific bird.
#df_daily[li_general+li_first_timestamp].head(3)
h = 'hen_90'
d = dt.datetime(2020,10,6) # 3, 4, 5: tracking should not be used
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
X = df_daily.loc[_daily_mask]
display(X[li_general+li_first_timestamp].sort_values(['level']))
_records = tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist()))
display(_records)
#--> correct!

In [None]:
# Remove the first-timestamp and latency columns from the list of variables still to verify.
_verified = set(li_first_timestamp) | set(li_latency)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
len(li_var_TODO)

#### Chaotic/motor transition

In [None]:
# Chaotic-movement columns: every column whose name contains 'chaotic'.
li_chaotic = list(filter(lambda name: 'chaotic' in name, li_var))

In [None]:
# First rows of the chaotic-movement columns next to the transition totals.
_cols_chaotic_overview = ['level','HenID','percChaoticTransition','Total_number_transition']+li_chaotic
df_daily.loc[:, _cols_chaotic_overview].head(5)

In [None]:
# Random bird.
h = 'hen_89'
#d = dt.datetime(2020,11,18)
d = dt.datetime(2020,11,1)
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
X = df_daily.loc[_daily_mask]
display(X[li_chaotic])
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
# Durations computed by hand from the records above.
_gap_1 = dt.datetime(2020,11,1,11,55,56)-dt.datetime(2020,11,1,10,26,45)
_gap_2 = dt.datetime(2020,11,1,12,25,54)-dt.datetime(2020,11,1,12,20,19)
display(_gap_1, _gap_2)
#--> correct!

In [None]:
# Specific bird, picked with:
#df_daily[df_daily['chaoticmvt_Middle2_Zone_nbr']>0][['HenID','level']+li_chaotic]
h = 'hen_131'
d = dt.datetime(2020,10,2)
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
X = df_daily.loc[_daily_mask]
display(X[li_chaotic])
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
# Durations computed by hand from the records above.
_gap_1 = dt.datetime(2020,10,2,9,37,35)-dt.datetime(2020,10,2,9,19,12)
_gap_2 = dt.datetime(2020,10,2,9,37,43)-dt.datetime(2020,10,2,9,37,35)
display(_gap_1, _gap_2)
#--> correct!

In [None]:
# Remove the chaotic-movement columns from the list of variables still to verify.
_verified = set(li_chaotic)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
len(li_var_TODO)

### transitions & stays

In [None]:
# Remaining transition columns: travel distance, total transitions and every 'missing' column.
li_transition_rest = ['vertical_travel_distance','Total_number_transition']
li_transition_rest.extend(name for name in li_var if 'missing' in name)

In [None]:
# Stay-related columns, grouped by kind.
li_appearnces = [
    'nbr_appearances_3_Zone',
    'nbr_appearances_4_Zone',
    'nbr_appearances_5_Zone',
    'nbr_appearances_2_Zone',
]
li_nbr_stays = [
    'nbr_stays_1_Zone',
    'nbr_stays_2_Zone',
    'nbr_stays_3_Zone',
    'nbr_stays_4_Zone',
    'nbr_stays_5_Zone',
]
li_nbr_stays_perh = [
    'nbr_stays_1_Zone_perh',
    'nbr_stays_2_Zone_perh',
    'nbr_stays_3_Zone_perh',
    'nbr_stays_4_Zone_perh',
    'nbr_stays_5_Zone_perh',
]
# Percentage version of each stay counter: same name with a 'perc_' prefix.
li_perc_stays = []
for _stay_col in li_nbr_stays:
    li_perc_stays.append('perc_'+_stay_col)
_cols_stays = li_appearnces+li_nbr_stays+li_nbr_stays_perh+li_perc_stays
_cols_stays = _cols_stays+['nbr_stays_total','nbr_h_per_day','Total_number_transition']
df_daily[_cols_stays]

In [None]:
# Duration columns per zone and their percentage counterparts ('perc_' prefix).
li_dur = [
    'duration_1_Zone',
    'duration_2_Zone',
    'duration_3_Zone',
    'duration_4_Zone',
    'duration_5_Zone',
]
li_perc = []
for _dur_col in li_dur:
    li_perc.append('perc_'+_dur_col)
df_daily.loc[:, li_dur+li_perc]

In [None]:
# Ratio between the percentage of duration and the percentage of stays, per zone.
li_ratio = [
    'ratio_percdur_percstays_1_Zone',
    'ratio_percdur_percstays_2_Zone',
    'ratio_percdur_percstays_3_Zone',
    'ratio_percdur_percstays_4_Zone',
    'ratio_percdur_percstays_5_Zone',
]
_cols_ratio = li_perc+li_perc_stays+li_ratio
df_daily.loc[:, _cols_ratio]

In [None]:
# Verify: Total_number_zone==1 if and only if Total_number_transition==0.
_cols_iff = ['Total_number_transition','HenID','level','Total_number_zone']
_no_transition = df_daily['Total_number_transition']==0
_single_zone = df_daily['Total_number_zone']==1
display(df_daily[_no_transition][_cols_iff].head(3))
display(df_daily[_single_zone][_cols_iff].head(3))
# A row breaks the equivalence exactly when one condition holds and the other does not.
_inconsistent = df_daily[_single_zone != _no_transition]
if _inconsistent.shape[0]>0:
    # Show the offending rows and raise a descriptive error rather than print + sys.exit().
    display(_inconsistent[_cols_iff])
    raise ValueError('ERROR: you dont have: Total_number_zone==1 if and only if Total_number_transitions==0 '
                     '(%d inconsistent rows)' % _inconsistent.shape[0])

In [None]:
# Random bird.
h = 'hen_130'
d = dt.datetime(2020,10,7)
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
display(df_daily.loc[_daily_mask, li_general+li_transition_rest].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
#--> correct

In [None]:
# Another random bird.
h = 'hen_30'
d = dt.datetime(2020,10,4) # 9h-18h
# NOTE(review): the raw records are filtered only by an upper bound on the timestamp, so records
# from earlier days are included as well - confirm this is intended.
_end_of_day = dt.datetime(2020,10,4,18,0,0)
df = df_init.loc[(df_init['HenID']==h)&(df_init['Timestamp']<=_end_of_day)]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
display(df_daily.loc[_daily_mask, li_general+li_transition_rest].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
# vertical_travel_distance: 23 is correct (note: it came from 5 to 2 at 12:20:20)
# down_missingZone_mvtPerc correct: 2/20*100=10% (one from 5 to 2 and one from 4 to 2), we had 20 transitions
#--> correct

In [None]:
# Same bird, this time looking at the number of stays.
h = 'hen_30'
d = dt.datetime(2020,10,4) # 9h-18h
# NOTE(review): the raw records are filtered only by an upper bound on the timestamp, so records
# from earlier days are included as well - confirm this is intended.
_end_of_day = dt.datetime(2020,10,4,18,0,0)
df = df_init.loc[(df_init['HenID']==h)&(df_init['Timestamp']<=_end_of_day)]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
display(df_daily.loc[_daily_mask, li_general+li_nbr_stays].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
# vertical_travel_distance: 23 is correct (note: it came from 5 to 2 at 12:20:20)
# total number of transitions: 21 rows including 2 repetitive ones, so it is 20
# stay_longer_60sec_4_Zone: correct, it did not count the one of <60 seconds
# down_missingZone_mvtPerc correct: 2/21*100=9,5% (one from 5 to 2 and one from 4 to 2)
# stay_longer_60sec_4_Zone: correct, it did not count the one of <60 seconds
# note that stay_* is at maximum one more than the total number of transitions
# total number of transitions: 21 rows including a repetition of 3_zone, so it is 20
#--> correct

In [None]:
# Remove the stay and transition columns from the list of variables still to verify.
_verified = set()
for _group in (li_nbr_stays, li_transition_rest, li_nbr_stays_perh, li_perc_stays, li_appearnces):
    _verified.update(_group)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
print(len(li_var_TODO))

## duration

In [None]:
# Remaining duration columns, excluding the nestbox, WG and night ones.
_excluded_tags = ('Nestbox', 'WG', 'night')
li_dur = [
    name for name in li_var_TODO
    if 'duration' in name and all(tag not in name for tag in _excluded_tags)
]
li_dur

In [None]:
# All zone-3 duration / stay columns side by side.
_cols_zone3 = [
    'ratio_percdur_percstays_3_Zone',
    'duration_3_Zone',
    'verification_daily_total_duration',
    'perc_duration_3_Zone',
    'nbr_stays_total',
    'nbr_stays_3_Zone',
    'perc_nbr_stays_3_Zone',
    'nbr_appearances_3_Zone',
]
df_daily[_cols_zone3]

In [None]:
# Same bird.
h = 'hen_130'
d = dt.datetime(2020,10,7) # 9h-18h is the day
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
display(df_daily.loc[_daily_mask, li_general+li_dur].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
# duration_*_zone, correct: durations recomputed by hand from the records above.
_dur_zone3_first = dt.datetime(2020,10,7,9,3,45)-dt.datetime(2020,10,7,9,0,0)   # zone 3 #225
_dur_zone2 = dt.datetime(2020,10,7,9,7,11)-dt.datetime(2020,10,7,9,3,45)        # zone 2 #206
_dur_zone3_second = dt.datetime(2020,10,7,9,7,35)-dt.datetime(2020,10,7,9,7,11) # zone 3 #24
_dur_zone4 = dt.datetime(2020,10,7,9,9,45)-dt.datetime(2020,10,7,9,7,35)        # zone 4 #130
_dur_zone5 = dt.datetime(2020,10,7,18,0,0)-dt.datetime(2020,10,7,9,9,45)        # zone 5 #31815
display(_dur_zone3_first, _dur_zone2, _dur_zone3_second, _dur_zone4, _dur_zone5)
# rest also correct
#--> correct (should all be in seconds except when explicitly written "_mn")

In [None]:
# Remove the duration columns from the list of variables still to verify.
_verified = set(li_dur)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
print(len(li_var_TODO))

## Nestbox

In [None]:
# Nestbox columns still to verify, plus two related columns whose names do not contain 'Nestbox'.
li_nb = list(filter(lambda name: 'Nestbox' in name, li_var_TODO))
li_nb.extend(['sucessIntrusion_9','NBtimefirstvisitlonger900_minus_time1visit'])
li_nb

In [None]:
# Same bird.
# 22.11.2020: the light is on from 3h to 17h (day time).
h = 'hen_54'
d = dt.datetime(2020,11,22) # late date, to have nestbox usage
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
display(df_daily.loc[_daily_mask, li_general+li_nb].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
# Durations (in seconds) computed by hand from the records above.
_delta_1 = dt.datetime(2020,11,22,5,13,10)-dt.datetime(2020,11,22,4,45,21)
display(_delta_1.total_seconds())
_delta_2 = dt.datetime(2020,11,22,4,45,21)-dt.datetime(2020,11,22,4,15,21)
display(_delta_2.total_seconds())
# CORRECT!
# Note that B10h_Nestbox_nbrvisit is the number of visits DURING DAY TIME in the nest box.

In [None]:
# Remove the nestbox columns from the list of variables still to verify, then show what is left.
_verified = set(li_nb)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
print(len(li_var_TODO))
display(li_var_TODO)

## WG

In [None]:
# Winter-garden (WG / zone 1) columns: remaining 'WG' columns, every column containing '1_Zone',
# and duration_1_Zone.
li_wg = [i for i in li_var_TODO if 'WG' in i]+[i for i in df_daily.columns if '1_Zone' in i]+['duration_1_Zone']
# The three parts overlap ('duration_1_Zone' contains '1_Zone'; a column can contain both 'WG' and
# '1_Zone'). Drop the repeats, keeping first-seen order, so that selecting df_daily[... + li_wg]
# does not show the same column twice.
li_wg = list(dict.fromkeys(li_wg))
# NOTE(review): the '1_Zone' substring also matches 'chaoticmvt_Middle1_Zone_nbr_180mn' - confirm this is intended.
li_wg
#'in_WG_15mnAfterOpening', 'Max_duration_WG', latency1_zone, perc_1_Zone_while_WG_open

In [None]:
# TODO: verify!
# WG columns on the days before the first opening of the winter garden.
_first_opening = config.date_first_opening_WG
print(_first_opening)
_before_opening = df_daily['level']<config.date_first_opening_WG
df_daily.loc[_before_opening, ['HenID','level']+li_wg]

In [None]:
# WG columns from the first opening of the winter garden onwards.
_since_opening = df_daily['level']>=config.date_first_opening_WG
df_daily.loc[_since_opening, ['HenID','level','is_mvt_night']+li_wg]

In [None]:
# Bird picked with: df_daily[df_daily['Max_duration_WG']>0].tail(15)
h = 'hen_98'
d = dt.datetime(2021,1,10)  # 10h to 16h20 (to be checked with new data)
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
display(df_daily.loc[_daily_mask, li_general+li_wg].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
#--> correct

In [None]:
# Random bird.
h = 'hen_12'
d = dt.datetime(2021,1,9)  # 10h to 16h20 (to be checked with new data)
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
display(df_daily.loc[_daily_mask, li_general+li_wg].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
# Durations computed by hand.
# NOTE(review): these datetimes are dated 2021-01-15 while d is 2021-01-09; the differences only
# depend on the times of day, but confirm which date was meant.
_gap_1 = dt.datetime(2021,1,15,9,14)-dt.datetime(2021,1,15,8,58)
_gap_2 = dt.datetime(2021,1,15,10,55)-dt.datetime(2021,1,15,9,23)
display(_gap_1, _gap_2)
# Correct

In [None]:
# Remove the winter-garden columns from the list of variables still to verify, then show what is left.
_verified = set(li_wg)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
print(len(li_var_TODO))
display(li_var_TODO)

## Activity

In [None]:
# Activity columns still to verify: names containing 'activity' or 'percentile'.
li_activity = []
for _name in li_var_TODO:
    if ('activity' in _name) or ('percentile' in _name):
        li_activity.append(_name)
li_activity

In [None]:
# Verify the NaN values of the activity percentiles against the number of transitions.
_cols_activity_check = [
    'Total_number_transition',
    'activity_5percentile_h',
    'activity_25percentile_h',
    'activity_50percentile_h',
    'activity_95percentile_h',
]
df_daily[_cols_activity_check]

In [None]:
# Random bird.
h = 'hen_98'
d = dt.datetime(2021,1,10)  # 10h to 16h20 (to be checked with new data)
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
display(df_daily.loc[_daily_mask, li_general+li_activity].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
#--> correct

In [None]:
# Remove the activity columns from the list of variables still to verify, then show what is left.
_verified = set(li_activity)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
print(len(li_var_TODO))
display(li_var_TODO)

## Empirical probability (empproba)

In [None]:
# Empirical-probability columns (note that it was implemented only for these zones).
li_proba = [
    'empproba_2_Zone',
    'empproba_3_Zone',
    'empproba_4_Zone',
]

In [None]:
# Random, already used bird.
# By definition it is the empirical probability of going up, e.g.:
#li = [1,1,1,1,2,2,2,3,3,4,4,2,2,4,1,2] --> [1,2,3,4,3,2,3,4,3,2,1,2] --> {2: 0.6666666666666666, 3: 0.5, 4: 0.0}
h = 'hen_98'
d = dt.datetime(2021,1,10)
_raw_mask = (df_init['HenID']==h)&(df_init['date']==d)
df = df_init.loc[_raw_mask]
_daily_mask = (df_daily['HenID']==h)&(df_daily['level']==d)
display(df_daily.loc[_daily_mask, li_general+li_proba].sort_values(['level']))
display(tuple(zip(df['Timestamp'].tolist(),df['Zone'].tolist())))
#--> correct

In [None]:
# Remove the empirical-probability columns from the list of variables still to verify, then show what is left.
_verified = set(li_proba)
li_var_TODO = [name for name in li_var_TODO if name not in _verified]
print(len(li_var_TODO))
display(li_var_TODO)

## Other

print(len(li_var_TODO))
display(li_var_TODO)
--> only distribution_entropy and verification_daily_total_duration are important; the rest is meant to help debugging in case of errors

In [None]:
# How often each total daily duration occurs.
_total_duration = df_daily['verification_daily_total_duration']
_total_duration.value_counts()

In [None]:
# Days whose total duration is exactly 28800 s.
# On the 30th of September the light was on 9h-17h: 8h = 28'800 s.
_is_8h_day = df_daily['verification_daily_total_duration']==28800
df_daily.loc[_is_8h_day, 'level'].value_counts()
# 2020-09-30: first full day, but the chickens that never moved since they entered the day before still have no data.
# Correct!

In [None]:
# Rows whose total duration is below 28800 s: count them per hen, then per day.
_short_days = df_daily[df_daily['verification_daily_total_duration']<28800]
display(_short_days['HenID'].value_counts())
_short_days['level'].value_counts()
# 2020-09-30: first full day, but the chickens that never moved since they entered the day before still have no data.

In [None]:
# Detail of the rows whose total duration is below 28800 s.
_is_short_day = df_daily['verification_daily_total_duration']<28800
_cols_short_day = li_general+['verification_daily_total_duration']+li_first_timestamp
df_daily.loc[_is_short_day, _cols_short_day]
# The bird can appear in any zone... we do not know since when, nor where it was before going there,
# so we can only remove those days for those specific chickens.
# TODO at the end of the experiment: CHECK ALL CASES ONE BY ONE

In [None]:
# How equally do the birds spend their time across the different zones?
_entropy_no_nan = df_daily['distribution_entropy'].dropna()
plt.hist(_entropy_no_nan, bins='auto');

# Save and last verification

In [None]:
# Final selected variables (for now).
# Hen / day identification columns.
li_h = [
    'HenID', 'PenID', 'CLASS', 'TagID', 'FocalLegringName', 'R-Pen', 'InitialStartDate', 'Treatment',
    'level', 'DIB', 'WIB', 'DOA', 'WOA',
    'verification_daily_total_duration', 'TrackingSystemID',
]
# Movement columns, assembled group by group (the order of the groups is the order in the saved file).
li_mvt = []
# percentage of duration: for the first two months
li_mvt += ['perc_duration_2_Zone', 'perc_duration_3_Zone', 'perc_duration_4_Zone', 'perc_duration_5_Zone']
# raw durations: for all, easier to analyse
li_mvt += ['duration_1_Zone', 'duration_2_Zone', 'duration_3_Zone', 'duration_4_Zone', 'duration_5_Zone']
# number of stays, absolute and per hour
li_mvt += ['nbr_stays_1_Zone', 'nbr_stays_2_Zone', 'nbr_stays_3_Zone', 'nbr_stays_4_Zone', 'nbr_stays_5_Zone']
li_mvt += ['nbr_stays_1_Zone_perh', 'nbr_stays_2_Zone_perh', 'nbr_stays_3_Zone_perh', 'nbr_stays_4_Zone_perh',
           'nbr_stays_5_Zone_perh']
# ratio percentage of duration / percentage of stays
li_mvt += ['ratio_percdur_percstays_2_Zone', 'ratio_percdur_percstays_3_Zone', 'ratio_percdur_percstays_4_Zone',
           'ratio_percdur_percstays_5_Zone']
# latencies
li_mvt += ['latency_2_Zone_h', 'latency_3_Zone_h', 'latency_4_Zone_h', 'latency_5_Zone_h',
           'latency_since15mnnestbox_h']
# overall transition / zone summaries
li_mvt += ['Total_number_transition', 'nbr_stays_total', 'distribution_entropy', 'Total_number_zone',
           'Max_duration_zones']
# travel distance, activity percentiles and chaotic transitions
li_mvt += ['vertical_travel_distance', 'vertical_travel_distance_perh', 'activity_5percentile_h',
           'activity_50percentile_h', 'activity_95percentile_h', 'percChaoticTransition']
# night: not more, as flickering needs more attention
li_mvt += ['night_Max_duration_zones', 'is_mvt_night']
# empirical probabilities
li_mvt += ['empproba_3_Zone', 'empproba_4_Zone', 'empproba_2_Zone']
# winter garden
li_mvt += ['in_WG_15mnAfterOpening', 'Max_duration_WG_h', 'perc_1_Zone_while_WG_open']
# sleeping height
li_mvt += ['SleepingHeight']
li_2keep = li_h + li_mvt

In [None]:
#print(list(df_daily.columns))

In [None]:
# Save the verified subset of columns to <id_run>_daily_ALL_variables_verified.csv.
# DataFrame.filter silently ignores requested names that are not columns of the frame, so report
# them first: a misspelled or missing variable would otherwise vanish from the saved file unnoticed.
_missing_cols = [c for c in li_2keep if c not in df_daily.columns]
if len(_missing_cols)>0:
    print('WARNING: columns in li_2keep that are absent from df_daily and will not be saved:', _missing_cols)
df_daily.filter(li_2keep).to_csv(os.path.join(path_extracted_data, id_run+'_daily_ALL_variables_verified.csv'),sep=';',index=False)
print(df_daily.shape)
df_daily.head(3)