In [1]:
#basic package
import tqdm
import os
import numpy as np
import glob
import pandas as pd
import sys
import time
import datetime as dt
import itertools
import re
import math

#plot
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#relative location (with respect to SCRIPT_DIR) of the directory that is appended to sys.path below
PACKAGE_PARENT = '../'
#note: '__file__' is passed as a literal string (not the __file__ variable), so SCRIPT_DIR is the directory part of
#<current working directory>/__file__, i.e. normally the resolved current working directory
SCRIPT_DIR = os.path.dirname(os.path.realpath(os.path.join(os.getcwd(), os.path.expanduser('__file__'))))
#append the parent of SCRIPT_DIR to the import search path; this must happen before the two imports below
sys.path.append(os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT)))
#NOTE(review): presumably UTILS and config_origins are located in the directory appended above - confirm
#a function named ZoneVariable is also defined in a later cell of this notebook; running that cell shadows this import
from UTILS import ZoneVariable
import config_origins as config

scipy.sparse.sparsetools is a private module for scipy.sparse, and should not be used.
  _deprecated()


# Define parameters

In [3]:
#reminder for the user: every value below is read from the configuration module
print('change the configuration file if not done yet!')

#prefix used in the name of the files read and written by this notebook
id_run = config.id_run
dico_matching = config.dico_matching

#paths towards the initial and the extracted data
path_initial_data = config.path_initial_data
path_extracted_data = config.path_extracted_data

#show which run is currently configured
print(id_run)

change the configuration file if not done yet!
v3_


# Group consistency

In [4]:
#to be more reproducible, we reload the records from the file that was saved by the cleaning step
#NOTE(review): 'duration' is listed in parse_dates although the values displayed for it look like plain numbers
#(e.g. 13.0) - confirm that parsing this column as a date is intended
df = pd.read_csv(
    os.path.join(path_extracted_data, id_run+'_records_GeneralCleaning.csv'),
    sep=';',
    parse_dates=['Timestamp', 'Timestamp_initial', 'date', 'duration'],
)
#quick check of what was loaded: (number of rows, number of columns), then the first three records
print(df.shape)
df.head(3)

(309261, 12)


Unnamed: 0,Timestamp,HenID,Zone,PenID,TagID,log_file_name,date,ts_order_logname,ts_order_list,ms,Timestamp_initial,duration
0,2020-09-30 00:01:25,hen_111,3_Zone,11,147,log_00000001,2020-09-30,log_00000001_42425,log_00000001_42425,0.0,2020-09-30 00:01:25,13.0
1,2020-09-30 00:01:38,hen_111,2_Zone,11,147,log_00000001,2020-09-30,log_00000001_42436,log_00000001_42436,0.0,2020-09-30 00:01:38,731.0
2,2020-09-30 00:13:49,hen_111,3_Zone,11,147,log_00000001,2020-09-30,log_00000001_43035,log_00000001_43035,0.0,2020-09-30 00:13:49,2.0


In [None]:
def _draw_zone_heatmap(M, Xaxis, dico_order_name, title):
    '''Draw the heatmap of one day on its own new figure and return (fig, ax).

    M: 2D array with one row per zone order and one column per entry of Xaxis
    Xaxis: list of timestamps, one per column of M; every 30th one is written on the x axis as 'HH:MM'
    dico_order_name: dictionary giving, for a row index of M, the name written on the y axis (' ' if absent)
    title: title of the figure'''
    #an explicit new figure (instead of plt.clf() on the current one), so that a figure opened elsewhere
    #is never cleared or closed by this function
    fig, ax = plt.subplots()
    li_xlabel = [ts.strftime('%H:%M') if i%30==0 else '' for i,ts in enumerate(Xaxis)]
    sns.heatmap(M, cmap="YlGnBu", ax=ax, xticklabels=li_xlabel,
                yticklabels=[dico_order_name.get(j,' ') for j in range(M.shape[0])])
    ax.invert_yaxis()
    ax.set_title(title)
    return fig, ax


def ZoneVariable(df_ts, config, save=True, red_dot_for_each_hen=True, nbr_bird_per_square_meter=False):

    '''From a time series (one column per hen, named with the prefix 'hen_'), draw for each day a heatmap of the
    number of birds in each zone. Only one value per minute is used (the row with the smallest timestamp of that
    minute), the other rows are not considered.

    df_ts: dataframe with the columns 'Timestamp', 'day', 'hour' and one column per hen holding the zone of that
        hen at that timestamp. df_ts is not modified.
    config: object with the attributes path_extracted_data, id_run, dico_zone_order (zone -> row of the heatmap),
        dico_zone_plot_name (zone -> name written on the y axis) and dico_zone_meter2 (zone -> value by which
        the number of birds is divided when nbr_bird_per_square_meter is True)
    save: if True, each figure is saved as png in <path_extracted_data>/visual/Nbr_bird_In_Zone (the folder is
        created in any case)
    red_dot_for_each_hen: if True, one more heatmap is drawn per hen and per day, with a red dot showing in which
        zone that hen was at each plotted timestamp, in order to understand its synchronicity with the other
        birds and if/when it likes the crowd. A hen/day whose png file already exists is skipped.
    nbr_bird_per_square_meter: if True, the number of birds is divided by the number of square meters associated
        with that zone

    Returns nothing; prints the total running time in minutes.
    Raises KeyError if 'Timestamp', 'day' or 'hour' is missing from df_ts, or if a hen is in a zone that is not
    in dico_zone_order while drawing its red dots.'''

    #local import: Counter is not part of the imports done at the top of the notebook
    from collections import Counter

    #start recording the time it lasts (time.clock() does not exist anymore since python 3.8)
    START_TIME = time.perf_counter()

    #initialise variable
    path_extracted_data = config.path_extracted_data
    dico_zone_order = config.dico_zone_order
    id_run = config.id_run
    dico_zone_plot_name = config.dico_zone_plot_name
    dico_zone_meter2 = config.dico_zone_meter2

    #fail early and clearly if a needed column is not there
    li_missing = [c for c in ['Timestamp','day','hour'] if c not in df_ts.columns]
    if len(li_missing)>0:
        raise KeyError('ZoneVariable: missing column(s) in df_ts: '+', '.join(li_missing))

    li_hen = [i for i in df_ts.columns if isinstance(i, str) and i.startswith('hen_')]
    plot_type = 'number of birds'
    if nbr_bird_per_square_meter:
        plot_type = plot_type+' per m2'
    #naming of the yaxis: row of the heatmap -> name of the zone (if two zones share a row, the last one of
    #dico_zone_order gives its name)
    dico_order_name = {order: dico_zone_plot_name[zone_] for zone_, order in dico_zone_order.items()}
    nbr_row = max(dico_zone_order.values())+1 #+1 as the orders start from 0
    #position of the red dot: same as the row of the zone, and a missing zone ('nan') is put at the very bottom
    dico_zone_order_ = dico_zone_order.copy()
    dico_zone_order_['nan'] = -0.5

    #create the directory where to save, if not existing yet
    path_ = os.path.join(path_extracted_data,'visual','Nbr_bird_In_Zone')
    os.makedirs(path_, exist_ok=True)

    #set once, before any figure is created, so that all the figures get the same style
    sns.set(font_scale=0.6)

    #for each day draw a heatmap
    for day in tqdm.tqdm(df_ts['day'].dropna().unique()):
        #keep only the date part for titles and file names; str(day) can separate date and time with a 'T'
        #or with a space depending on the type of the values given by unique()
        day_name = str(day).split('T')[0].split(' ')[0]
        df_ = df_ts[df_ts['day']==day].sort_values(['Timestamp'])
        #xaxis might be different over the days, if not complete days, so we will take the appropriate timestamp
        #take only the smallest timestamp per minute (computed without adding a column to the dataframe of the caller)
        minute_ = df_['Timestamp'].map(lambda x: x.minute)
        Xaxis = df_['Timestamp'].groupby([df_['hour'], minute_]).min().tolist()
        if len(Xaxis)==0:
            #nothing to draw for that day
            continue
        #one row per plotted timestamp (the first one if a timestamp is duplicated), in the same order as Xaxis:
        #used for the heatmap and for the red dots, so that both always have as many values as Xaxis
        df_x = df_.drop_duplicates(subset=['Timestamp'], keep='first').set_index('Timestamp').loc[Xaxis]

        M = np.zeros(shape=(nbr_row, len(Xaxis)))
        for i,li in enumerate(df_x[li_hen].values):
            #number of hens in each zone at that timestamp
            c = Counter(li)
            for zone_, order in dico_zone_order.items():
                if zone_ in c:
                    M[order][i] = c[zone_]
                    if nbr_bird_per_square_meter:
                        M[order][i] = M[order][i] / dico_zone_meter2[zone_]

        #plot and save
        fig, ax = _draw_zone_heatmap(M, Xaxis, dico_order_name, day_name+'      '+plot_type)
        if save:
            fig.savefig(os.path.join(path_,id_run+'_'+plot_type+'_'+day_name+'.png'), format='png', dpi=300)
        #close it, otherwise all the figures are kept in memory
        plt.close(fig)

        #add a red point for each hen and save the hen plot
        if red_dot_for_each_hen:
            for hen_ in li_hen:
                path_plt = os.path.join(path_, id_run+'_'+plot_type+'_'+hen_+'_'+day_name+'.png')
                #already done in a previous run
                if os.path.exists(path_plt):
                    continue

                #plot the whole heatmap again
                fig, ax = _draw_zone_heatmap(M, Xaxis, dico_order_name,
                                             day_name+'      '+plot_type+' and '+hen_ +' (red)')
                #add info of the hen
                li_zone_hen = [dico_zone_order_[str(x)]+0.5 for x in df_x[hen_].tolist()] #0.5 to show it in the middle of the heatmap bar
                ax.scatter(range(len(Xaxis)), li_zone_hen, marker='d', s=1, color='red') #s = size
                if save:
                    fig.savefig(path_plt, format='png', dpi=300, bbox_inches='tight')
                plt.close(fig)

    END_TIME = time.perf_counter()
    print ("Total running time: %.2f mn" %((END_TIME-START_TIME)/60))
    
    