In [None]:
#basic package
import tqdm
import os
import numpy as np
import pandas as pd
import sys
import time
import glob
import datetime as dt
import itertools
import math
import random
import pickle
from scipy import stats
from numpy.linalg import eig #eigenvector decomposition

#plot
import matplotlib.pyplot as plt
import seaborn as sns
from  matplotlib.colors import LinearSegmentedColormap  # to define our own palette for plots
from matplotlib import pyplot #barplot

In [None]:
# Make the parent folder importable so that the shared config module can be loaded.
PACKAGE_PARENT = '../'
# NOTE: '__file__' is a literal string here (notebooks have no __file__ variable), so the
# path is built from the current working directory.
_notebook_path = os.path.join(os.getcwd(), os.path.expanduser('__file__'))
SCRIPT_DIR = os.path.dirname(os.path.realpath(_notebook_path))
_package_dir = os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))
sys.path.append(_package_dir)
import config_origins as config

# Define parameters

In [None]:
# --- User parameters: should be consistent across all notebooks ---
# ADD YOUR PATH TO SAVE OUTPUT (e.g.: 'G:\\VPHI\\Welfare\\2- Research Projects\\OFHE2.OriginsE2\\DataOutput\\')
path_extracted_data = 'TO ADD'
# ADD A NAME, in case you want multiple runs with all outputs saved each time
saving_name = 'TO ADD'

# Readable names of the behaviours. The behavioural syndrome csv files and the repeatability
# csv files spell the variables differently, so four of the behaviours are defined twice.
_dico_mvt_name_syndrome = {
    'scalefoodsameDurTimingr50final': 'food reactivity index',
    'scaleverticaltraveldistancefulllightperinsideh': 'vertical travelled distance',
    'midcumZ4hMorning': 'mid-nestbox zone usage',
    'SleepingUppest': 'has slept on top tier',
    'WentinWG': 'has been outside',
}
_dico_mvt_name_repeatability = {
    'food_sameDurTiming_r50_final': 'food reactivity index',
    'Sleeping_Uppest': 'has slept on top tier',
    'vertical_travel_distance_fulllight_perinsideh': 'vertical travelled distance',
    'mid_cum_Z4_h_Morning': 'mid-nestbox zone usage',
}
dico_mvt_name = {**_dico_mvt_name_syndrome, **_dico_mvt_name_repeatability}

In [None]:
#TO DELETE WHEN SENDING TO PUBLICATION
# Local override of the two user parameters: the output folder is read from the config module
# and the run name is fixed to 'ALLOBS'. Every file name built from saving_name below uses this value.
path_extracted_data = config.path_extracted_data
saving_name = 'ALLOBS'

# Behavioural syndrome

### Download mean and CI estimates

In [None]:
# Load the interval bounds ('lower'/'upper') and the mean estimates ('x') of the model parameters,
# merge them into one table (one row per parameter) and derive:
#   - li_mvt: the behaviours that have a per-hen intercept,
#   - df_cor_summarized: the correlation parameters, with one boolean column per behaviour,
#   - df_sd_summarized: the standard deviation parameters.
df_IC = pd.read_csv(os.path.join(path_extracted_data,saving_name+'_df_CI_BRMS_BS_pred_allvar_FINAL.csv'), sep=',')
print(df_IC.shape)
display(df_IC.head(3))

df_mean = pd.read_csv(os.path.join(path_extracted_data,saving_name+'_df_mean_BRMS_BS_pred_allvar_FINAL.csv'), sep=',')
print(df_mean.shape)
display(df_mean.head(3))

#merge the estimates and their interval; the parameter name is stored in the unnamed first csv column
df = pd.merge(df_mean, df_IC, on=['Unnamed: 0'], how='outer')
df = df.rename(columns={'Unnamed: 0':'parameter','x':'value_mean'})
#text version "mean [lower, upper]" with two decimals, used as annotation in the figures
df['text'] = df.apply(lambda x: "%.2f [%.2f, %.2f]" % (round(x['value_mean'],2), round(x['lower'],2),
                                                        round(x['upper'],2)),
                      axis=1)
#separate parameter families (random effect, fixed effect, sigma, correlation): prefix before the first '_'
display(list(df['parameter'].unique()))
df['type'] = df['parameter'].map(lambda x: x.split('_')[0])
display(df['type'].value_counts())

#hen identifier of the per-hen intercepts: text between '[' and the first ','
#(None for the other parameters, otherwise the split would return the entire name)
df['HenID'] = df['parameter'].map(lambda x: x.split(',')[0].split('[')[-1] if 'hen_' in x else None)
print(df['HenID'].unique())

#behaviour of the hen intercepts to be visualised (correlation parameters are excluded)
df['mvtvariable'] = df['parameter'].map(lambda x: x.split('r_HenID__')[-1].split('[')[0]
                                        if ('r_HenID__' in x) and ('cor_HenID' not in x) else None)
#dropna() removes the missing entries whether pandas stores them as None or as NaN
#(a plain "!= None" test would let NaN through and break the loops over li_mvt)
li_mvt = list(df['mvtvariable'].dropna().unique())
print(li_mvt)
print('------------------all dataframe')
print(df.shape)
display(df.head(3))

#correlations dataframe: one boolean column per behaviour, True when the behaviour name appears
#in the parameter name (so a pair of behaviours is found by combining two of these columns)
df_cor_summarized = df[df['type']=='cor'].copy()
for mvt in li_mvt:
    df_cor_summarized[mvt] = df_cor_summarized['parameter'].map(lambda x: mvt in x)
print('------------------correlations dataframe')
display(df_cor_summarized)

#sd dataframe: strip the 'sd_HenID__' prefix so that 'parameter' reads '<behaviour>_Intercept'
df_sd_summarized = df[df['type']=='sd'].copy()
df_sd_summarized['parameter'] = df_sd_summarized['parameter'].map(lambda x: x.split('sd_HenID__')[-1])
print('------------------sd dataframe')
display(df_sd_summarized)

### correlation matrix

In [None]:
#correlation matrix shown as an upper triangle: only cells above the diagonal get a value
#and a "mean [lower, upper]" annotation, everything else is left empty (NaN)
n_mvt = len(li_mvt)
CorrM = np.full((n_mvt, n_mvt), np.nan)
#fixed-width placeholder so that the string array is wide enough for the annotations
labels_ = np.full((n_mvt, n_mvt), '                                ')
for i, j in itertools.combinations(range(n_mvt), 2):
    #row of the correlation parameter that involves both behaviours
    df_pair = df_cor_summarized[(df_cor_summarized[li_mvt[i]])&(df_cor_summarized[li_mvt[j]])]
    CorrM[i, j] = df_pair['value_mean'].values[0]
    #put the interval on its own line inside the cell
    labels_[i, j] = df_pair['text'].values[0].replace(' [','\n [')
    labels_[j, i] = ''
print(CorrM)

li_ticks = [dico_mvt_name[mvt_] for mvt_ in li_mvt]
plt.figure(figsize=(14,3)) #9, 6.5
#fmt='' is required for string annotations
sns.heatmap(CorrM, cmap='RdYlGn', annot=labels_, fmt = '',
            xticklabels=li_ticks, yticklabels=li_ticks,
            vmin=-0.55, vmax=0.55)
plt.xticks(rotation=10)
plt.xlabel('');
plt.ylabel('');
plt.savefig(os.path.join(path_extracted_data,'BS_corr.png'),dpi=300,format='png',bbox_inches='tight')
plt.show();

### eigenvector decomposition

In [None]:
#eigen decomposition of the correlation matrix of the selected behaviours
#https://numpy.org/doc/stable/reference/generated/numpy.linalg.eig.html
#behaviours included in the decomposition (midcumZ4hMorning is left out)
li_mvt_pc = [
    'scalefoodsameDurTimingr50final',
    'scaleverticaltraveldistancefulllightperinsideh',
    'SleepingUppest',
    'WentinWG',
]
#symmetric matrix with ones on the diagonal and the mean correlation of each pair elsewhere
n_pc_mvt = len(li_mvt_pc)
Corr = np.eye(n_pc_mvt)
for i in range(n_pc_mvt):
    for j in range(i+1, n_pc_mvt):
        df_pair = df_cor_summarized[(df_cor_summarized[li_mvt_pc[i]])&(df_cor_summarized[li_mvt_pc[j]])]
        Corr[i, j] = Corr[j, i] = df_pair['value_mean'].values[0]
print(Corr)

#eigen decomposition, reordered from the largest to the smallest eigenvalue
#(in R this would simply be eigen(Corr), same result)
eigenValues, eigenVectors = eig(Corr)
idx = np.argsort(eigenValues)[::-1]
eigenValues, eigenVectors = eigenValues[idx], eigenVectors[:,idx]
print(li_mvt_pc)
print(eigenValues)
#columns of eigenVectors are the components: column 0 is PC1, column 1 is PC2,
#and row k holds the loading of li_mvt_pc[k]
comp1 = list(eigenVectors[:,0])
comp2 = list(eigenVectors[:,1])
#sanity check: the vector has unit length
print(np.sqrt(sum(c*c for c in comp1)))
print(eigenVectors)
print('Explained var:')
total_eigen = sum(eigenValues)
li_exvar = [ev/total_eigen for ev in eigenValues]
print(sum(li_exvar))
li_exvar

### standard deviation of the loadings

In [None]:
#load all posterior samples to compute the std dev of the PC loadings
df_allit = pd.read_csv(os.path.join(path_extracted_data,'BRMS_BS_pred_allvar_FINAL.csv'), sep=',')
print(df_allit.shape)  #(1250, 4044) or (5000, 1011)
#keep the correlation columns only (named X*..., where * is the number of the chain).
#A list is used, not a set: pandas >= 2.0 raises a TypeError when a set is passed as indexer,
#and a list also keeps the original column order.
li_col = [x for x in df_allit.columns if 'cor_HenID__' in x]
print(len(li_col))
display(li_col)
df_allit = df_allit[li_col].copy()
#one row per posterior draw: keep the row number as identifier of the draw
df_allit['run'] = df_allit.index
print(df_allit.shape)
display(df_allit.head(3))

In [None]:
#distribution of the PC1 / PC2 loadings: the eigen decomposition is repeated on the correlation
#matrix of every posterior draw (row of df_allit) and every chain
li_chain = sorted(set(x.split('.')[0] for x in df_allit.columns if 'X' in x))
print(li_chain)

#for every chain, find once the column holding the correlation of each pair of behaviours.
#The chain prefix is compared exactly, so that each chain uses its own columns
#(a substring test on 'X1' would always read chain 1 and would also match X10, X11, ...)
li_pair = list(itertools.combinations(range(len(li_mvt_pc)), 2))
dico_chain_paircol = {}
for chain in li_chain:
    li_chain_col = [x for x in df_allit.columns if x.split('.')[0]==chain]
    dico_chain_paircol[chain] = {}
    for i, j in li_pair:
        vc = [x for x in li_chain_col if (li_mvt_pc[i] in x) and (li_mvt_pc[j] in x)]
        if len(vc)!=1:
            raise ValueError('Expected exactly one correlation column for chain %s and behaviours (%s, %s), found: %s'
                             % (chain, li_mvt_pc[i], li_mvt_pc[j], vc))
        dico_chain_paircol[chain][(i, j)] = vc[0]

li_comp1 = []
li_comp2 = []
for run in tqdm.tqdm(df_allit['run'].unique()):
    df_run = df_allit[df_allit['run']==run]
    for chain in li_chain:
        #symmetric correlation matrix of this draw, with ones on the diagonal
        M = np.eye(len(li_mvt_pc))
        for (i, j), col in dico_chain_paircol[chain].items():
            v = df_run[col].values[0]
            M[i][j] = v
            M[j][i] = v
        #eigen decomposition sorted from the largest to the smallest eigenvalue
        eigenValues, eigenVectors = eig(M)
        idx = eigenValues.argsort()[::-1]
        eigenValues = eigenValues[idx]
        eigenVectors = eigenVectors[:,idx]
        #first and second columns: loadings of PC1 and PC2
        li_comp1.append(eigenVectors[:,0])
        li_comp2.append(eigenVectors[:,1])
df_pc1loading = pd.DataFrame(li_comp1, columns=li_mvt_pc)
df_pc2loading = pd.DataFrame(li_comp2, columns=li_mvt_pc)
print(df_pc1loading.shape)
display(df_pc1loading.head(5))

#eigenvectors are not unique: multiplying one by any constant, including -1 (which simply changes
#the sign), gives another valid eigenvector. To account for this, the sign of each PC1 vector is
#chosen so that the reference behaviour is always positive: when it is not, ALL loadings of that
#row are multiplied by -1 (one vectorised operation, so no behaviour can be forgotten).
ref_mvt = 'scalefoodsameDurTimingr50final'
sign_ = pd.Series(np.where(df_pc1loading[ref_mvt]>0, 1, -1), index=df_pc1loading.index)
df_pc1loading = df_pc1loading.mul(sign_, axis=0)
df_pc1loading.head(5)

### visualise results

In [None]:
#heatmap of the PC1 and PC2 loadings, each cell annotated with "loading (± std dev)" where the
#std dev is taken over the absolute loadings of all posterior draws
plt.figure(figsize=(12,1.5))
cmap_ = LinearSegmentedColormap.from_list('rg',["lightgrey", "black"], N=256)
#fixed-width placeholder so that the string array is wide enough for the annotations
labels_ = np.full((2, len(li_mvt_pc)), '                   ')
for row_, (comp_, df_loading_) in enumerate([(comp1, df_pc1loading), (comp2, df_pc2loading)]):
    for k, mvt_ in enumerate(li_mvt_pc):
        sd_ = np.std(abs(df_loading_[mvt_]))
        labels_[row_][k] = '%.2f\n  (±%.2f)' % (round(comp_[k],2), round(sd_,2))
#row labels: component name and percentage of explained variance
li_ylabel = ['PC%d \n(%s%%)' % (k+1, str(round(li_exvar[k]*100))) for k in range(2)]
sns.heatmap(np.array([list(comp1), list(comp2)]), annot=labels_, fmt = '', cmap='RdYlGn',vmin=-1, vmax=1,
            yticklabels=li_ylabel,
            xticklabels=[dico_mvt_name[mvt_] for mvt_ in li_mvt_pc]);
plt.yticks(rotation=0)
plt.xticks(rotation=0)
plt.savefig(os.path.join(path_extracted_data,'BS_PrincComp_loadings.png'),dpi=300,format='png',bbox_inches='tight')
plt.show();

### visualise among behaviours correlation

In [None]:
#behaviours to plot against each other, in the order used for the pairwise figures
li_mvt = [
    'scalefoodsameDurTimingr50final',
    'scaleverticaltraveldistancefulllightperinsideh',
    'midcumZ4hMorning',
    'SleepingUppest',
    'WentinWG',
]

In [None]:
#one scatter plot per pair of behaviours: per-hen intercepts (mean and interval) with a line
#whose slope is derived from the estimated correlation and standard deviations
#choose color: https://matplotlib.org/stable/gallery/color/named_colors.html
#wide table: one row per hen, columns '<value_mean|lower|upper>_<behaviour>'
df_plt = df[df['HenID'].notna()].pivot(index='HenID', columns='mvtvariable',
                                       values=['value_mean','lower','upper']).reset_index()
df_plt.columns = ['_'.join(col) for col in df_plt.columns.values] #joining name from different index
display(df_plt.head(3))

#colour of specific pairs of behaviours (any other pair is drawn in black)
dico_pair_color = {
    frozenset(['scalefoodsameDurTimingr50final', 'scaleverticaltraveldistancefulllightperinsideh']): 'green',
    frozenset(['scalefoodsameDurTimingr50final', 'WentinWG']): 'yellowgreen', #lightsalmon
    frozenset(['scaleverticaltraveldistancefulllightperinsideh', 'SleepingUppest']): 'sandybrown',
    frozenset(['scaleverticaltraveldistancefulllightperinsideh', 'WentinWG']): 'darkgreen',
}

for mvtx, mvty in itertools.combinations(li_mvt, 2):
    print(mvtx)
    print(mvty)
    #row of the correlation parameter that involves both behaviours
    df_cor_pair = df_cor_summarized[(df_cor_summarized[mvtx])&(df_cor_summarized[mvty])]
    cor_mvtx_mvty = df_cor_pair['value_mean'].iloc[0]
    cor_mvtx_mvty_upper = df_cor_pair['upper'].iloc[0]
    cor_mvtx_mvty_lower = df_cor_pair['lower'].iloc[0]
    print(cor_mvtx_mvty)
    sd_mvtx = df_sd_summarized[df_sd_summarized['parameter']==mvtx+'_Intercept']['value_mean'].iloc[0]
    print(sd_mvtx)
    sd_mvty = df_sd_summarized[df_sd_summarized['parameter']==mvty+'_Intercept']['value_mean'].iloc[0]
    print(sd_mvty)
    #slope = covariance / variance of x = (r*sd_x*sd_y) / sd_x^2
    slope = cor_mvtx_mvty*sd_mvty/sd_mvtx
    print(slope)
    color_ = dico_pair_color.get(frozenset([mvtx, mvty]), 'black')

    x_mean = df_plt['value_mean_'+mvtx].to_numpy(dtype=float)
    y_mean = df_plt['value_mean_'+mvty].to_numpy(dtype=float)
    #errorbar expects non-negative distances from the point (first row: below, second row: above),
    #not the interval bounds themselves, so the bounds are converted to distances from the mean.
    #They are clipped at 0 in case a mean lies marginally outside its interval.
    xerr = np.clip([x_mean - df_plt['lower_'+mvtx].to_numpy(dtype=float),
                    df_plt['upper_'+mvtx].to_numpy(dtype=float) - x_mean], 0, None)
    yerr = np.clip([y_mean - df_plt['lower_'+mvty].to_numpy(dtype=float),
                    df_plt['upper_'+mvty].to_numpy(dtype=float) - y_mean], 0, None)

    plt.figure(figsize=(5,5))
    plt.scatter(x_mean, y_mean, color=color_, s=5)
    plt.errorbar(x_mean, y_mean, xerr=xerr, yerr=yerr, fmt="o", color=color_, alpha=0.2, ms=5)
    plt.xlabel(mvtx)
    plt.ylabel(mvty)
    #line through the origin with the slope computed above, drawn over the range of the x means
    x_min = x_mean.min()
    x_max = x_mean.max()
    plt.plot([x_min, x_max], [x_min*slope, x_max*slope], color='black')
    text_ = 'r = %.2f [%.2f, %.2f]' % (round(cor_mvtx_mvty,2), round(cor_mvtx_mvty_lower,2),
                                       round(cor_mvtx_mvty_upper,2))
    plt.text(max(df_plt['upper_'+mvtx].tolist())*0.1, max(df_plt['upper_'+mvty].tolist())*1.4, text_, fontsize=13)
    plt.savefig(os.path.join(path_extracted_data,'BS_'+mvtx+'_'+mvty+'.png'),dpi=300,format='png',bbox_inches='tight')
    plt.show();
    #release the figure: one figure is created per pair of behaviours
    plt.close()

# Repeatability

In [None]:
#search for the repeatability csv files, load each file and concatenate them
def _parse_estimate_with_interval(text):
    """Split a string formatted as 'estimate [lower, upper]' into three floats (estimate, lower, upper)."""
    estimate = float(text.split(' ')[0])
    lower = float(text.split(', ')[0].split('[')[1])
    upper = float(text.split(', ')[1].strip(']'))
    return estimate, lower, upper

#sorted: glob returns the files in arbitrary order, sorting keeps the row order reproducible
li_path = sorted(glob.glob(os.path.join(path_extracted_data,'R_estimates_*.csv')))
if len(li_path)==0:
    raise FileNotFoundError('No R_estimates_*.csv file found in %s'%path_extracted_data)
li_df = []
for path_ in li_path:
    #basename instead of splitting on a backslash, so that the file name is also correct outside Windows
    print('--------------------- files %s'%os.path.basename(path_))
    df_ = pd.read_csv(path_, sep=',', index_col=0)
    print(df_.shape)
    display(df_.head(3))
    li_df.append(df_)
print('------------------------- ALL TOGETHER:')
df_R = pd.concat(li_df)
#li_VI / li_VE are strings 'estimate [lower, upper]': split them into numeric columns
parsed_VI = df_R['li_VI'].map(_parse_estimate_with_interval)
parsed_VE = df_R['li_VE'].map(_parse_estimate_with_interval)
df_R['VI_LL'] = parsed_VI.map(lambda t: t[1])
df_R['VI_UL'] = parsed_VI.map(lambda t: t[2])
df_R['VE_LL'] = parsed_VE.map(lambda t: t[1])
df_R['VE_UL'] = parsed_VE.map(lambda t: t[2])
df_R['VI'] = parsed_VI.map(lambda t: t[0])
df_R['VE'] = parsed_VE.map(lambda t: t[0])
display(df_R[['li_VI','li_VE','VI','VE','VI_LL','VI_UL','VE_LL','VE_UL']].head(3))
#readable behaviour name (raises a KeyError on a behaviour that is missing from dico_mvt_name)
df_R['li_mvt_named'] = df_R['li_mvt'].map(lambda x: dico_mvt_name[x])
print(df_R.shape)
display(df_R.head(3))

In [None]:
#text columns for the paper: fixed number of decimals (trailing zeros kept), formatted as
#"estimate [lower, upper]"
def _fmt_with_interval(fmt_, col_est, col_low, col_up):
    """Return a row formatter producing 'estimate [lower, upper]' with the given number format."""
    template = fmt_+' ['+fmt_+', '+fmt_+']'
    return lambda row: template % (row[col_est], row[col_low], row[col_up])

#R and its interval with two decimals
df_R['text'] = df_R.apply(_fmt_with_interval('%.2f', 'li_R', 'li_CIL', 'li_CIU'), axis=1)
#between- and within-individual variances with four decimals (overwrites the original strings)
df_R['li_VI'] = df_R.apply(_fmt_with_interval('%.4f', 'VI', 'VI_LL', 'VI_UL'), axis=1)
df_R['li_VE'] = df_R.apply(_fmt_with_interval('%.4f', 'VE', 'VE_LL', 'VE_UL'), axis=1)
df_R.head(3)

In [None]:
#create a matrix with each row corresponding to a movement behaviour and each column to a situation
#(over time, within a context, across contexts), with the situations sorted in a specific order
dico_bstype_order = {'AC':6, 'LP10':5, 'LP2':2, 'VD':4, 'cold':3, 'time':1}
dico_bstype_name = {'AC':'AC', 'LP10':'LLS', 'LP2':'ELS', 'VD':'VD', 'cold':'CET', 'time':'OT'}
#rank by display name. li_BStype is overwritten below, so the rank is looked up on the renamed
#values and already renamed values are left untouched: the cell can then be re-run safely
dico_bsname_order = {dico_bstype_name[k]: v for k, v in dico_bstype_order.items()}
df_R['li_BStype'] = df_R['li_BStype'].map(lambda x: dico_bstype_name.get(x, x))
#raises a KeyError on a situation that is in neither dictionary
df_R['order'] = df_R['li_BStype'].map(lambda x: dico_bsname_order[x])
df_R = df_R.sort_values(['order'])
#all R with CI
df_plt = df_R.pivot(index='li_mvt_named', columns='li_BStype', values='text').reset_index()
df_plt = df_plt[['li_mvt_named','OT','ELS','CET','VD','LLS','AC']]
df_plt

### visualise

In [None]:
#heatmaps of the repeatability estimates (behaviours in rows, situations in columns), min-max
#scaled once within each row and once within each column
cmap_ = LinearSegmentedColormap.from_list('rg',["lightgrey", "black"], N=256)
df_plt = df_R.pivot(index='li_mvt_named', columns='li_BStype', values='li_R')
df_plt = df_plt[['OT','ELS','CET','VD','LLS','AC']]
li_col = list(df_R['li_BStype'].unique())

def _draw_normalized_heatmap(df_norm, title_, file_name):
    """Display the scaled table, draw it as a grey-scale heatmap and save the figure."""
    display(df_norm)
    plt.figure(figsize=(5,2))
    sns.heatmap(df_norm, cmap=cmap_, annot=False,
                xticklabels=list(df_norm.columns), yticklabels=list(df_norm.index), linewidths=1.8)
    plt.xlabel('');
    plt.ylabel('');
    plt.title(title_, size=10.5)
    plt.savefig(os.path.join(path_extracted_data, file_name),dpi=300,format='png',bbox_inches='tight')
    plt.show();

########################## normalize by row ##########################
#(value - row minimum) / (row maximum - row minimum)
row_min = df_plt.min(axis=1)
row_max = df_plt.max(axis=1)
df_plt_nr = df_plt.sub(row_min, axis=0).div(row_max-row_min, axis=0)
_draw_normalized_heatmap(df_plt_nr, 'Repeatability normalized by rows \n (comparison between contexts)',
                         'R_normalizedrowwise.png')

########################## normalize by columns ##########################
#(value - column minimum) / (column maximum - column minimum)
df_sub = df_plt[li_col]
df_plt_nc = (df_sub-df_sub.min())/(df_sub.max()-df_sub.min())
_draw_normalized_heatmap(df_plt_nc, 'Repeatability normalized by columns \n (comparison between behaviours)',
                         'R_normalizedcolumnwise.png')

### check #obs and #individual

In [None]:
#check metadata: number of groups and of observations per situation and behaviour
#small check: the gamma family (nestbox zone behaviour) should have NaN everywhere, but is based on
#the exact same dataset
li_meta_col = ['li_BStype','li_mvt','li_ngroup','li_nobs']
display(df_R[li_meta_col].sort_values(['li_BStype','li_mvt'])) #CORRECT!
#same information restricted to the vertical travelled distance
is_vtd = df_R['li_mvt']=='vertical_travel_distance_fulllight_perinsideh'
df_R.loc[is_vtd, ['li_BStype','li_ngroup','li_nobs']]

In [None]:
#verification of R based on VI and VE: R should equal VI/(VI+VE)
#in the binary family VE is the residual variance, defined as math.pi**2/3 in the Nakagawa paper
#in the gamma family VE is defined as trigamma(shape) in the Nakagawa paper
df_R['verification'] = df_R['VI']/(df_R['VI']+df_R['VE'])
#absolute difference, so that a recomputed value LARGER than the reported R is detected as well
df_R['isequal'] = (df_R['li_R']-df_R['verification']).abs()<0.01
df_R_mismatch = df_R[~df_R['isequal']]
if df_R_mismatch.shape[0]!=0:
    print('ERROR in your within / between individual variance!')
    display(df_R_mismatch[['li_BStype','li_R','li_mvt','verification','VI','VE']].head(15))
    #an exception stops the notebook with an explicit message
    raise ValueError('%d repeatability estimate(s) differ from VI/(VI+VE) by 0.01 or more'
                     % df_R_mismatch.shape[0])

In [None]:
#table of the between- / within-individual variances and trait mean per behaviour and situation,
#with readable names, saved as csv
#NOTE: dico_bstype_name is redefined here, this time from the short situation labels to full names
dico_bstype_name = {
    'AC': 'across context',
    'OT': 'over time',
    'LLS': 'late life stage',
    'ELS': 'early life stage',
    'VD': 'vaccination disturbance',
    'CET': 'cold external temperature',
}
li_vivem_col = ['li_BStype','li_mvt','li_VI','li_VE','li_mvtmean']
df_VIVEM = df_R.sort_values(['li_mvt'])[li_vivem_col].copy()
df_VIVEM['li_mvt'] = df_VIVEM['li_mvt'].map(lambda x: dico_mvt_name[x])
df_VIVEM['li_BStype'] = df_VIVEM['li_BStype'].map(lambda x: dico_bstype_name[x])
#NOTE(review): 'trait meant' looks like a typo for 'trait mean'; it is kept unchanged because it is
#the header written to the csv file and may be read by other scripts
dico_vivem_header = {
    'li_mvt': 'movement behaviour',
    'li_BStype': 'situations',
    'li_VI': 'between-individual variance',
    'li_VE': 'within-individual variance',
    'li_mvtmean': 'trait meant',
}
df_VIVEM = df_VIVEM.rename(columns=dico_vivem_header)
df_VIVEM.to_csv(os.path.join(path_extracted_data,'BS_df_VI_VE_TraitMean.csv'), index=False, sep=',')
df_VIVEM

### Dataframe for metadata significance

In [None]:
#value attached to each situation and stored in 'meannbrdays'
dico_c_meannbr = {'OT': 60, 'ELS': 9, 'CET': 9, 'VD': 85, 'LLS': 9, 'AC': 128}
df_R['meannbrdays'] = df_R['li_BStype'].map(lambda x: dico_c_meannbr[x])
#save the columns needed for the metadata analysis (all rows are written, none is removed here)
li_lm_col = ['li_R','li_mvt','li_BStype','meannbrdays','VI','VE']
path_lm_metadata = os.path.join(path_extracted_data,'BS_lm_metadata.csv')
df_R[li_lm_col].to_csv(path_lm_metadata, index=False, sep=',')
df_R.head(3)

In [None]:
# Histogram of the repeatability estimates (column 'li_R'); the trailing ';' hides the text output
plt.hist(df_R['li_R']);

In [None]:
# Display the mapping from behaviour variable names to their readable names
dico_mvt_name

In [None]:
#one small panel per behaviour: between- and within-individual variance (VI / VE) against the
#mean number of days between two observations
#long format: one row per behaviour, situation and variance type
df_ = pd.melt(df_R, id_vars=['li_mvt','li_BStype','meannbrdays'], value_vars=['VI','VE'], value_name = 'value')
dico_V_named = {'VI':'between-individual \n variance', 'VE':'within-individual \n variance'}
df_['variable'] = df_['variable'].map(lambda x: dico_V_named[x])
#same names as dico_mvt_name, with line breaks so that the long ones fit above a narrow panel
dico_mvt_name2 = {'WentinWG': 'has been outside',
                 'food_sameDurTiming_r50_final': 'food reactivity index',
                 'Sleeping_Uppest': 'has slept on top tier',
                 'vertical_travel_distance_fulllight_perinsideh': 'vertical travelled \ndistance',
                 'mid_cum_Z4_h_Morning': 'mid-nestbox zone \nusage'}
df_['li_mvt_named'] = df_['li_mvt'].map(lambda x: dico_mvt_name2[x])
pal_ = {'between-individual \n variance':'black', 'within-individual \n variance':'grey'}
display(df_.head(3))

#grouping on the column name (not on a one-element list) keeps the group key a plain string:
#with a list, pandas >= 2.0 returns a 1-tuple, which would be printed as such in the titles
li_group = list(df_.groupby('li_mvt_named'))
l = 1
c = len(li_group) #one panel per behaviour
plt.figure(figsize=(10,1.1))
for i, (mvt, df_mvt) in enumerate(li_group):
    plt.subplot(l,c,i+1)
    #the legend is drawn once, to the left of the first panel
    if i==0:
        sns.lineplot(x='meannbrdays',y='value', data=df_mvt, hue='variable', palette=pal_, legend=True);
        plt.legend(bbox_to_anchor=(-0.04, 0), loc='lower right', borderaxespad=0)
    else:
        sns.lineplot(x='meannbrdays',y='value', data=df_mvt, hue='variable', palette=pal_, legend=False);
    #shared x label, written under the middle panel only
    if i==c//2:
        plt.xlabel('mean number of days between any 2 observations')
    else:
        plt.xlabel('')
    plt.ylabel('')
    plt.yticks([])
    plt.title(mvt, size=10)
plt.savefig(os.path.join(path_extracted_data,'Within_between_var.png'),dpi=300,format='png',bbox_inches='tight')
plt.show();