In [None]:
#basic package
import os
import numpy as np
import pandas as pd
import sys
import time
import datetime as dt
import pickle

#plot
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Make the parent folder importable so that the project modules (UTILS, config_origins)
# can be imported from this notebook.
PACKAGE_PARENT = '../'
# `__file__` is not defined inside a notebook; the original expression joined the literal
# string '__file__' to the working directory and took its dirname, which is simply the
# (symlink-resolved) working directory.
SCRIPT_DIR = os.path.realpath(os.getcwd())
_package_root = os.path.normpath(os.path.join(SCRIPT_DIR, PACKAGE_PARENT))
# Guard against adding the same entry again each time the cell is re-run.
if _package_root not in sys.path:
    sys.path.append(_package_root)
from UTILS import kmeans_clustering, sampen, chi2_distance, ts_visual, correct_key, most_frequent, mssd
import config_origins as config

In [None]:
# Reminder: everything below is driven by the values set in the configuration module.
print('change the configuration file if not done yet!')

# Run identifier (used as a prefix in the input file names) and root data folder.
id_run = config.id_run
path_extracted_data = config.path_extracted_data

# Sub-folder holding the inputs/outputs specific to the adaptability analysis.
path_extracted_data_adap = os.path.join(path_extracted_data, 'Adaptability')

print(id_run)

# Load data

In [None]:
# Selected animals: list of hen identifiers stored as a pickle in the adaptability folder.
# NOTE: pickle can execute arbitrary code on load; only open files produced by this project.
path_selected_hens = os.path.join(path_extracted_data_adap, 'li_selected_hens.pkl')
# Use a context manager so the file handle is closed right after loading.
with open(path_selected_hens, 'rb') as f_selected_hens:
    li_selected_hens = pickle.load(f_selected_hens)
print(len(li_selected_hens))

In [None]:
# Focal-bird information, used later to control for initial weight and rearing pen.
path_focal_birds = os.path.join(config.path_extracted_data, id_run+'df_FOCALBIRDS.csv')
df_FB = pd.read_csv(path_focal_birds, sep=';')

# early_death: missing -> 0, and the "not sure" code 2 is counted as 1.
df_FB = df_FB.assign(early_death=df_FB['early_death'].fillna(0).replace(2, 1))

# Give the two control variables explicit names.
df_FB = df_FB.rename(columns={'29-09 weight': 'InitialWeight', 'R-Pen': 'RearingPenID'})

display(df_FB['early_death'].value_counts())
print(df_FB.shape)
df_FB.head(3)

In [None]:
# Daily movement variables (one CSV per run; 'level' holds the day-first formatted date).
path_daily_variables = os.path.join(path_extracted_data, id_run+'_daily_ALL_variables_verified.csv')
df_MVT_ALL = pd.read_csv(path_daily_variables, sep=';', parse_dates=['level'], dayfirst=True)
print(df_MVT_ALL.shape)

# Keep only the selected hens and restrict to the adaptability study timeframe.
# .copy() gives an independent frame, so the column assignment below does not
# raise SettingWithCopyWarning.
is_selected_hen = df_MVT_ALL['HenID'].isin(li_selected_hens)
is_in_timeframe = df_MVT_ALL['level'] <= config.max_date_adaptability
df_MVT_ALL = df_MVT_ALL[is_selected_hen & is_in_timeframe].copy()

# Collapse the classes: everything except MEXP and LEXP becomes 'Other'.
# The explicit lookup is kept on purpose: an unexpected class raises KeyError
# instead of being silently turned into NaN.
dico_cl = {'EPI':'Other','MEXP':'MEXP','LEXP':'LEXP','LEXPLOST':'Other','MEXPLOST':'Other'}
df_MVT_ALL['CLASS'] = df_MVT_ALL['CLASS'].map(lambda x: dico_cl[x])
display(df_MVT_ALL['CLASS'].value_counts())

# Make sure movement data is available for each remaining row.
df_MVT_ALL = df_MVT_ALL[df_MVT_ALL['perc_duration_5_Zone'].notnull()].copy()
print(df_MVT_ALL.shape)
df_MVT_ALL.head(3)

In [None]:
# Weather conditions.
# The folder can be overridden by defining `path_weather_dir` in the configuration module;
# when that attribute is absent the original hard-coded location is used, so existing
# setups keep working unchanged.
_default_weather_dir = r'C:\Users\camil\Desktop\vm_exchange\PhD_Data\Weather'
path_weather = os.path.join(getattr(config, 'path_weather_dir', _default_weather_dir), 'Weather_process.csv')
df_weather_h = pd.read_csv(path_weather, sep=';', parse_dates=['date','Timestamp'])
# Chronological order; the file's own 'date' column is dropped here.
df_weather_h = df_weather_h.sort_values('Timestamp')
df_weather_h = df_weather_h.drop(['date'], axis=1)
# Keep a separately named copy of the weather timestamp.
df_weather_h['Timestamp_weather'] = df_weather_h['Timestamp'].copy()
print(df_weather_h.shape)
display(df_weather_h.head(3))

# Add external temperature as an environmental factor

In [None]:
# Temperature varies a lot within a day, so each date is summarised using only the
# readings whose 'hour' value is between 11 and 16 (inclusive).
li_hours_kept = [11, 12, 13, 14, 15, 16]

# Calendar date of every weather reading.
df_weather_h['date'] = df_weather_h['Timestamp'].map(dt.datetime.date)

# One row per date: the list of retained temperature readings ...
is_hour_kept = df_weather_h['hour'].isin(li_hours_kept)
df_wday = (
    df_weather_h[is_hour_kept]
    .groupby(['date'])['temperature_C']
    .agg(list)
    .reset_index()
)
# ... and their mean.
df_wday['temperature_C_avg'] = df_wday['temperature_C'].map(np.mean)
df_wday.head(3)

In [None]:
# Attach the daily temperature summary to every movement row.
# The shape is printed before and after to confirm the left merge keeps the row count.
print(df_MVT_ALL.shape)
# .assign builds a new frame, so no column is written onto a filtered slice
# (avoids SettingWithCopyWarning).
df_MVT_ALL = df_MVT_ALL.assign(date=df_MVT_ALL['level'].map(lambda x: dt.datetime.date(x)))
# validate='many_to_one' fails loudly if df_wday ever holds more than one row per date,
# which would otherwise silently duplicate movement rows.
df_MVT_ALL = pd.merge(df_MVT_ALL, df_wday, on='date', how='left', validate='many_to_one')
print(df_MVT_ALL.shape)
df_MVT_ALL.head(2)

# Add initial conditions (initial weight, rearing pen ID)

In [None]:
# Bring the per-hen control variables onto every movement row (left join on HenID).
# The shape is printed on both sides of the merge to spot unexpected row changes.
li_cols_initial = ['HenID', 'RearingPenID', 'InitialWeight']
print(df_MVT_ALL.shape)
df_MVT_ALL = df_MVT_ALL.merge(df_FB[li_cols_initial], how='left', on='HenID')
print(df_MVT_ALL.shape)
df_MVT_ALL.head(2)

# Select only the needed variables and days, then save

In [None]:
# Only keep days from the first opening of the WG onwards, so that all birds are compared
# under the same conditions.
print(df_MVT_ALL.shape)
# .copy(): df_MVT gets new columns in the following cells; working on an independent frame
# avoids SettingWithCopyWarning on those assignments.
df_MVT = df_MVT_ALL[df_MVT_ALL['level']>=config.date_first_opening_WG].copy()
df_MVT.shape

In [None]:
# Flag, for the PCA, the first observation per week per bird:
# the row(s) whose DIB equals the smallest DIB of its (WIB, HenID) group.
df_4pca = df_MVT.groupby(['WIB','HenID'])['DIB'].min().reset_index()
print(df_4pca.shape)
# display() is needed here: a bare expression in the middle of a cell is not rendered.
display(df_4pca.head(3))

# Group-wise minimum aligned to the rows of df_MVT; this replaces a row-by-row lookup
# into df_4pca that re-filtered the whole table for every single row.
first_dib_of_week = df_MVT.groupby(['WIB','HenID'])['DIB'].transform('min')
# 1 when the row is the first observation of its bird-week, 0 otherwise.
df_MVT = df_MVT.assign(tobeusedPCA=(df_MVT['DIB'] == first_dib_of_week).astype('int64'))
display(df_MVT['tobeusedPCA'].value_counts())
#small verification: only one value per animal per week
#df_MVT[df_MVT['tobeusedPCA']==1].groupby(['WIB','HenID']).size().reset_index()

In [None]:
# Numeric treatment code for the statistical models: OFH -> 1, TRAN -> -1.
dico_tr_ = {'OFH':1, 'TRAN':-1}

def _encode_treatment(label):
    """Return the numeric code of a treatment label (KeyError if the label is unknown)."""
    return dico_tr_[label]

df_MVT['Treatment_encoded'] = df_MVT['Treatment'].map(_encode_treatment)

In [None]:
# Movement variables kept for the individuality analysis (order defines the CSV column order).
li_var = [
    'perc_duration_5_Zone', 'perc_duration_4_Zone', 'perc_duration_3_Zone', 'perc_duration_2_Zone',
    'perc_1_Zone_while_WG_open',
    'nbr_stays_2_Zone_perh', 'nbr_stays_4_Zone_perh', 'nbr_stays_3_Zone_perh', 'nbr_stays_5_Zone_perh',
    'nbr_stays_1_Zone_perh',
    'in_WG_15mnAfterOpening', 'distribution_entropy', 'SleepingHeight', 'vertical_travel_distance_perh',
]
# Identification, design and control columns placed in front of the movement variables.
li_info = [
    'WIB', 'HenID', 'RPen', 'DIB', 'CLASS', 'TrackingSystemID', 'PenID', 'temperature_C_avg', 'Treatment',
    'tobeusedPCA', 'RearingPenID', 'InitialWeight', 'Treatment_encoded',
]

# Store the indicator as an integer.
df_MVT['in_WG_15mnAfterOpening'] = df_MVT['in_WG_15mnAfterOpening'].map(int)
# Column name without a hyphen.
df_MVT = df_MVT.rename(columns={'R-Pen': 'RPen'})

# Keep only the needed columns and write the result for the downstream analysis.
df_MVT = df_MVT[li_info + li_var].copy()
path_mvt_out = os.path.join(path_extracted_data_adap, 'df_MVT_4individuality.csv')
df_MVT.to_csv(path_mvt_out, index=False, sep=';')
df_MVT.head(3)

In [None]:
# Vertical travel distance per hour against DIB, one line per pen.
# The trailing semicolon suppresses the textual repr of the returned Axes.
sns.lineplot(
    data=df_MVT,
    x='DIB',
    y='vertical_travel_distance_perh',
    hue='PenID',
);