In [2]:
import pandas as pd
import os
import numpy as np
import ast
import math
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

# Rename Ethogram

In [3]:
ethogram_path = "./DATA/Ethogram_Baboons_2024.xlsx"

ethogram = pd.read_excel(ethogram_path)[['Behavior code', 'Key', 'Code', 'Modality', 'Social value']]

In [93]:
ethogram_df = pd.DataFrame()
ethogram_df['old ethogram'] = ethogram['Key'] + ethogram['Behavior code'].str[0]
ethogram_df = ethogram_df[~ethogram_df['old ethogram'].str.endswith('$')]

option = 'modality_social_value' #'Code' or 'modality' 

if option == 'social_value':
  ethogram_df['new ethogram'] = ethogram['Social value']
elif option == 'modality':
  ethogram_df['new ethogram'] = ethogram['Modality']
else:
  ethogram_df['new ethogram'] = ethogram['Code']

In [94]:
ethogram_df.head()

Unnamed: 0,old ethogram,new ethogram
4,cB,B*
5,Ce,T+
6,cr,B*
7,ct,B*
8,Cg,T+


In [95]:
csv_file_path = "Outputs/no_metaunits_all_files_output.csv"

max_columns = 0
with open(csv_file_path, 'r') as file:
    for line in file:
        num_columns = len(line.split(',"'))
        max_columns = max(max_columns, num_columns)
column_names = ['id', 'File name'] + [i for i in range(max_columns-1)]

sequence_df = pd.read_csv(csv_file_path, delimiter=",", names=column_names).drop(0)

sequence_df['id'] = sequence_df['id'].astype(int)
sequence_df = sequence_df.sort_values(by='id')

In [96]:
new_rows= []
for i, row in sequence_df.iterrows():
  aux = []
  for j in row[2:]:
    if not pd.isna(j):
      elem = ast.literal_eval(j)
      unit = elem[1]
      new_ethogram_value = ethogram_df.loc[ethogram_df['old ethogram'] == unit, 'new ethogram'].values[0]
      aux.append((elem[0], new_ethogram_value))
  new_rows.append(aux)
  
new_ethogram_sequence_df = pd.DataFrame(new_rows)
new_ethogram_sequence_df.insert(0, 'id', sequence_df['id'].reset_index(drop=True))
new_ethogram_sequence_df.insert(1, 'File name', sequence_df['File name'].reset_index(drop=True))


In [97]:
print(output_new_ethogram_sequence_df)
output_new_ethogram_sequence_df = f"Outputs/{option}_no_metaunits_sequence.xlsx"
if os.path.exists(output_new_ethogram_sequence_df):
  os.remove(output_new_ethogram_sequence_df)

new_ethogram_sequence_df.to_excel(output_new_ethogram_sequence_df, index=False)

Outputs/modality_no_metaunits_sequence.xlsx


# Action 0.5s - New Ethogram

In [98]:
action_df_path = "Outputs/actions.xlsx"
action_df = pd.read_excel(action_df_path)

In [99]:

f_i_actions = action_df[['f','i']]

def calculate_start_end(my_dict):
    # Check if the dictionary is not empty before accessing values
    if my_dict:
        start = min(action[0] if action[0] is not None else 0 for unit_actions in my_dict.values() for action in unit_actions)
        end = max(action[1] if action[1] is not None else action[0] for unit_actions in my_dict.values() for action in unit_actions)
    else:
        start = end = np.nan
    
    return start, end
start_actions_array = []
end_actions_array = []

def nan_aware_min(a, b):
    if np.isnan(a):
        return b
    elif np.isnan(b):
        return a
    else:
        return min(a, b)

def nan_aware_max(a, b):
    if np.isnan(a):
        return b
    elif np.isnan(b):
        return a
    else:
        return max(a, b)

for i, row in f_i_actions.iterrows():
    f_dict = ast.literal_eval(row['f']) 
    i_dict = ast.literal_eval(row['i'])

    start_f, end_f = calculate_start_end(f_dict)
    start_i, end_i = calculate_start_end(i_dict)

    start_total = nan_aware_min(start_f, start_i)
    end_total = nan_aware_max(end_f, end_i)

    start_actions_array.append(start_total)
    end_actions_array.append(end_total)




final_action_df = action_df.copy()[['Index', 'File name']]
final_action_df['Start'] = start_actions_array
final_action_df['End'] = end_actions_array
final_action_df['Duration'] = final_action_df['End'] - final_action_df['Start']


In [100]:

amount_columns = np.arange(0, math.ceil(max(final_action_df['Duration'])+5), 0.5)
for i in amount_columns:
    final_action_df['S' + str(i)] = ""

In [101]:
def process_dict(unit_actions, start_actions, i):
    result_dict = {}
    for unit_key, actions in unit_actions.items():
        result_actions = []
        for action in actions:
            # print(start_actions)
            action_start = round(action[0] - start_actions[i], 1)
            if action[1] is None:
                action_end = action_start
            else:
                action_end = round(action[1] - start_actions[i], 1)

            action_duration = round(action_end - action_start, 1)
            # print(action,"as",action_start, start_actions[i], "ae", action_end)
            action_duration_counting_half = np.arange(action_start, action_end + 0.5, 0.5)

            for k in range(len(action_duration_counting_half)):
                action_duration_counting_half[k] = round(action_duration_counting_half[k], 2)
                decimal = int(str(action_duration_counting_half[k]).split('.')[1])
                if decimal > 5:
                    action_duration_counting_half[k] += (10 - decimal) * 0.1
                if decimal < 5 and decimal != 0:
                    action_duration_counting_half[k] += (5 - decimal) * 0.1

            for j in action_duration_counting_half:
                column_name = 'S' + str(j)
                result_actions.append(column_name)
        result_dict[unit_key] = result_actions
    return result_dict

def update_dataframe(dataframe, result_dict, i, i_or_f):
    for unit_key, actions in result_dict.items():
        
        for action in actions:
            if dataframe.at[i, action] == '':
                dataframe.at[i, action] = []
            new_ethogram_value = ethogram_df.loc[ethogram_df['old ethogram'] == unit_key, 'new ethogram'].values[0]
            dataframe.at[i, action].append(i_or_f+"_"+new_ethogram_value)


for i, row in f_i_actions.iterrows():
    f_dict = ast.literal_eval(row['f'])
    result_f_dict = process_dict(f_dict, start_actions_array, i)
    update_dataframe(final_action_df, result_f_dict, i, "f")


for i, row in f_i_actions.iterrows():
    i_dict = ast.literal_eval(row['i'])
    result_i_dict = process_dict(i_dict, start_actions_array, i)
    update_dataframe(final_action_df, result_i_dict, i, "i")


In [102]:
for column in final_action_df.columns[5:]:
  final_action_df[column] = final_action_df[column].apply(lambda x: x if x !="" else np.nan)
final_action_df.head()

Unnamed: 0,Index,File name,Start,End,Duration,S0.0,S0.5,S1.0,S1.5,S2.0,...,S18.0,S18.5,S19.0,S19.5,S20.0,S20.5,S21.0,S21.5,S22.0,S22.5
0,1,01_Lomé.Nekketsu_23.01.23_matin_BI,22.292,33.179,10.887,[i_B*],,[i_T+],[i_T+],"[f_T+, i_T+]",...,,,,,,,,,,
1,2,02_Lips.Pipo_25.01.23_matin_UNI,321.431,329.48,8.049,[f_B*],,[f_B*],,[f_T+],...,,,,,,,,,,
2,3,03_Bobo.Mako_26.01.23_matin_UNI,28.276,45.573,17.297,[i_L*],[i_L*],[i_L*],[i_L*],"[i_L*, i_B*]",...,[i_T+],,,,,,,,,
3,4,04_Angèle.Violette_26.01.23_matin_UNI,267.951,281.075,13.124,"[f_B*, f_L*]",[f_L*],[f_L*],[f_L*],"[f_L*, f_T+]",...,,,,,,,,,,
4,5,05_Bobo.Muse_30.01.23_matin_UNI,125.235,139.75,14.515,[i_L*],[i_L*],[i_L*],[i_L*],"[i_L*, i_T+]",...,,,,,,,,,,


## Dataframe saving

In [103]:
output_mapping = {
    'social_value': "Outputs/social_value_sequence_per_second.xlsx",
    'modality': "Outputs/modality_sequence_per_second.xlsx",
    'modality_social_value': "Outputs/modality_social_value_sequence_per_second.xlsx",
}

selected_option = option if option in output_mapping else 'modality_social_value'
 
output = output_mapping[selected_option]

if os.path.exists(output):
    os.remove(output)

result_sequence = final_action_df.copy()
result_sequence.to_excel(output, index=False)
result_sequence.head()


Unnamed: 0,Index,File name,Start,End,Duration,S0.0,S0.5,S1.0,S1.5,S2.0,...,S18.0,S18.5,S19.0,S19.5,S20.0,S20.5,S21.0,S21.5,S22.0,S22.5
0,1,01_Lomé.Nekketsu_23.01.23_matin_BI,22.292,33.179,10.887,[i_B*],,[i_T+],[i_T+],"[f_T+, i_T+]",...,,,,,,,,,,
1,2,02_Lips.Pipo_25.01.23_matin_UNI,321.431,329.48,8.049,[f_B*],,[f_B*],,[f_T+],...,,,,,,,,,,
2,3,03_Bobo.Mako_26.01.23_matin_UNI,28.276,45.573,17.297,[i_L*],[i_L*],[i_L*],[i_L*],"[i_L*, i_B*]",...,[i_T+],,,,,,,,,
3,4,04_Angèle.Violette_26.01.23_matin_UNI,267.951,281.075,13.124,"[f_B*, f_L*]",[f_L*],[f_L*],[f_L*],"[f_L*, f_T+]",...,,,,,,,,,,
4,5,05_Bobo.Muse_30.01.23_matin_UNI,125.235,139.75,14.515,[i_L*],[i_L*],[i_L*],[i_L*],"[i_L*, i_T+]",...,,,,,,,,,,


In [104]:
social_value_sequence_timelapse = pd.read_excel(output_mapping['social_value'])
modality_sequence_timelapse = pd.read_excel(output_mapping['modality'])
modality_social_value_sequence_timelapse = pd.read_excel(output_mapping['modality_social_value'])


# final_action_df = social_value_sequence.copy()
# final_action_df = modality_sequence.copy()
# final_action_df = modality_social_value_sequence.copy()

## Run for Modality, then Social Value, then Modality+Social Value

In [105]:
import numpy as np

mean_action_second = final_action_df.iloc[:, :5].copy()

for column in final_action_df.columns[5:]:
    mean_action_second[column] = final_action_df[column].apply(
        lambda x: len(set(item[2:] for item in x)) if isinstance(x, list) else np.nan
    )


In [106]:
mean_action_second.head()

Unnamed: 0,Index,File name,Start,End,Duration,S0.0,S0.5,S1.0,S1.5,S2.0,...,S18.0,S18.5,S19.0,S19.5,S20.0,S20.5,S21.0,S21.5,S22.0,S22.5
0,1,01_Lomé.Nekketsu_23.01.23_matin_BI,22.292,33.179,10.887,1,,1,1.0,1,...,,,,,,,,,,
1,2,02_Lips.Pipo_25.01.23_matin_UNI,321.431,329.48,8.049,1,,1,,1,...,,,,,,,,,,
2,3,03_Bobo.Mako_26.01.23_matin_UNI,28.276,45.573,17.297,1,1.0,1,1.0,2,...,1.0,,,,,,,,,
3,4,04_Angèle.Violette_26.01.23_matin_UNI,267.951,281.075,13.124,2,1.0,1,1.0,2,...,,,,,,,,,,
4,5,05_Bobo.Muse_30.01.23_matin_UNI,125.235,139.75,14.515,1,1.0,1,1.0,2,...,,,,,,,,,,


If focal is emitting two B unit there's no overlap. But if it's two B and V there is!
If focal is emitting two B unit there's no overlap. But if the individual is emitting a B, still there's no overlap.
If focal is emitting two B unit there's no overlap. But if the individual is emitting a V, there's an overlap.

In [107]:
import ast
import pandas as pd
import numpy as np

mean_list, min_list, max_list, sd_list, len_list, overlap_list, len_list_f, len_list_i, overlap_list_inter, overlap_list_intra_f, overlap_list_intra_i, overlap_list_intra = [], [], [], [], [], [], [], [], [], [], [], []

last_column_not_nan = []



for i, row in final_action_df.iloc[:, 5:].iterrows():
    count = count_overlap = count_f = count_i = count_inter = count_intra_f = count_intra_i = count_intra = 0
    column_not_nan = 0
    unique_values_row = []

    for j, elements in enumerate(row):
        count_unique_values = 0
        unique_values_cell = set()


        if type(elements) is list and not pd.isna(elements).any():
            
            elem = elements
            count += 1
            unique_values_cell.update(set([unit[2:] for unit in elem]))  # update unique values for the cell
            
            # focal len :
            if "f" in [unit[0] for unit in elem]:
                count_f += 1
                
                # intra individual overlap (in this case, we don't care about who is emitting the unit, just about the unit)
                unique_units = set([unit[2:] for unit in elem])
                
                if len(unique_units) > 1:
                    # More than one type and one of them is from i
                    if "i" in [unit[0] for unit in elem]:
                        count_inter += 1 

                    # More than one type and more than one of them is from f
                    if [unit[0] for unit in elem].count("f") > 1 and len(set([unit[2:] for unit in elem if unit[0] == "f"])) > 1:
                        count_intra_f += 1
                        
            # indiv len :
            if "i" in [unit[0] for unit in elem]:
                count_i += 1
                
                # intra individual overlap (in this case, we don't care about who is emitting the unit, just about the unit)
                unique_units = set([unit[2:] for unit in elem])
                
                if len(unique_units) > 1:
                    # More than one type and more than one of them is from f
                    if [unit[0] for unit in elem].count("i") > 1 and len(set([unit[2:] for unit in elem if unit[0] == "i"])) > 1:
                        count_intra_i += 1
            # overlap len:
            if len(unique_values_cell) > 1:
                
                count_overlap += 1
                
            # overlap intra total :
            if ([unit[0] for unit in elem].count("f") > 0 and len(set([unit[2:] for unit in elem if unit[0] == "f"])) > 1 ) or ([unit[0] for unit in elem].count("i") > 0 and len(set([unit[2:] for unit in elem if unit[0] == "i"])) > 1):
                count_intra += 1
            
            column_not_nan = j + 1
            count_unique_values += len(unique_values_cell)
            unique_values_row.append(count_unique_values)
            # print(elements)
            # print("set", unique_values_cell)
            # print("Overlap:", count_overlap)
            # print("Overlap count_inter:", count_inter)
            # print("Overlap count_intra_f:", count_intra_f)
            # print("Overlap count_intra_i:", count_intra_i)
            # print("Overlap count_intra:", count_intra)
            
    mean_action_second.iloc[i, 5:column_not_nan + 5] = mean_action_second.iloc[i, 5:column_not_nan + 5].map(lambda x: 0 if pd.isna(x) else x)
    
    # mean_list.append(sum(unique_values_row)/column_not_nan)
    # min_list.append(min(unique_values_row))
    # max_list.append(max(unique_values_row))
    # sd_list.append(np.std(unique_values_row))
    
    mean_list.append(np.mean(mean_action_second.iloc[i, 5:column_not_nan+5]))
    min_list.append(mean_action_second.iloc[i, 5:column_not_nan+5].min())
    max_list.append(mean_action_second.iloc[i, 5:column_not_nan+5].max())
    # print(mean_action_second.iloc[i, 5:column_not_nan+5].max())
    sd_list.append(np.std(mean_action_second.iloc[i, 5:column_not_nan+5]))
    last_column_not_nan.append(column_not_nan)        
    len_list.append(count)
    len_list_f.append(count_f)
    len_list_i.append(count_i)
    overlap_list.append(count_overlap)
    overlap_list_inter.append(count_inter)
    overlap_list_intra_f.append(count_intra_f)
    overlap_list_intra_i.append(count_intra_i)
    overlap_list_intra.append(count_intra)

# print(max_list)

# print(last_column_not_nan)
# percentages : 
percent_f = [(len_list_f[i] / last_column_not_nan[i]) for i in range(len(last_column_not_nan))]
percent_i = [(len_list_i[i] / last_column_not_nan[i]) for i in range(len(last_column_not_nan))]
percent_overlap = [(overlap_list[i] / last_column_not_nan[i]) for i in range(len(last_column_not_nan))]
percent_inter_overlap = [(overlap_list_inter[i] / last_column_not_nan[i]) for i in range(len(last_column_not_nan))]

percent_intra_f_overlap = [(overlap_list_intra_f[i] / last_column_not_nan[i]) for i in range(len(last_column_not_nan))]
percent_intra_f_overlap_in_f = [(0 if len_list_f[i] == 0 else overlap_list_intra_f[i] / len_list_f[i]) for i in range(len(len_list_f))]

percent_intra_i_overlap = [(overlap_list_intra_i[i] / last_column_not_nan[i]) for i in range(len(last_column_not_nan))]
percent_intra_i_overlap_in_i = [(0 if len_list_i[i] == 0 else overlap_list_intra_i[i] / len_list_i[i]) for i in range(len(len_list_i))]

percent_intra_overlap = [(overlap_list_intra[i] / last_column_not_nan[i]) for i in range(len(last_column_not_nan))]


In [108]:
mean_action_second['Mean different units'] = mean_list
mean_action_second['Min different in a moment'] = min_list
mean_action_second['Max different in a moment'] = max_list
mean_action_second['Standard Deviation'] = sd_list


mean_action_second["Total States"] = last_column_not_nan
mean_action_second["Len States Focal"] = len_list_f
mean_action_second["Percentage of Focal States"] = percent_f
mean_action_second["Len States Individual"] = len_list_i
mean_action_second["Percentage of Individual States"] = percent_i
mean_action_second["Number Overlaps"] = overlap_list
mean_action_second["Percentage of Overlaps"] = percent_overlap

mean_action_second["Number of Inter-Individual Overlaps"] = overlap_list_inter
mean_action_second["Percentage of Inter-Individual Overlaps"] = percent_inter_overlap
mean_action_second["Number of Intra Focal Overlaps"] = overlap_list_intra_f
mean_action_second["Percentage of Intra Focal Overlaps"] = percent_intra_f_overlap
mean_action_second["Percentage of Intra Focal Overlaps Just Focal State"] = percent_intra_f_overlap_in_f


mean_action_second["Number of Intra Individual Overlaps"] = overlap_list_intra_i
mean_action_second["Percentage of Intra Individual Overlaps"] = percent_intra_i_overlap
mean_action_second["Percentage of Intra Individual Overlaps Just Focal State"] = percent_intra_i_overlap_in_i


mean_action_second["Number of Intra Overlaps for Both"] = overlap_list_intra
mean_action_second["Percentage of Intra Overlaps for Both"] = percent_intra_overlap


# mean_action_second.iloc[:, -25:]

In [109]:
output = f"Outputs/{option}_details_action_per_second.xlsx"
print(output)
if os.path.exists(output):
  os.remove(output)

mean_action_second.to_excel(output, index=False)

Outputs/modality_social_value_details_action_per_second.xlsx


In [110]:
# options = ['Social value', 'Modality', 'Code']
# for option in options:
  

# Sentence len + tokens

In [111]:
modality_sequence = pd.read_excel("Outputs/modality_no_metaunits_sequence.xlsx")
social_value_sequence = pd.read_excel("Outputs/social_value_no_metaunits_sequence.xlsx")
modality_social_value_sequence = pd.read_excel("Outputs/modality_social_value_no_metaunits_sequence.xlsx")

sequences_dict = {
    'modality_len_token': modality_sequence,
    'social_value_len_token': social_value_sequence,
    'modality_social_value_len_token': modality_social_value_sequence
}




In [112]:
def length_and_tokens(key, df):
  
  focal_length, individual_length, all_tokens, focal_tokens, individual_tokens, both_tokens = [],[],[],[],[],[]
  aux_df = df.copy()
  
  for i, row in df.iterrows():
    len_f, token_f, len_i, token_a, token_i, token_both = [], [], [], [], [], []
    
    for elem in row[2:-2] :
        if not pd.isna(elem): 
            # count+=1
            if elem.split("'")[3] not in token_both :
                token_both.append(elem.split("'")[3])
            if elem not in token_a: #.split("'")[3]
                token_a.append(elem)
            if elem.split("'")[1]=='f' :
                len_f.append(elem)
                if elem.split("'")[3] not in token_f:
                    token_f.append(elem.split("'")[3])
            if elem.split("'")[1]=='i' :
                len_i.append(elem)
                if elem.split("'")[3] not in token_i :
                    token_i.append(elem.split("'")[3])
    all_tokens.append(len(token_a))
    focal_length.append(len(len_f))
    individual_length.append(len(len_i))
    focal_tokens.append(len(token_f))
    individual_tokens.append(len(token_i))
    both_tokens.append(len(token_both))
  

  aux_df["Length"] = df.iloc[:, 2:].count(axis=1)
  aux_df["Focal Sequence Length"] = focal_length
  aux_df["Individual Sequence Length"] = individual_length
  aux_df["Total of Tokens"] = all_tokens
  aux_df["Focal Tokens"] = focal_tokens
  aux_df["Individual Tokens"] = individual_tokens
  aux_df["Both Tokens"] = both_tokens

  
  excel_string = f"Outputs/{key}.xlsx"
  if os.path.exists(excel_string):
    os.remove(excel_string)
  aux_df.to_excel(excel_string, index=False)
  return aux_df

In [113]:
for key, df in sequences_dict.items():
  aux_df = length_and_tokens(key, df)
  
aux_df.head()

Unnamed: 0,id,File name,0,1,2,3,Length,Focal Sequence Length,Individual Sequence Length,Total of Tokens,Focal Tokens,Individual Tokens,Both Tokens
0,1,01_Lomé.Nekketsu_23.01.23_matin_BI,"('i', 'B*')","('i', 'T+')","('f', 'T+')",,3,0,2,2,0,2,2
1,2,02_Lips.Pipo_25.01.23_matin_UNI,"('f', 'B*')","('f', 'B*')","('f', 'T+')",,3,2,0,1,1,0,1
2,3,03_Bobo.Mako_26.01.23_matin_UNI,"('i', 'L*')","('i', 'B*')","('i', 'T+')","('f', 'B*')",4,0,2,2,0,2,2
3,4,04_Angèle.Violette_26.01.23_matin_UNI,"('f', 'B*')","('f', 'L*')","('f', 'T+')","('f', 'T+')",4,2,0,2,2,0,2
4,5,05_Bobo.Muse_30.01.23_matin_UNI,"('i', 'L*')","('i', 'T+')","('i', 'T+')",,3,0,2,2,0,2,2
