Our endgoal is to both merge the timebudget exports and the aggregated events with both the playback plan and the manually collected metadata.

For that we need the path to these files. You can set them to paths as shown at the top of Windows explorer.

In [18]:
import pandas as pd
import os

TIME_BUDGET_FOLDER = "data export/data_export_30_sec_cut_off/time budget/single xlsx"
AGGREGATED_EVENTS_FILE = "data export/data_export_30_sec_cut_off/aggregated events/aggregated_events_01.09.25.xlsx"
PLAYBACK_PLAN_FILE = "data export/playback_plan_filled_out.xlsx"
MANUAL_METADATA_FILE = "data export/playback_plan_BORIS_blind_coding_for analysis.xlsx"

OUTPUT_TIMEBUDGET = "output_timebudget.xlsx"
OUTPUT_AGGREGATED_EVENTS = "output_aggregated_events.xlsx"

Timebudgets are exported as indidivual files per observation id. Merge them all into one and add an ID keeping the origin.

These contain all behaviors for all subjects, even if the subject didn't exist in the specific observation or it didn't show the behavior. So we remove all entries where the "Total number of occurences" is 0.

There are some observations where two cameras where used, they have _a and _b suffixes that should be removed from the ID.

In [19]:
# This is a function we can reuse later to do the same thing for the events
def strip_ab_and_create_unique_id(df: pd.DataFrame):
    df["Observation id"] = df["Observation id"].str.rstrip("_ab")
    df["uid"] = df["Observation id"].str.cat(df["Subject"], sep="_")
    return df

def remove_nonexistant(df: pd.DataFrame):
    return df[df["Total number of occurences"] > 0]

# Create an iterator over all files in the timebudget folder
files = map(lambda x: os.path.join(TIME_BUDGET_FOLDER, x), os.listdir(TIME_BUDGET_FOLDER))
# Open them as excel files
excels = map(pd.read_excel, files)
# Apply the function to all
without_nonexistant = map(remove_nonexistant, excels)
# Again apply the function to all
with_ids = map(strip_ab_and_create_unique_id, without_nonexistant)

# Concatenate all individual files
merged_timebudget = pd.concat(with_ids).reset_index().set_index("Observation id")
merged_timebudget

Unnamed: 0_level_0,index,Observation date,Description,Time budget start,Time budget stop,Time budget duration,Subject,Behavior,Modifiers,Total number of occurences,Total duration (s),Duration mean (s),Duration std dev,inter-event intervals mean (s),inter-event intervals std dev,% of total length,uid
Observation id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
01_01.10.2024,3,2024-12-0609:38:38.882,,0.0,135.00,135.00,Focal 1,latency to look,,1,1.559,1.559,,,,1.2,01_01.10.2024_Focal 1
01_01.10.2024,4,2024-12-0609:38:38.882,,0.0,135.00,135.00,Focal 1,latency to move away,,1,7.839,7.839,,,,5.8,01_01.10.2024_Focal 1
01_01.10.2024,5,2024-12-0609:38:38.882,,0.0,135.00,135.00,Focal 1,raise head - looking at speaker,,1,6.279,6.279,,,,4.7,01_01.10.2024_Focal 1
01_01.10.2024,7,2024-12-0609:38:38.882,,0.0,135.00,135.00,Focal 2,latency to move away,,1,5.319,5.319,,,,3.9,01_01.10.2024_Focal 2
01_02.10.2024,3,2024-12-0311:11:14.025,,0.0,113.36,113.36,Focal 1,latency to look,,1,4.200,4.200,,,,3.7,01_02.10.2024_Focal 1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
09_07.10.2024,7,2024-12-1014:49:09.784,,0.0,540.64,540.64,Focal 2,latency to move away,,1,29.080,29.080,,,,5.4,09_07.10.2024_Focal 2
09_07.10.2024,8,2024-12-1014:49:09.784,,0.0,540.64,540.64,Focal 2,raise head - looking at speaker,,2,15.158,7.579,5.346,0.001,,2.8,09_07.10.2024_Focal 2
09_07.10.2024,9,2024-12-1014:49:09.784,,0.0,540.64,540.64,Focal 3,latency to look,,1,14.159,14.159,,,,2.6,09_07.10.2024_Focal 3
09_07.10.2024,10,2024-12-1014:49:09.784,,0.0,540.64,540.64,Focal 3,latency to move away,,1,21.879,21.879,,,,4.0,09_07.10.2024_Focal 3


From the aggregated events export we need to remove any rows where there is no subject, then pivot the table on the Behavior columns, counting the occurences.

In [20]:
aggregated_events = pd.read_excel(AGGREGATED_EVENTS_FILE)
agg = strip_ab_and_create_unique_id(aggregated_events)
agg = agg[agg["Subject"] != "No focal subject"]

# This shows the table before pivoting, if you remove the #
# display(agg)

counted_events = agg.pivot_table(
    index=["uid", "Observation id", "Subject"], columns="Behavior", aggfunc="size", fill_value=0
).reset_index().set_index("Observation id")
counted_events

Behavior,uid,Subject,Mobile,alarm call,approach speaker,end of observation,feeding,get up,grazing/browsing,grouping,...,raise head - looking at speaker,raise head - other direction,resting,resting/lying down,run away,standing inattentive,standing relaxed,standing vigilent/attentive,travelling/on the move,vigilance
Observation id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01_01.10.2024,01_01.10.2024_Focal 1,Focal 1,0,0,0,1,0,0,0,0,...,1,0,0,0,0,0,0,1,0,1
01_01.10.2024,01_01.10.2024_Focal 2,Focal 2,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,1,1,0,1
01_02.10.2024,01_02.10.2024_Focal 1,Focal 1,0,0,0,1,1,0,1,0,...,2,2,0,0,0,0,0,0,0,0
01_02.10.2024,01_02.10.2024_Focal 2,Focal 2,0,0,0,1,1,0,1,0,...,2,0,0,0,0,0,0,0,0,0
01_04.10.2024,01_04.10.2024_Focal 1,Focal 1,1,0,3,1,2,0,2,0,...,7,3,0,0,1,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
09_07.10.2024,09_07.10.2024_Focal 2,Focal 2,0,0,0,1,2,0,1,0,...,2,2,0,0,0,2,0,0,0,1
09_07.10.2024,09_07.10.2024_Focal 3,Focal 3,0,0,0,1,0,0,2,1,...,1,1,0,0,0,1,0,1,0,1
10_07.10.2024,10_07.10.2024_Focal 1,Focal 1,0,0,0,1,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
10_07.10.2024,10_07.10.2024_Focal 2,Focal 2,0,0,0,1,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0


The playback plan contains lots of data, but we are currently only interested in the stimulus category and stimulus. These we need to match on the Observation id, so we also create one here.

In [21]:
playback_plan = pd.read_excel(PLAYBACK_PLAN_FILE)

padded_experiment_number = playback_plan["experiment number"].astype(str).str.zfill(2)

playback_plan["Observation id"] = padded_experiment_number.str.cat(playback_plan["date"].dt.strftime("%d.%m.%Y"), sep="_")
playback_plan = playback_plan.set_index("Observation id")
playback_plan = playback_plan[["stim. cat.", "stimulus"]]
playback_plan

Unnamed: 0_level_0,stim. cat.,stimulus
Observation id,Unnamed: 1_level_1,Unnamed: 2_level_1
01_10.09.2024,1b,1b_4
02_10.09.2024,4,4_9
03_10.09.2024,3b,3b_5
01_11.09.2024,2,2_7
02_11.09.2024,1c,1c_5
...,...,...
03_15.10.2024,2,2_11
04_15.10.2024,1c,1c_1
01_16.10.2024,3b,3b_1
02_16.10.2024,1b,1b_5



Handwritten metadata has one row per observation. So it contains data belonging to the observation itself and the individual focals. The tricky part is the way that this is organized, as we have columns name "focal 1" through "focal 5", which contain the values for said subjects.

In [22]:
# These need to be merged onto all obervations
unspecific_columns = [
    "experiment number",
    "date",
    "time",
    "location (latitude)",
    "location (longitude)",
    "groupsize category",
    "other species present",
    "group comp.",
    "habitat",
    "group 30sec-looking",
    "group 30sec-moving",
    "temp",
    "wind",
    "dist. to speaker",
    "car side"
]

manual_metadata = pd.read_excel(MANUAL_METADATA_FILE)

# Also strip _ab here
manual_metadata["Observation id"] = manual_metadata["Observation id"].str.rstrip("_ab")
manual_metadata = manual_metadata.set_index("Observation id")

# This function will extract the relevant columns for subject n
def focal_specific(n):
    translation = {
        f"focal {n}": "sex",
        f"f{n} move of": "move of distance",
        f"species focal {n}": "species"
    }
    df = manual_metadata[translation.keys()]
    df = df.rename(columns=translation)
    df = df[df["sex"] != "/"]
    df = df.dropna()
    df["Subject"] = f"Focal {n}"
    return df

# Create a single table with the extracted specifics for focals 1 to 5 (the end of range is exclusive)
focal_metadata = pd.concat([focal_specific(n) for n in range(1, 6)]).reset_index()
display(focal_metadata)

unspecific_metadata = manual_metadata[unspecific_columns]
unspecific_metadata

Unnamed: 0,Observation id,sex,move of distance,species,Subject
0,01_02.10.2024,f,3,impala,Focal 1
1,02_30.09.2024,m,3,wildebeest,Focal 1
2,04_24.09.2024,f,0,impala,Focal 1
3,04_02.10.2024,f,0,impala,Focal 1
4,01_24.09.2024,u,0,wildebeest,Focal 1
...,...,...,...,...,...
204,01_11.10.2024,f,0,impala,Focal 3
205,02_16.10.2024,f,0,impala,Focal 3
206,05_25.09.2024,u,0,wildebeest,Focal 3
207,03_17.09.2024_a,f?,0,zebra,Focal 4


Unnamed: 0_level_0,experiment number,date,time,location (latitude),location (longitude),groupsize category,other species present,group comp.,habitat,group 30sec-looking,group 30sec-moving,temp,wind,dist. to speaker,car side
Observation id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
01_02.10.2024,1,2024-10-02,09:20:00,-24.80525,27.96375,10-20,,"f, sa",semi-open,1,1,24.5,0,30,right
02_30.09.2024,2,2024-09-30,09:14:00,-24.73582,27.89078,1,,m,open,/,/,22.4,3.2,100,behind camera
04_24.09.2024,4,2024-09-24,10:04:00,-24.72586,27.89312,6-10,,f? at least 5 Kudu close by browsing,open,/,/,30.2,0.8,78,slightly left
04_02.10.2024,4,2024-10-02,10:16:00,-24.81601,27.97074,21+,,m + zebra,semi-open,1,0,28.5,1.6,40,left
01_24.09.2024,1,2024-09-24,08:28:00,-24.74021,27.92088,3-5,,?,open,1,0,18.6,0,95,slightly right
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
01_15.10.2024,1,2024-10-15,09:07:00,-24.75130,27.93611,11-20,,mixed,open,1,1,25.2,2.3,82,slightly right
01_11.10.2024,1,2024-10-11,09:25:00,-24.80570,27.95961,6-10,,mixed,bushy,1,0,29.7,2.6,25,right
02_16.10.2024,2,2024-10-16,10:10:00,-24.73344,27.93610,11-20,,"mixed, +zebra, + hartebeest",open,0,0,26.8,1.4,100,behind camera
05_25.09.2024,5,2024-09-25,14:46:00,-24.74003,27.92097,11-20,,mixed,open,0,0,36.1,2.6,45,right


Now we put everything together

In [23]:
def merge_with_data(df: pd.DataFrame):
    with_plan = df.join(other=playback_plan)
    with_meta = with_plan.join(other=unspecific_metadata)
    return pd.merge(left=with_meta, right=focal_metadata, on=["Observation id", "Subject"], how="left")

full_timebudget = merge_with_data(merged_timebudget)
display(full_timebudget)

full_counted_events = merge_with_data(counted_events)
full_counted_events


Unnamed: 0,Observation id,index,Observation date,Description,Time budget start,Time budget stop,Time budget duration,Subject,Behavior,Modifiers,...,habitat,group 30sec-looking,group 30sec-moving,temp,wind,dist. to speaker,car side,sex,move of distance,species
0,01_01.10.2024,3,2024-12-0609:38:38.882,,0.0,135.00,135.00,Focal 1,latency to look,,...,bushy,/,/,28.0,0.8,73,left,u,2?,zebra
1,01_01.10.2024,4,2024-12-0609:38:38.882,,0.0,135.00,135.00,Focal 1,latency to move away,,...,bushy,/,/,28.0,0.8,73,left,u,2?,zebra
2,01_01.10.2024,5,2024-12-0609:38:38.882,,0.0,135.00,135.00,Focal 1,raise head - looking at speaker,,...,bushy,/,/,28.0,0.8,73,left,u,2?,zebra
3,01_01.10.2024,7,2024-12-0609:38:38.882,,0.0,135.00,135.00,Focal 2,latency to move away,,...,bushy,/,/,28.0,0.8,73,left,u,2?,zebra
4,01_02.10.2024,3,2024-12-0311:11:14.025,,0.0,113.36,113.36,Focal 1,latency to look,,...,semi-open,1,1,24.5,0,30,right,f,3,impala
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
331,09_07.10.2024,7,2024-12-1014:49:09.784,,0.0,540.64,540.64,Focal 2,latency to move away,,...,open,1,1,34.8,4.2,48,left,u,2,zebra
332,09_07.10.2024,8,2024-12-1014:49:09.784,,0.0,540.64,540.64,Focal 2,raise head - looking at speaker,,...,open,1,1,34.8,4.2,48,left,u,2,zebra
333,09_07.10.2024,9,2024-12-1014:49:09.784,,0.0,540.64,540.64,Focal 3,latency to look,,...,open,1,1,34.8,4.2,48,left,u,2,zebra
334,09_07.10.2024,10,2024-12-1014:49:09.784,,0.0,540.64,540.64,Focal 3,latency to move away,,...,open,1,1,34.8,4.2,48,left,u,2,zebra


Unnamed: 0,Observation id,uid,Subject,Mobile,alarm call,approach speaker,end of observation,feeding,get up,grazing/browsing,...,habitat,group 30sec-looking,group 30sec-moving,temp,wind,dist. to speaker,car side,sex,move of distance,species
0,01_01.10.2024,01_01.10.2024_Focal 1,Focal 1,0,0,0,1,0,0,0,...,bushy,/,/,28.0,0.8,73,left,u,2?,zebra
1,01_01.10.2024,01_01.10.2024_Focal 2,Focal 2,0,0,0,1,0,0,0,...,bushy,/,/,28.0,0.8,73,left,u,2?,zebra
2,01_02.10.2024,01_02.10.2024_Focal 1,Focal 1,0,0,0,1,1,0,1,...,semi-open,1,1,24.5,0,30,right,f,3,impala
3,01_02.10.2024,01_02.10.2024_Focal 2,Focal 2,0,0,0,1,1,0,1,...,semi-open,1,1,24.5,0,30,right,f,3,impala
4,01_04.10.2024,01_04.10.2024_Focal 1,Focal 1,1,0,3,1,2,0,2,...,open,1,0,25.3,0,66,right,u,1,zebra
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205,09_07.10.2024,09_07.10.2024_Focal 2,Focal 2,0,0,0,1,2,0,1,...,open,1,1,34.8,4.2,48,left,u,2,zebra
206,09_07.10.2024,09_07.10.2024_Focal 3,Focal 3,0,0,0,1,0,0,2,...,open,1,1,34.8,4.2,48,left,u,2,zebra
207,10_07.10.2024,10_07.10.2024_Focal 1,Focal 1,0,0,0,1,1,0,1,...,open,0,0,34.2,2,95,right,f,0,impala
208,10_07.10.2024,10_07.10.2024_Focal 2,Focal 2,0,0,0,1,1,0,1,...,open,0,0,34.2,2,95,right,f,0,impala


In [24]:
full_timebudget.to_excel(OUTPUT_TIMEBUDGET)
full_counted_events.to_excel(OUTPUT_AGGREGATED_EVENTS)