In [None]:
import numpy as np
from scipy.stats import chi2
import pandas as pd

In [None]:
# Folder holding the input CSV, relative to the notebook's working directory.
projectPath = "./data/"
file_path = f"{projectPath}timeVideo1.csv"
# Load the table; the cells below read its 'behaviour' column.
df = pd.read_csv(filepath_or_buffer=file_path)
df

Unnamed: 0,TIME,behaviour
0,00:00:00,
1,00:00:15,
2,00:00:30,
3,00:00:45,
4,00:01:00,
...,...,...
4669,19:27:15,
4670,19:27:30,
4671,19:27:45,
4672,19:28:00,


In [None]:
# Per-column summary (count / unique / top / freq for these text columns).
df.describe()

Unnamed: 0,TIME,behaviour
count,4674,2268
unique,4674,27
top,00:00:00,MAN
freq,1,343


**Removing consecutive repeats of the same behaviour**

In [None]:
# A row is kept when its behaviour differs from the behaviour on the next row,
# i.e. only the last row of each run of identical labels survives.
# Unlabelled (NaN) rows always pass this test because NaN != NaN is True.
differs_from_next = df['behaviour'].ne(df['behaviour'].shift(-1))
df = df[differs_from_next]

In [None]:
# Display the frame after collapsing runs of identical consecutive behaviours.
df

Unnamed: 0,TIME,behaviour
0,00:00:00,
1,00:00:15,
2,00:00:30,
3,00:00:45,
4,00:01:00,
...,...,...
4669,19:27:15,
4670,19:27:30,
4671,19:27:45,
4672,19:28:00,


In [None]:
# Remove "veg"
# Rows with a missing behaviour are kept here (NaN compares as not equal to "veg").
not_veg = df["behaviour"].ne("veg")
df = df[not_veg]
df

Unnamed: 0,TIME,behaviour
0,00:00:00,
1,00:00:15,
2,00:00:30,
3,00:00:45,
4,00:01:00,
...,...,...
4669,19:27:15,
4670,19:27:30,
4671,19:27:45,
4672,19:28:00,


In [None]:
# Remove NaN: keep only the rows that carry a behaviour label.
dfNoNan = df.dropna(subset=['behaviour'])

In [None]:
# Display the labelled rows only.
dfNoNan

Unnamed: 0,TIME,behaviour
20,00:05:00,NIP_OUT
45,00:11:15,BN
58,00:14:30,LAND
97,00:24:15,FAN
99,00:24:45,MAN
...,...,...
4610,19:12:30,BEND
4613,19:13:15,DAN_CIRC
4625,19:16:15,NIP_CORE
4637,19:19:15,INSF


**Calculate metrics in dfNoNan**

In [None]:
# Behaviour labels of the labelled rows, in row order, as a plain Python list.
sequence1 = list(dfNoNan["behaviour"])
sequence1

['NIP_OUT',
 'BN',
 'LAND',
 'FAN',
 'MAN',
 'NIP_IN',
 'FAN',
 'MAN',
 'FAN',
 'MAN',
 'NIP_IN',
 'MAN',
 'NIP_CORE',
 'FAN',
 'NIP_IN',
 'NIP_IN',
 'MAN',
 'SA',
 'NIP_OUT',
 'NIP_OUT',
 'AM',
 'LAND',
 'NIP_IN',
 'NIP_IN',
 'NIP_IN',
 'MAN',
 'SA',
 'SA',
 'BN',
 'LAND',
 'ROLL',
 'INSF',
 'NIP_CORE',
 'LF',
 'H_UP',
 'LF',
 'FAN',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'NIP_IN',
 'AM',
 'NIP_IN',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'NIP_IN',
 'SA',
 'AM',
 'LAND',
 'NIP_CORE',
 'NIP_CORE',
 'MAN',
 'FAN',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'NIP_IN',
 'NIP_IN',
 'NIP_IN',
 'SA',
 'BN',
 'LAND',
 'NIP_CORE',
 'NIP_IN',
 'NIP_IN',
 'NIP_IN',
 'NIP_IN',
 'SA',
 'NIP_OUT',
 'NIP_OUT',
 'NIP_OUT',
 'BN',
 'AM',
 'MAN',
 'NIP_IN',
 'SA',
 'BN',
 'LAND',
 'FAN',
 'FAN',
 'NIP_CORE',
 'ROLL',
 'UP',
 'BK_SW',
 'ROLL',
 'LAND',
 'LAND',
 'H_DOWN',
 'FS',
 'SHAK',
 'LF',
 'SHAK',
 'FS',
 'SHAK',
 'SHAK',
 'FS',
 'DAN_CIRC',
 'NIP_CORE',
 'NIP_CORE',
 'BEND',
 'MAF',
 'BEN

In [None]:
# Unique states, sorted so the list (and anything keyed by it) has the same
# order on every kernel restart; a bare set() order varies between runs.
states1 = sorted(set(sequence1))
states1

['GB',
 'H_UP',
 'CIRC',
 'BEND',
 'INSF',
 'SPA',
 'BK_SW',
 'H_DOWN',
 'MAF',
 'LAND',
 'NIP_CORE',
 'CHA/flee',
 'NIP_IN',
 'SA',
 'FAN',
 'DAN_CIRC',
 'LF',
 'UP',
 'FS',
 'SHAK',
 'AM',
 'NIP_OUT',
 'ROLL',
 'MAN',
 'BEND_OPP',
 'BN']

In [None]:
def get_transition_counts(seq, order, states=None):
    """Count which symbol follows each context of `order` consecutive symbols.

    Parameters
    ----------
    seq : list
        Observed symbols, in order. Symbols must be hashable.
    order : int
        Context length. With ``order == 0`` every symbol is counted under the
        empty context ``()``.
    states : iterable, optional
        Alphabet used as the keys of every per-context dict. When omitted it is
        taken from `seq` itself (distinct symbols in order of first
        appearance), so the result never depends on a notebook-level variable.

    Returns
    -------
    dict
        Maps each observed context (a tuple of length `order`) to a dict
        ``{symbol: count}`` holding one entry per state, zero when the symbol
        never followed that context. Empty when ``len(seq) <= order``.

    Raises
    ------
    KeyError
        If `states` is given explicitly and `seq` contains a symbol that is
        not in it.
    """
    if states is None:
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        states = list(dict.fromkeys(seq))
    else:
        states = list(states)

    counts = {}
    for i in range(len(seq) - order):
        context = tuple(seq[i:i + order])
        following = seq[i + order]
        if context not in counts:
            counts[context] = dict.fromkeys(states, 0)
        counts[context][following] += 1
    return counts

In [None]:
# Calculate metrics
def calculate_metrics(seq, order, states=None):
    """Chi-square, AIC and BIC for a Markov model of the given order.

    Parameters
    ----------
    seq : list
        Observed symbols, in order.
    order : int
        Number of preceding symbols used as context.
    states : iterable, optional
        Alphabet of the model. When omitted it is taken from `seq` (distinct
        symbols in order of first appearance) instead of a notebook-level
        variable, so the expected counts, the degrees of freedom and the
        parameter count always refer to the sequence being analysed. If given,
        it should contain every symbol of `seq`.

    Returns
    -------
    tuple
        ``(chi_square, dof, p_value, aic, bic)``.

        * ``chi_square`` compares, for every observed context, the counts of
          the following symbol with an even split over the states.
        * ``dof`` adds ``len(states) - 1`` per observed context.
        * ``p_value`` is the chi-square upper-tail probability; ``nan`` when
          ``dof`` is 0 (no context observed, or a single state).
        * ``aic`` / ``bic`` use the log-likelihood of the observed transition
          frequencies.
    """
    if states is None:
        states = list(dict.fromkeys(seq))
    else:
        states = list(states)
    n_states = len(states)

    counts = get_transition_counts(seq, order)

    chi_square = 0.0
    dof = 0  # deliberately not named `df`, which is the notebook's DataFrame
    log_likelihood = 0.0
    for next_counts in counts.values():
        total = sum(next_counts.values())
        expected = total / n_states
        # Iterate over this model's own alphabet; .get() tolerates count dicts
        # that carry extra or missing keys.
        for symbol in states:
            observed = next_counts.get(symbol, 0)
            chi_square += (observed - expected) ** 2 / expected
            if observed > 0:
                log_likelihood += observed * np.log(observed / total)
        dof += n_states - 1

    # sf() is the upper tail computed directly; 1 - cdf() loses precision for
    # very small p-values.
    p_value = chi2.sf(chi_square, dof) if dof > 0 else float("nan")

    # NOTE(review): k counts every cell of the transition table. The number of
    # free parameters is often taken as n_states**order * (n_states - 1), and
    # the BIC sample size as the number of transitions (len(seq) - order).
    # Both are left as originally written - confirm which is intended.
    k = n_states ** (order + 1)
    aic = 2 * k - 2 * log_likelihood
    bic = np.log(len(seq)) * k - 2 * log_likelihood
    return chi_square, dof, p_value, aic, bic

In [None]:
# Test for order 0, 1, 2, and 3 on the full labelled sequence (sequence1).
# `states` is the alphabet of the sequence under test; later sections rebind it per group.
states = states1
for order in range(4):
    # The second value is bound to `dof`, not `df`, so the DataFrame `df`
    # that later cells filter is not replaced by an integer.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence1, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 396.83
 Degrees of freedom: 26
 p-value: 0.0000
 AIC: 1706.46
 BIC: 1806.46

Order 1:
 Chi-square statistic: 2413.98
 Degrees of freedom: 676
 p-value: 0.0000
 AIC: 2311.77
 BIC: 5011.83

Order 2:
 Chi-square statistic: 4697.65
 Degrees of freedom: 3016
 p-value: 0.0000
 AIC: 39764.10
 BIC: 112665.65

Order 3:
 Chi-square statistic: 6219.90
 Degrees of freedom: 4888
 p-value: 0.0000
 AIC: 1063064.36
 BIC: 3031406.22



**Calculate metrics in each behaviour group**

In [None]:
# Behaviour labels that make up each group analysed below.
# Some labels sit in more than one group: 'INSF' (Courtship_all and Egg_care),
# 'NIP_CORE' (Nest_mantainance and Egg_care) and 'MAF' (Courtship_all and Agonism),
# so those rows are counted in both groups.
Courtship_all = ['BEND', 'DAN_CIRC', 'ROLL', 'GB', 'LF', 'BK_SW', 'BEND_OPP', 'H_DOWN', 'H_UP', 'MAF', 'UP', 'INSF', 'SPA']
Nest_mantainance = ['SA', 'NIP_OUT', 'NIP_IN', 'BN', 'AM', 'LAND', 'MAN', 'NIP_CORE' ]
Egg_care = ['FAN', 'NIP_CORE', 'INSF']
Agonism = ['CHA/flee', 'MAF']

**COURTSHIP**

In [None]:
# Rows whose behaviour belongs to the courtship group.
# Filtered from dfNoNan (labelled rows only): isin() never matches a missing
# label, so unlabelled rows would be dropped either way.
# .copy() makes the result an independent frame, so adding a column to it
# later does not raise SettingWithCopyWarning.
Courtship_df = dfNoNan[dfNoNan["behaviour"].isin(Courtship_all)].copy()
Courtship_df

Unnamed: 0,TIME,behaviour
518,02:09:30,ROLL
519,02:09:45,ROLL
520,02:10:00,ROLL
521,02:10:15,ROLL
522,02:10:30,ROLL
...,...,...
4633,19:18:15,INSF
4634,19:18:30,INSF
4635,19:18:45,INSF
4636,19:19:00,INSF


In [None]:
# Keep only the last row of each run of identical consecutive behaviours.
# The mask is computed directly rather than through a temporary
# 'behaviour_next' column, so nothing is assigned into a filtered frame.
is_run_end = Courtship_df['behaviour'] != Courtship_df['behaviour'].shift(-1)
Courtship_df = Courtship_df[is_run_end]
Courtship_df

Unnamed: 0,TIME,behaviour
533,02:13:15,ROLL
540,02:15:00,INSF
577,02:24:15,LF
587,02:26:45,H_UP
589,02:27:15,LF
...,...,...
4599,19:09:45,GB
4604,19:11:00,SPA
4610,19:12:30,BEND
4613,19:13:15,DAN_CIRC


In [None]:
# Courtship behaviours in row order, as a plain Python list.
sequence = list(Courtship_df["behaviour"])
sequence

['ROLL',
 'INSF',
 'LF',
 'H_UP',
 'LF',
 'ROLL',
 'UP',
 'BK_SW',
 'ROLL',
 'H_DOWN',
 'LF',
 'DAN_CIRC',
 'BEND',
 'MAF',
 'BEND',
 'DAN_CIRC',
 'ROLL',
 'BEND',
 'DAN_CIRC',
 'GB',
 'MAF',
 'BEND',
 'BK_SW',
 'ROLL',
 'H_UP',
 'UP',
 'BEND',
 'ROLL',
 'UP',
 'BEND',
 'ROLL',
 'BK_SW',
 'ROLL',
 'BK_SW',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'GB',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'GB',
 'LF',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'GB',
 'BEND',
 'ROLL',
 'BEND',
 'ROLL',
 'BEND_OPP',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'ROLL',
 'BK_SW',
 'MAF',
 'BK_SW',
 'LF',
 'MAF',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'DAN_CIRC',
 'BEND',
 'GB',
 'SPA',
 'BEND',
 'GB'

In [None]:
# Distinct states of the current sequence, sorted so the order is the same on
# every kernel restart (a bare set() order varies between runs).
states = sorted(set(sequence))
states

['UP',
 'H_DOWN',
 'BEND_OPP',
 'SPA',
 'DAN_CIRC',
 'H_UP',
 'BK_SW',
 'GB',
 'BEND',
 'MAF',
 'INSF',
 'ROLL',
 'LF']

In [None]:
# Test for orders 0, 1, 2, and 3
for order in range(4):
    # The second value is bound to `dof`, not `df`, so the DataFrame `df`
    # that later cells filter is not replaced by an integer.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 174.98
 Degrees of freedom: 12
 p-value: 0.0000
 AIC: 622.79
 BIC: 661.49

Order 1:
 Chi-square statistic: 686.65
 Degrees of freedom: 156
 p-value: 0.0000
 AIC: 646.74
 BIC: 1149.81

Order 2:
 Chi-square statistic: 1118.53
 Degrees of freedom: 528
 p-value: 0.0000
 AIC: 4555.73
 BIC: 11095.62

Order 3:
 Chi-square statistic: 1358.06
 Degrees of freedom: 840
 p-value: 0.0000
 AIC: 57212.38
 BIC: 142230.87



**Nest Maintenance**

In [None]:
# Rows whose behaviour belongs to the nest-maintenance group.
# Filtered from dfNoNan (labelled rows only): isin() never matches a missing
# label, so unlabelled rows would be dropped either way.
# .copy() makes the result an independent frame, so adding a column to it
# later does not raise SettingWithCopyWarning.
Nest_mantainance_df = dfNoNan[dfNoNan["behaviour"].isin(Nest_mantainance)].copy()
Nest_mantainance_df

Unnamed: 0,TIME,behaviour
16,00:04:00,NIP_OUT
17,00:04:15,NIP_OUT
18,00:04:30,NIP_OUT
19,00:04:45,NIP_OUT
20,00:05:00,NIP_OUT
...,...,...
4642,19:20:30,NIP_CORE
4643,19:20:45,NIP_CORE
4644,19:21:00,NIP_CORE
4645,19:21:15,NIP_CORE


In [None]:
# Keep only the last row of each run of identical consecutive behaviours.
# The mask is computed directly rather than through a temporary
# 'behaviour_next' column, so nothing is assigned into a filtered frame.
is_run_end = Nest_mantainance_df['behaviour'] != Nest_mantainance_df['behaviour'].shift(-1)
Nest_mantainance_df = Nest_mantainance_df[is_run_end]
Nest_mantainance_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Nest_mantainance_df['behaviour_next'] = Nest_mantainance_df['behaviour'].shift(-1)


Unnamed: 0,TIME,behaviour
20,00:05:00,NIP_OUT
45,00:11:15,BN
58,00:14:30,LAND
99,00:24:45,MAN
106,00:26:30,NIP_IN
...,...,...
4048,16:52:00,NIP_IN
4089,17:02:15,LAND
4523,18:50:45,NIP_CORE
4546,18:56:30,NIP_IN


In [None]:
# Nest-maintenance behaviours in row order, as a plain Python list.
sequence = list(Nest_mantainance_df["behaviour"])
sequence

['NIP_OUT',
 'BN',
 'LAND',
 'MAN',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'MAN',
 'NIP_CORE',
 'NIP_IN',
 'MAN',
 'SA',
 'NIP_OUT',
 'AM',
 'LAND',
 'NIP_IN',
 'MAN',
 'SA',
 'BN',
 'LAND',
 'NIP_CORE',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'AM',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'SA',
 'AM',
 'LAND',
 'NIP_CORE',
 'MAN',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'SA',
 'BN',
 'LAND',
 'NIP_CORE',
 'NIP_IN',
 'SA',
 'NIP_OUT',
 'BN',
 'AM',
 'MAN',
 'NIP_IN',
 'SA',
 'BN',
 'LAND',
 'NIP_CORE',
 'LAND',
 'NIP_CORE',
 'NIP_IN',
 'NIP_CORE',
 'NIP_IN',
 'AM',
 'NIP_IN',
 'NIP_CORE',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'NIP_CORE',
 'NIP_IN',
 'MAN',
 'NIP_IN',
 'NIP_CORE',
 'NIP_IN',
 'LAND',
 'NIP_IN',
 'LAND',
 'NIP_CORE',
 'NIP_IN',
 'NIP_CORE']

In [None]:
# Distinct states of the current sequence, sorted so the order is the same on
# every kernel restart (a bare set() order varies between runs).
states = sorted(set(sequence))
states

['BN', 'MAN', 'AM', 'NIP_OUT', 'SA', 'NIP_IN', 'NIP_CORE', 'LAND']

In [None]:
# Test for orders 0, 1, 2, and 3
for order in range(4):
    # The second value is bound to `dof`, not `df`, so the DataFrame `df`
    # that later cells filter is not replaced by an integer.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 34.00
 Degrees of freedom: 7
 p-value: 0.0000
 AIC: 309.74
 BIC: 328.59

Order 1:
 Chi-square statistic: 222.07
 Degrees of freedom: 56
 p-value: 0.0000
 AIC: 273.42
 BIC: 424.25

Order 2:
 Chi-square statistic: 270.99
 Degrees of freedom: 168
 p-value: 0.0000
 AIC: 1136.33
 BIC: 2342.96

Order 3:
 Chi-square statistic: 376.87
 Degrees of freedom: 315
 p-value: 0.0095
 AIC: 8252.46
 BIC: 17905.54



**EGG CARE**

In [None]:
# Rows whose behaviour belongs to the egg-care group.
# Filtered from dfNoNan (labelled rows only): isin() never matches a missing
# label, so unlabelled rows would be dropped either way.
# .copy() makes the result an independent frame, so adding a column to it
# later does not raise SettingWithCopyWarning.
Egg_care_df = dfNoNan[dfNoNan["behaviour"].isin(Egg_care)].copy()
Egg_care_df

Unnamed: 0,TIME,behaviour
59,00:14:45,FAN
60,00:15:00,FAN
61,00:15:15,FAN
62,00:15:30,FAN
63,00:15:45,FAN
...,...,...
4642,19:20:30,NIP_CORE
4643,19:20:45,NIP_CORE
4644,19:21:00,NIP_CORE
4645,19:21:15,NIP_CORE


In [None]:
# Keep only the last row of each run of identical consecutive behaviours.
# The mask is computed directly rather than through a temporary
# 'behaviour_next' column, so nothing is assigned into a filtered frame.
is_run_end = Egg_care_df['behaviour'] != Egg_care_df['behaviour'].shift(-1)
Egg_care_df = Egg_care_df[is_run_end]
Egg_care_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Egg_care_df['behaviour_next'] = Egg_care_df['behaviour'].shift(-1)


Unnamed: 0,TIME,behaviour
201,00:50:15,FAN
260,01:05:00,NIP_CORE
273,01:08:15,FAN
540,02:15:00,INSF
546,02:16:30,NIP_CORE
593,02:28:15,FAN
917,03:49:15,NIP_CORE
1031,04:17:45,FAN
1249,05:12:15,NIP_CORE
1584,06:36:00,FAN


In [None]:
# Egg-care behaviours in row order, as a plain Python list.
sequence = list(Egg_care_df["behaviour"])
sequence

['FAN',
 'NIP_CORE',
 'FAN',
 'INSF',
 'NIP_CORE',
 'FAN',
 'NIP_CORE',
 'FAN',
 'NIP_CORE',
 'FAN',
 'NIP_CORE',
 'FAN',
 'NIP_CORE',
 'FAN',
 'NIP_CORE',
 'INSF',
 'NIP_CORE',
 'INSF',
 'NIP_CORE']

In [None]:
# Distinct states of the current sequence, sorted so the order is the same on
# every kernel restart (a bare set() order varies between runs).
states = sorted(set(sequence))
states

['NIP_CORE', 'FAN', 'INSF']

In [None]:
# Test for orders 0, 1, 2, and 3
for order in range(4):
    # The second value is bound to `dof`, not `df`, so the DataFrame `df`
    # that later cells filter is not replaced by an integer.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 2.95
 Degrees of freedom: 2
 p-value: 0.2291
 AIC: 44.50
 BIC: 47.34

Order 1:
 Chi-square statistic: 21.86
 Degrees of freedom: 6
 p-value: 0.0013
 AIC: 32.74
 BIC: 41.24

Order 2:
 Chi-square statistic: 21.00
 Degrees of freedom: 10
 p-value: 0.0211
 AIC: 67.59
 BIC: 93.09

Order 3:
 Chi-square statistic: 22.40
 Degrees of freedom: 16
 p-value: 0.1307
 AIC: 172.01
 BIC: 248.51



**Agonistic**

In [None]:
# Rows whose behaviour belongs to the agonistic group.
# Filtered from dfNoNan (labelled rows only): isin() never matches a missing
# label, so unlabelled rows would be dropped either way.
# .copy() makes the result an independent frame, so adding a column to it
# later does not raise SettingWithCopyWarning.
Agonism_df = dfNoNan[dfNoNan["behaviour"].isin(Agonism)].copy()
Agonism_df

Unnamed: 0,TIME,behaviour
1947,08:06:45,MAF
1948,08:07:00,MAF
1949,08:07:15,MAF
1950,08:07:30,MAF
1951,08:07:45,MAF
1952,08:08:00,MAF
1953,08:08:15,MAF
1954,08:08:30,MAF
2128,08:52:00,MAF
2129,08:52:15,MAF


In [None]:
# Keep only the last row of each run of identical consecutive behaviours.
# The mask is computed directly rather than through a temporary
# 'behaviour_next' column, so nothing is assigned into a filtered frame.
is_run_end = Agonism_df['behaviour'] != Agonism_df['behaviour'].shift(-1)
Agonism_df = Agonism_df[is_run_end]
Agonism_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Agonism_df['behaviour_next'] = Agonism_df['behaviour'].shift(-1)


Unnamed: 0,TIME,behaviour
2137,08:54:15,MAF
2143,08:55:45,CHA/flee
4569,19:02:15,MAF


In [None]:
# Agonistic behaviours in row order, as a plain Python list.
sequence = list(Agonism_df["behaviour"])
sequence

['MAF', 'CHA/flee', 'MAF']

In [None]:
# Distinct states of the current sequence, sorted so the order is the same on
# every kernel restart (a bare set() order varies between runs).
states = sorted(set(sequence))
states

['CHA/flee', 'MAF']

In [None]:
# Test for orders 0, 1, 2, and 3
for order in range(4):
    # The second value is bound to `dof`, not `df`, so the DataFrame `df`
    # is not replaced by an integer.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 0.33
 Degrees of freedom: 1
 p-value: 0.5637
 AIC: 7.82
 BIC: 6.02

Order 1:
 Chi-square statistic: 2.00
 Degrees of freedom: 2
 p-value: 0.3679
 AIC: 8.00
 BIC: 4.39

Order 2:
 Chi-square statistic: 1.00
 Degrees of freedom: 1
 p-value: 0.3173
 AIC: 16.00
 BIC: 8.79

Order 3:
 Chi-square statistic: 0.00
 Degrees of freedom: 0
 p-value: nan
 AIC: 32.00
 BIC: 17.58

