In [None]:
import numpy as np
from scipy.stats import chi2
import pandas as pd

In [None]:
# Build the CSV location from the data directory, then load the observations.
projectPath = "./data/"
file_path = f"{projectPath}timeVideo2.csv"
df = pd.read_csv(file_path)
# Bare expression on the last line renders the frame as the cell output.
df

Unnamed: 0,TIME,behaviour
0,00:00:00,
1,00:00:15,
2,00:00:30,
3,00:00:45,
4,00:01:00,
...,...,...
563,02:20:45,
564,02:21:00,
565,02:21:15,
566,02:21:30,


In [None]:
# Summary statistics for each column of the loaded frame.
df.describe()

Unnamed: 0,TIME,behaviour
count,568,280
unique,568,12
top,00:00:00,BEND
freq,1,76


**Removing consecutive repetitions of the same behaviour**

In [None]:
# Keep a row only when the following row holds a different behaviour, so each
# run of repeated labels is reduced to its last row. NaN never compares equal
# to NaN, so rows with a missing behaviour are all retained here.
df = df[df['behaviour'].ne(df['behaviour'].shift(-1))]

In [None]:
# Display the frame as it stands after the previous cell.
df

Unnamed: 0,TIME,behaviour
0,00:00:00,
1,00:00:15,
2,00:00:30,
3,00:00:45,
4,00:01:00,
...,...,...
563,02:20:45,
564,02:21:00,
565,02:21:15,
566,02:21:30,


In [None]:
# Label missing observations as "notKnown" and turn the column into a list.
sequence = df["behaviour"].replace(np.nan, "notKnown").tolist()
# Collapse runs of identical consecutive labels: the first label is always
# kept, every later label only when it differs from its predecessor.
sequence = sequence[:1] + [
    current for previous, current in zip(sequence, sequence[1:]) if current != previous
]
sequence

['notKnown',
 'ROLL',
 'LF',
 'notKnown',
 'NIP_CORE',
 'notKnown',
 'LF',
 'notKnown',
 'ROLL',
 'BEND',
 'notKnown',
 'LF',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'notKnown',
 'LF',
 'notKnown',
 'H_DOWN',
 'INSF',
 'notKnown',
 'NIP_CORE',
 'notKnown',
 'ROLL',
 'BEND_OPP',
 'notKnown',
 'BEND_OPP',
 'notKnown',
 'ROLL',
 'LF',
 'ROLL',
 'notKnown',
 'BEND',
 'notKnown',
 'BEND',
 'notKnown',
 'BEND',
 'notKnown',
 'BEND',
 'notKnown',
 'DAN_CIRC',
 'GB',
 'notKnown',
 'SPA',
 'notKnown',
 'SPA',
 'notKnown',
 'SPA',
 'notKnown',
 'SPA',
 'notKnown',
 'INSF',
 'notKnown',
 'NIP_CORE',
 'H_DOWN',
 'notKnown',
 'ROLL',
 'BEND',
 'notKnown',
 'ROLL',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'notKnown',
 'LF',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'notKnown',
 'SPA',
 'notKnown',
 'SPA',
 'notKnown',
 'DAN_CIRC',
 'notKnown',
 'LF',
 'notKnown',
 'MAF',
 'notKnown',
 'BN']

In [None]:
# Unique states, sorted so their order is identical on every kernel restart
# (iteration order of a set of strings is not stable between Python processes).
states = sorted(set(sequence))
states

['LF',
 'ROLL',
 'INSF',
 'GB',
 'notKnown',
 'H_DOWN',
 'MAF',
 'BN',
 'DAN_CIRC',
 'NIP_CORE',
 'BEND_OPP',
 'BEND',
 'SPA']

In [None]:
def get_transition_counts(seq, order):
    """Tally how often each state follows every context of ``order`` states.

    A window of length ``order`` slides over ``seq``; the window contents (as
    a tuple) form the context and the element right after it is the successor.

    Args:
        seq: Sequence of state labels (must support slicing and indexing).
        order: Context length; 0 yields the single empty context ``()``.

    Returns:
        dict mapping each observed context tuple to a dict with one counter
        per entry of the module-level ``states`` list (zero for successors
        never seen after that context).

    Raises:
        KeyError: If a successor is not present in ``states``.
    """
    transition_table = {}
    for start in range(len(seq) - order):
        stop = start + order
        context = tuple(seq[start:stop])
        successor = seq[stop]
        row = transition_table.get(context)
        if row is None:
            # First sighting of this context: one zeroed counter per state.
            row = dict.fromkeys(states, 0)
            transition_table[context] = row
        row[successor] += 1
    return transition_table



In [None]:
# Metrics calculation
def calculate_metrics(seq, order):
    """Score a Markov model of the given order fitted to ``seq``.

    Transition counts come from ``get_transition_counts``; the number of
    states is read from the module-level ``states`` list.

    Args:
        seq: Sequence of state labels.
        order: Number of preceding states used as context.

    Returns:
        Tuple ``(chi_square, df, p_value, aic, bic)`` where
        ``chi_square`` is the Pearson statistic of each context's successor
        counts against a uniform split over ``states`` (summed over contexts),
        ``df`` adds ``len(states) - 1`` per observed context,
        ``p_value`` is the upper-tail chi-square probability of the statistic,
        and ``aic`` / ``bic`` are computed from the maximum-likelihood
        log-likelihood with ``k = len(states) ** (order + 1)``.
    """
    counts = get_transition_counts(seq, order)
    n_states = len(states)

    chi_square = 0
    dof = 0
    log_likelihood = 0
    for next_states in counts.values():
        total = sum(next_states.values())
        expected = total / n_states  # uniform null: every successor equally likely
        for observed in next_states.values():
            chi_square += (observed - expected) ** 2 / expected
            # Zero counts add nothing to the log-likelihood and would hit log(0).
            if observed > 0:
                log_likelihood += observed * np.log(observed / total)
        dof += n_states - 1

    # Survival function rather than 1 - cdf: for large statistics the cdf
    # rounds to 1.0 and the subtraction would return exactly 0.
    p_value = chi2.sf(chi_square, dof)

    # NOTE(review): k counts every cell of the transition table; the usual
    # free-parameter count is len(states)**order * (len(states) - 1) - confirm
    # which one is intended before comparing AIC/BIC across orders.
    k = n_states ** (order + 1)
    aic = 2 * k - 2 * log_likelihood
    bic = np.log(len(seq)) * k - 2 * log_likelihood
    return chi_square, dof, p_value, aic, bic



In [None]:
# Evaluate Markov orders 0, 1, 2 and 3
for order in range(4):
    # Degrees of freedom go into `dof`, not `df`: `df` is the notebook's
    # DataFrame, and rebinding it to an integer breaks every cell that filters it.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 129.30
 Degrees of freedom: 12
 p-value: 0.0000
 AIC: 347.59
 BIC: 378.56

Order 1:
 Chi-square statistic: 359.56
 Degrees of freedom: 144
 p-value: 0.0000
 AIC: 532.20
 BIC: 934.77

Order 2:
 Chi-square statistic: 605.80
 Degrees of freedom: 372
 p-value: 0.0000
 AIC: 4479.46
 BIC: 9712.78

Order 3:
 Chi-square statistic: 735.50
 Degrees of freedom: 576
 p-value: 0.0000
 AIC: 57166.05
 BIC: 125199.11



**Calculate metrics excluding missing (NaN) observations**

In [None]:
# Drop the rows whose behaviour is missing (NaN).
dfNoNan = df.dropna(subset=['behaviour'])

In [None]:
# Display the rows that have a recorded behaviour.
dfNoNan

Unnamed: 0,TIME,behaviour
38,00:09:30,ROLL
43,00:10:45,LF
54,00:13:30,NIP_CORE
83,00:20:45,LF
91,00:22:45,ROLL
96,00:24:00,BEND
106,00:26:30,LF
114,00:28:30,BEND
119,00:29:45,DAN_CIRC
127,00:31:45,BEND


In [None]:
# Behaviour labels of the NaN-free frame, in row order, as a plain list.
sequence = dfNoNan.loc[:, "behaviour"].to_list()
sequence

['ROLL',
 'LF',
 'NIP_CORE',
 'LF',
 'ROLL',
 'BEND',
 'LF',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'LF',
 'H_DOWN',
 'INSF',
 'NIP_CORE',
 'ROLL',
 'BEND_OPP',
 'BEND_OPP',
 'ROLL',
 'LF',
 'ROLL',
 'BEND',
 'BEND',
 'BEND',
 'BEND',
 'DAN_CIRC',
 'GB',
 'SPA',
 'SPA',
 'SPA',
 'SPA',
 'INSF',
 'NIP_CORE',
 'H_DOWN',
 'ROLL',
 'BEND',
 'ROLL',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'LF',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'SPA',
 'SPA',
 'DAN_CIRC',
 'LF',
 'MAF',
 'BN']

In [None]:
# Unique states of the NaN-free sequence, sorted so their order is identical on
# every kernel restart (set iteration order of strings is not stable).
states = sorted(set(sequence))
states

['LF',
 'ROLL',
 'INSF',
 'GB',
 'H_DOWN',
 'MAF',
 'BN',
 'DAN_CIRC',
 'NIP_CORE',
 'BEND_OPP',
 'BEND',
 'SPA']

In [None]:
def get_transition_counts(seq, order):
    """Count successor states for every length-``order`` context in ``seq``.

    NOTE(review): this cell repeats a definition that already appears earlier
    in the notebook; the later definition is the one in effect afterwards.

    Args:
        seq: Sequence of state labels (must support slicing and indexing).
        order: Number of consecutive labels that make up a context.

    Returns:
        dict keyed by context tuple; each value maps every entry of the
        module-level ``states`` list to the number of times it directly
        followed that context.

    Raises:
        KeyError: If a successor is not present in ``states``.
    """
    tallies = {}
    n_windows = len(seq) - order
    for start in range(n_windows):
        window_end = start + order
        context = tuple(seq[start:window_end])
        if context not in tallies:
            # Start every known state at zero so unseen successors are explicit.
            tallies[context] = dict.fromkeys(states, 0)
        tallies[context][seq[window_end]] += 1
    return tallies

In [None]:
# Function that computes the metrics
def calculate_metrics(seq, order):
    """Score a Markov model of the given order fitted to ``seq``.

    Transition counts come from ``get_transition_counts``; the number of
    states is read from the module-level ``states`` list.

    Args:
        seq: Sequence of state labels.
        order: Number of preceding states used as context.

    Returns:
        Tuple ``(chi_square, df, p_value, aic, bic)`` where
        ``chi_square`` is the Pearson statistic of each context's successor
        counts against a uniform split over ``states`` (summed over contexts),
        ``df`` adds ``len(states) - 1`` per observed context,
        ``p_value`` is the upper-tail chi-square probability of the statistic,
        and ``aic`` / ``bic`` are computed from the maximum-likelihood
        log-likelihood with ``k = len(states) ** (order + 1)``.
    """
    counts = get_transition_counts(seq, order)
    n_states = len(states)

    chi_square = 0
    dof = 0
    log_likelihood = 0
    for next_states in counts.values():
        total = sum(next_states.values())
        expected = total / n_states  # uniform null: every successor equally likely
        for observed in next_states.values():
            chi_square += (observed - expected) ** 2 / expected
            # Zero counts add nothing to the log-likelihood and would hit log(0).
            if observed > 0:
                log_likelihood += observed * np.log(observed / total)
        dof += n_states - 1

    # Survival function rather than 1 - cdf: for large statistics the cdf
    # rounds to 1.0 and the subtraction would return exactly 0.
    p_value = chi2.sf(chi_square, dof)

    # NOTE(review): k counts every cell of the transition table; the usual
    # free-parameter count is len(states)**order * (len(states) - 1) - confirm
    # which one is intended before comparing AIC/BIC across orders.
    k = n_states ** (order + 1)
    aic = 2 * k - 2 * log_likelihood
    bic = np.log(len(seq)) * k - 2 * log_likelihood
    return chi_square, dof, p_value, aic, bic

In [None]:
# Evaluate Markov orders 0, 1, 2 and 3
for order in range(4):
    # Degrees of freedom go into `dof`, not `df`: `df` is the notebook's
    # DataFrame, and rebinding it to an integer breaks every cell that filters it.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 31.08
 Degrees of freedom: 11
 p-value: 0.0011
 AIC: 238.77
 BIC: 261.47

Order 1:
 Chi-square statistic: 189.26
 Degrees of freedom: 121
 p-value: 0.0001
 AIC: 390.49
 BIC: 662.91

Order 2:
 Chi-square statistic: 361.00
 Degrees of freedom: 308
 p-value: 0.0202
 AIC: 3496.91
 BIC: 6765.97

Order 3:
 Chi-square statistic: 442.00
 Degrees of freedom: 418
 p-value: 0.2011
 AIC: 41486.91
 BIC: 80715.70



**CALCULATE METRICS PER BEHAVIOUR GROUPS**

In [None]:
# Behaviour codes grouped by functional category. Some codes appear in more
# than one group ('NIP_CORE', 'INSF', 'MAF').
Courtship_all = [
    'BEND', 'DAN_CIRC', 'ROLL', 'GB', 'LF', 'BK_SW', 'BEND_OPP',
    'H_DOWN', 'H_UP', 'MAF', 'UP', 'INSF', 'SPA',
]
Nest_mantainance = [
    'SA', 'NIP_OUT', 'NIP_IN', 'BN',
    'AM', 'LAND', 'MAN', 'NIP_CORE',
]
Egg_care = [
    'FAN', 'NIP_CORE', 'INSF',
]
Agonism = [
    'CHA/flee', 'MAF',
]

**COURTSHIP**

In [None]:
# Rows whose behaviour code belongs to the courtship group.
Courtship_df = df.loc[df["behaviour"].isin(Courtship_all)]
Courtship_df

Unnamed: 0,TIME,behaviour
38,00:09:30,ROLL
43,00:10:45,LF
83,00:20:45,LF
91,00:22:45,ROLL
96,00:24:00,BEND
106,00:26:30,LF
114,00:28:30,BEND
119,00:29:45,DAN_CIRC
127,00:31:45,BEND
142,00:35:30,LF


In [None]:
# Courtship behaviour labels, in row order, as a plain list.
sequence = Courtship_df.loc[:, "behaviour"].to_list()
sequence

['ROLL',
 'LF',
 'LF',
 'ROLL',
 'BEND',
 'LF',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'LF',
 'H_DOWN',
 'INSF',
 'ROLL',
 'BEND_OPP',
 'BEND_OPP',
 'ROLL',
 'LF',
 'ROLL',
 'BEND',
 'BEND',
 'BEND',
 'BEND',
 'DAN_CIRC',
 'GB',
 'SPA',
 'SPA',
 'SPA',
 'SPA',
 'INSF',
 'H_DOWN',
 'ROLL',
 'BEND',
 'ROLL',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'LF',
 'BEND',
 'DAN_CIRC',
 'BEND',
 'SPA',
 'SPA',
 'DAN_CIRC',
 'LF',
 'MAF']

In [None]:
# Unique courtship states, sorted so their order is identical on every kernel
# restart (set iteration order of strings is not stable).
states = sorted(set(sequence))
states

['LF',
 'ROLL',
 'INSF',
 'GB',
 'H_DOWN',
 'MAF',
 'DAN_CIRC',
 'BEND_OPP',
 'BEND',
 'SPA']

In [None]:
# Evaluate Markov orders 0, 1, 2 and 3
for order in range(4):
    # Degrees of freedom go into `dof`, not `df`: `df` is the notebook's
    # DataFrame, and rebinding it to an integer breaks every cell that filters it.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 25.44
 Degrees of freedom: 9
 p-value: 0.0025
 AIC: 202.56
 BIC: 220.63

Order 1:
 Chi-square statistic: 123.71
 Degrees of freedom: 81
 p-value: 0.0016
 AIC: 298.67
 BIC: 479.34

Order 2:
 Chi-square statistic: 267.00
 Degrees of freedom: 225
 p-value: 0.0287
 AIC: 2038.14
 BIC: 3844.80

Order 3:
 Chi-square statistic: 324.67
 Degrees of freedom: 306
 p-value: 0.2217
 AIC: 20014.91
 BIC: 38081.53



**Nest Maintenance**

In [None]:
# Rows whose behaviour code belongs to the nest-maintenance group.
Nest_mantainance_df = df.loc[df["behaviour"].isin(Nest_mantainance)]
Nest_mantainance_df

Unnamed: 0,TIME,behaviour
54,00:13:30,NIP_CORE
184,00:46:00,NIP_CORE
454,01:53:30,NIP_CORE
567,02:21:45,BN


In [None]:
# Collapse runs of the same behaviour down to their last row. The comparison
# uses the shifted Series directly instead of writing a helper column onto the
# filtered frame, which is what raised SettingWithCopyWarning.
Nest_mantainance_df = Nest_mantainance_df[
    Nest_mantainance_df['behaviour'] != Nest_mantainance_df['behaviour'].shift(-1)
]
Nest_mantainance_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Nest_mantainance_df['behaviour_next'] = Nest_mantainance_df['behaviour'].shift(-1)


Unnamed: 0,TIME,behaviour
454,01:53:30,NIP_CORE
567,02:21:45,BN


In [None]:
# Nest-maintenance behaviour labels, in row order, as a plain list.
sequence = Nest_mantainance_df.loc[:, "behaviour"].to_list()
sequence

['NIP_CORE', 'NIP_CORE', 'NIP_CORE', 'BN']

In [None]:
# Unique nest-maintenance states, sorted so their order is identical on every
# kernel restart (set iteration order of strings is not stable).
states = sorted(set(sequence))
states

['BN', 'NIP_CORE']

In [None]:
# Evaluate Markov orders 0, 1, 2 and 3
for order in range(4):
    # Degrees of freedom go into `dof`, not `df`: `df` is the notebook's
    # DataFrame, and rebinding it to an integer breaks every cell that filters it.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 1.00
 Degrees of freedom: 1
 p-value: 0.3173
 AIC: 8.50
 BIC: 7.27

Order 1:
 Chi-square statistic: 0.33
 Degrees of freedom: 1
 p-value: 0.5637
 AIC: 11.82
 BIC: 9.36

Order 2:
 Chi-square statistic: 0.00
 Degrees of freedom: 1
 p-value: 1.0000
 AIC: 18.77
 BIC: 13.86

Order 3:
 Chi-square statistic: 1.00
 Degrees of freedom: 1
 p-value: 0.3173
 AIC: 32.00
 BIC: 22.18



**EGG CARE**

In [None]:
# Rows whose behaviour code belongs to the egg-care group.
Egg_care_df = df.loc[df["behaviour"].isin(Egg_care)]
Egg_care_df

Unnamed: 0,TIME,behaviour
54,00:13:30,NIP_CORE
176,00:44:00,INSF
184,00:46:00,NIP_CORE
438,01:49:30,INSF
454,01:53:30,NIP_CORE


In [None]:
# Collapse runs of the same behaviour down to their last row. The comparison
# uses the shifted Series directly instead of writing a helper column onto the
# filtered frame, which avoids pandas' SettingWithCopyWarning.
Egg_care_df = Egg_care_df[
    Egg_care_df['behaviour'] != Egg_care_df['behaviour'].shift(-1)
]
Egg_care_df

In [None]:
# Egg-care behaviour labels, in row order, as a plain list. Egg_care_df only
# holds labels selected with isin(Egg_care), so there is no "NaN" placeholder
# to replace; replace(..., None) is also avoided because a None value has
# special, version-dependent meaning in pandas.
sequence = Egg_care_df["behaviour"].tolist()
sequence

['NIP_CORE', 'INSF', 'NIP_CORE', 'INSF', 'NIP_CORE']

In [None]:
# Unique egg-care states, sorted so their order is identical on every kernel
# restart (set iteration order of strings is not stable).
states = sorted(set(sequence))
states

['INSF', 'NIP_CORE']

In [None]:
# Evaluate Markov orders 0, 1, 2 and 3
for order in range(4):
    # Degrees of freedom go into `dof`, not `df`: `df` is the notebook's
    # DataFrame, and rebinding it to an integer breaks every cell that filters it.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()

Order 0:
 Chi-square statistic: 0.20
 Degrees of freedom: 1
 p-value: 0.6547
 AIC: 10.73
 BIC: 9.95

Order 1:
 Chi-square statistic: 4.00
 Degrees of freedom: 2
 p-value: 0.1353
 AIC: 8.00
 BIC: 6.44

Order 2:
 Chi-square statistic: 3.00
 Degrees of freedom: 2
 p-value: 0.2231
 AIC: 16.00
 BIC: 12.88

Order 3:
 Chi-square statistic: 2.00
 Degrees of freedom: 2
 p-value: 0.3679
 AIC: 32.00
 BIC: 25.75



**Agonistic not calculated: just MAF**

In [None]:
# Rows whose behaviour code belongs to the agonism group.
Agonism_df = df.loc[df["behaviour"].isin(Agonism)]
Agonism_df

Unnamed: 0,TIME,behaviour
552,02:18:00,MAF


In [None]:
# Agonistic behaviour labels, in row order, as a plain list.
sequence = Agonism_df.loc[:, "behaviour"].to_list()
sequence

In [None]:
# Unique agonistic states, sorted so their order is identical on every kernel
# restart (set iteration order of strings is not stable).
states = sorted(set(sequence))
states

In [None]:
# Evaluate Markov orders 0, 1, 2 and 3
for order in range(4):
    # Degrees of freedom go into `dof`, not `df`: `df` is the notebook's
    # DataFrame, and rebinding it to an integer breaks every cell that filters it.
    chi_square, dof, p_value, aic, bic = calculate_metrics(sequence, order)
    print(f"Order {order}:")
    print(f" Chi-square statistic: {chi_square:.2f}")
    print(f" Degrees of freedom: {dof}")
    print(f" p-value: {p_value:.4f}")
    print(f" AIC: {aic:.2f}")
    print(f" BIC: {bic:.2f}")
    print()