# Analysis of synchrony across all dyads

In [1]:
import pandas as pd
import os
from pprint import pprint
from importlib import reload
from collections import defaultdict
from facetracking_helpers import calculations
reload(calculations)

LOGGING_RATE = 0.1
SOURCE_DIR = "./prepped_data/facetracking"

if True:
    data = []  # All face tracking data
    print("reading prepped data")
    for _, _, files in os.walk(SOURCE_DIR):
        for f in files:
            df_subj = pd.read_csv(fr"{SOURCE_DIR}\{f}", sep=',')
            df_subj["Time"] = df_subj["Time"].apply(pd.to_datetime)
            df_subj = df_subj.set_index("Time")

            # TEMPORARY
            df_subj.columns = df_subj.columns.str.replace('Left', '', 1)
            df_subj.columns = df_subj.columns.str.replace('Right', '', 1)
            df_subj.columns = df_subj.columns.str.strip('_')
            df_subj = df_subj.groupby(level=0, axis=1).mean()

            df_subj.name = f.rstrip('.csv')
            data.append(df_subj)

    print("combining datasets")
    data_by_subj = defaultdict(list)  # Paired face tracking data sets {dyad1: [data1, data2], ...}
    for df in data:
        subj = df.name.split('_')[0]  # e.g. 3_2 -> 3
        data_by_subj[subj].append(df)
    data_by_subj = dict(data_by_subj)

reading prepped data
combining datasets


In [2]:
reload(calculations)
faceshape_index = None

if True:
    for dyad, pair in data_by_subj.items():
        print("Calculating synchrony for dyad", dyad)
        df_sync = calculations.windowed_synchrony(*pair)
        df_sync["FaceShape"] = pair[0].columns
        df_sync = df_sync.set_index("FaceShape")
        df_sync = df_sync.sort_index()
        faceshape_index = df_sync.index.copy()
        df_sync.to_csv(f"out/tmp/s{dyad}_sync.csv", index=True)

In [3]:
data = []  # All face tracking data
for _, _, files in os.walk("./out/tmp/"):
    for f in files:
        df_subj = pd.read_csv(f"./out/tmp/{f}", sep=',')
        df_subj.name = f.rstrip('_sync.csv')
        data.append(df_subj)

In [4]:
def get_cleaned_sync(df):
    """
    Filter out FaceShape with low variance or extreme synchrony.
    Return mean synchrony and lag
    """
    limit_v1, limit_v2 = tuple(df[["var_subj1", "var_subj2"]].quantile(0.25))
    limit_s = df["Synchrony"].quantile(0.75)
    df_filtered = df[(df["var_subj1"] >= limit_v1) & (df["var_subj2"] >= limit_v2) & (df["Synchrony"] <= limit_s)]
    mean_sync = df_filtered["Synchrony"].apply(calculations.v_fisher_z).mean()
    mean_sync = calculations._inverse_fisher_z(mean_sync)
    mean_lag = df_filtered["Lags"].mean()
    return mean_sync, mean_lag

print("Mean synchrony values for all facial movements, filtered out low variance (lower than 25th percentile)")
df_sync = pd.DataFrame(columns=["Dyad", "Synchrony", "Lag"])
for df in data:
    dyad = df.name
    dyad = dyad.lstrip('s')
    mean_sync, mean_lag = get_cleaned_sync(df)
    df_sync = df_sync.append({"Dyad": dyad, "Synchrony": mean_sync, "Lag": mean_lag}, ignore_index=True)
df_sync.set_index("Dyad").to_csv("out/result/results_sync.csv")

Mean synchrony values for all facial movements, filtered out low variance (lower than 25th percentile)


In [5]:
print("Synchrony by FaceShape:")
s_fs_sync = pd.Series([0.0]*len(data[0].index), index=data[0]["FaceShape"])
for df in data:
    df = df.set_index("FaceShape")
    df = df.sort_index()
    fs_sync = df["Synchrony"]
    s_fs_sync += fs_sync
s_fs_sync /= len(data)
s_fs_sync.index = list(map(lambda x: x.replace('Left', '', 1).replace('Right', '', 1), s_fs_sync.index.values))
s_fs_sync = s_fs_sync.groupby(s_fs_sync.index).mean()
s_fs_sync = s_fs_sync.sort_values(ascending=False)


ax = s_fs_sync.plot.bar(figsize=(20,5), ylim=(0,0.4))
ax.axhline(s_fs_sync.mean(), color='red')
ax.axhline(s_fs_sync.mean() - s_fs_sync.std(), color='orange')
ax.axhline(s_fs_sync.mean() + s_fs_sync.std(), color='orange')
s_fs_sync.describe()

Synchrony by FaceShape:


IndexError: list index out of range

In [None]:
reload(calculations)

def calc_sync(_sample1, _sample2, **kwargs):
    sample1 = _sample1.copy()
    sample2 = _sample2.copy()
    df_out = calculations.windowed_synchrony(sample1, sample2, **kwargs)
    r, _ = get_cleaned_sync(df_out)
    return r.mean()


In [None]:
import random

interval = 30
if True:
    for dyad in range(1, 12):
        if dyad == 5: continue
        print(f"Pseudo-Test for dyad {dyad}")

        sample1, sample2 = data_by_subj[str(dyad)]

        print("Real Synchrony: ", end='')
        print(calc_sync(sample1, sample2, interval_sec=interval, test_pseudo=False, verbose=False))

        print("Pseudo-Synchrony random shuffle: ", end='')
        print(calc_sync(sample1, sample2, interval_sec=interval, test_pseudo=True, verbose=False))

        while True:
            rand_subj = random.randint(1, 11)
            if rand_subj != dyad:
                break

        sample2 = data_by_subj[str(rand_subj)][1]

        if len(sample1.index) > len(sample2.index):
            sample1 = sample1[:len(sample2)]
            sample1.index = sample2.index
        else:
            sample2 = sample2[:len(sample1)]
            sample2.index = sample1.index

        print(f"Pseudo-Synchrony with subject {rand_subj}: ", end='')
        print(calc_sync(sample1, sample2, interval_sec=interval, test_pseudo=False, verbose=False))
        print("=====================")

In [None]:
from collections import defaultdict
import pickle

if True:
    real_rs = {}
    pseudo_rs = defaultdict(list)
    for i in range(1, 12):
        if i == 5: continue
        print(f"Calculating dyad {i} pseudo-synchronies")
        sample = data_by_subj[str(i)]
        real_rs[i] = calc_sync(*sample)
        for _ in range(100):
            pseudo_rs[i].append(calc_sync(*sample, test_pseudo=True))

    print(real_rs, pseudo_rs, sep='\n')

    with open("out/result/real_syncs_n50.pkl", 'wb') as f:
        pickle.dump(real_rs, f)

    with open("out/result/pseudo_syncs_n50.pkl", 'wb') as f:
        pickle.dump(pseudo_rs, f)

In [None]:
import pickle

with open("out/result/real_syncs.pkl", 'rb') as f:
    real_rs = pickle.load(f)

with open("out/result/pseudo_syncs.pkl", 'rb') as f:
    pseudo_rs = pickle.load(f)


In [None]:
from scipy.stats import ttest_1samp
import pandas as pd
import numpy as np

ts = []
for dyad, p_rs in pseudo_rs.items():
    p_rs = np.array(p_rs)
    p_rs = p_rs[~np.isnan(p_rs)]
    t = ttest_1samp(p_rs, real_rs[dyad], alternative="greater")
    ts.append(t)
ts = pd.Series(ts)

for i, (t, p) in enumerate(ts):
    print("Dyad", list(real_rs.keys())[i])
    print(f"t = {t:.4f}\np = {p}")
    print("================")


In [None]:
import numpy as np
dyad = 9
print("Pseudo mean", np.array(pseudo_rs[dyad]).mean(),
      "\nReal sync", real_rs[dyad])



