<H1>FaceShape Data</H1>
Exploratory analysis of a single dyad.

## Imports

In [1]:
from IPython.display import Markdown as md
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt

## Constants

In [2]:
# Paths to the face-tracking log files (CSV), one per subject, relative to the working directory
LOG_SUBJ1 = r"prepped_data/facetracking/8_1.csv"
LOG_SUBJ2 = r"prepped_data/facetracking/8_2.csv"

# Logging interval in seconds
LOGGING_RATE = 0.1

# Save images to png if true
SAVE_IMG = True

## Read Data
The FaceShape data is a time series of the weightings of all eye and lip shapes the
HTC Facial Tracker recorded.

In [3]:
from datetime import datetime

# Read each subject's log, convert the "Time" column to datetimes with a single
# vectorized pd.to_datetime call (instead of one call per row), and use it as
# the index so the frames can be grouped by time later on.
df_subj1 = (
    pd.read_csv(LOG_SUBJ1)
    .assign(Time=lambda df: pd.to_datetime(df["Time"]))
    .set_index("Time")
)
df_subj2 = (
    pd.read_csv(LOG_SUBJ2)
    .assign(Time=lambda df: pd.to_datetime(df["Time"]))
    .set_index("Time")
)

FileNotFoundError: [Errno 2] No such file or directory: 'prepped_data/facetracking/8_1.csv'

First comparison of data from both subjects.

In [None]:
prefix = "Mouth_Smile"

# Per timestamp, add up every column whose name starts with the prefix
smile_cols1 = df_subj1.columns.str.startswith(prefix)
smile_cols2 = df_subj2.columns.str.startswith(prefix)
fs1 = df_subj1.iloc[:, smile_cols1].sum(axis=1)
fs2 = df_subj2.iloc[:, smile_cols2].sum(axis=1)

# Two stacked panels with a shared y-axis, one per subject
fig, axs = plt.subplots(2, sharey=True, figsize=(10, 5))
fig.suptitle('FaceShape Signals')
for panel, series in zip(axs, (fs1, fs2)):
    panel.plot(series)

fig.tight_layout()
if SAVE_IMG:
    plt.savefig("out/img/signals.png")
plt.show()


## Data Analysis
We now run a cross-correlation analysis on both signals. Pandas does not offer
a native method for this, but there is a simple implementation based on the pandas `.corr()`
method, taken from
https://towardsdatascience.com/computing-cross-correlation-between-geophysical-time-series-488642be7bf0

As interpersonal synchrony can change over time, we slice the DataFrame into 1-minute
slices to assess the change of pacing in mimicry by the change of lags and extent
of maximal correlation.
The following function slices the DataFrames into equal parts. We want one minute
of data for each slice, so we compute the seconds per slice and divide by the logging
interval of the time series to get the number of samples per slice.

### Cross-Correlation between two signals
Using this function, we can compute the Cross-Correlation of two signals at different time
lags.

In [None]:
def crosscorr(datax, datay, lag=0):
    """
    Correlate ``datax`` with a lagged copy of ``datay``.

    ``datay`` is moved by ``lag`` positions with ``Series.shift``; the
    positions vacated by the shift hold NaN.

    :param datax, datay: pandas.Series objects of equal length
    :param lag: number of positions to shift ``datay`` by, default 0
    :return: Float indicating cross-correlation
    """
    shifted = datay.shift(lag)
    return datax.corr(shifted)

prefix = "Mouth_Smile"

# Average the smile-related FaceShape columns per timestamp. The boolean mask
# has one entry per column, so the number of matching columns is mask.sum();
# len(mask) would be the total number of columns.
smile_mask1 = df_subj1.columns.str.startswith(prefix)
smile_mask2 = df_subj2.columns.str.startswith(prefix)
signal1 = df_subj1.loc[:, smile_mask1].sum(axis=1) / smile_mask1.sum()
signal2 = df_subj2.loc[:, smile_mask2].sum(axis=1) / smile_mask2.sum()

# Replace the time index by a positional one so both signals align by sample
signal1 = signal1.reset_index(drop=True)
signal2 = signal2.reset_index(drop=True)

# Compute Cross-Correlation at different time lags. Lags are counted in
# samples; the stop value is 76 because np.arange excludes it, which keeps the
# range symmetric around zero.
lags = np.arange(-75, 76, 1)
rs = np.nan_to_num([crosscorr(signal1, signal2, lag) for lag in lags])

# Calculate maximum correlation and maximizing time lag: take whichever of the
# largest positive and largest negative correlation has the greater magnitude
max_rs, min_rs = np.abs(np.max(rs)), np.abs(np.min(rs))
if max_rs >= min_rs:
    corr_val = max_rs
    corr_lag = lags[np.argmax(rs)]
else:
    corr_val = min_rs
    corr_lag = lags[np.argmin(rs)]

# Visualization of Cross-Correlation
fig, ax = plt.subplots(2, 1, figsize=(18, 9))
ax[0].plot(signal1, lw=0.7, c='b')
ax[0].plot(signal2, lw=0.7, c='r')
ax[1].plot(lags, rs, c='k', label='Cross-Correlation')
ax[1].axvline(x=corr_lag, c='r', lw=1, ls='--', label='Max. correlation')
# Axes objects have no `xticks` method; set_xticks is the Axes-level call.
# Regular ticks every 25 samples plus one at the maximizing lag.
ax[1].set_xticks(np.unique(np.append(lags[::25], corr_lag)))
ax[1].set_xlabel('Lag (samples)')
ax[1].legend(fontsize=12)

if SAVE_IMG: plt.savefig('out/img/cross_corr_example.png')
plt.show()

# corr_lag is a number of samples; one sample spans LOGGING_RATE seconds
md(f"By shifting the second signal by {corr_lag * LOGGING_RATE * 1000:.0f} ms we get a maximum "
   f"correlation of r={corr_val} between both signals.")

## Windowed Time Lagged Cross-Correlation

https://towardsdatascience.com/four-ways-to-quantify-synchrony-between-time-series-data-b99136c4a9c9

In [None]:
import seaborn as sns

# Largest lag, in seconds, scanned in each direction by vectorized_crosscorr
lag_seconds = 5
lag_steps = LOGGING_RATE  # At least LOGGING_RATE
# Length in seconds of each time window that windowed_crosscorr groups the data into
window_size = 5

def vectorized_crosscorr(df):
    """
    Cross-correlate the first two columns of ``df`` over a symmetric lag range.

    The lags run from ``-lag_seconds / lag_steps`` to ``+lag_seconds / lag_steps``
    inclusive, using the module-level ``lag_seconds`` and ``lag_steps``.

    :param df: DataFrame whose first two columns are the signals to compare
    :return: numpy array with one correlation value per lag, NaN replaced by 0
    """
    # Round before converting: a ratio such as 0.3 / 0.1 evaluates to 2.999...
    # in floating point, and plain int() truncation would drop a lag step.
    max_lag = int(round(lag_seconds / lag_steps))
    s1 = df.iloc[:, 0]
    s2 = df.iloc[:, 1]
    rs = [crosscorr(s1, s2, lag) for lag in range(-max_lag, max_lag + 1)]
    return np.nan_to_num(rs)

def fisher_z(corr_r):
    """ Compute Fisher's Z transformation
    for given correlation value.

    arctanh diverges at r = +1 and r = -1. Both are mapped to 0 so that a
    degenerate correlation cannot put an infinite value into later aggregates
    (previously only r = +1 was guarded and r = -1 produced -inf).

    :param corr_r: correlation value
    :return: Z-value; 0 when |corr_r| == 1
    """
    if abs(corr_r) == 1:
        return 0
    return np.arctanh(corr_r)

def v_fisher_z(corr_r):
    """ Compute Fisher's Z transformation element-wise.

    Works on a single correlation value as well as on array-likes of
    correlation values. arctanh diverges at r = +1 and r = -1; those entries
    are mapped to 0 instead of +/-inf.

    :param corr_r: correlation value, or array-like of correlation values
    :return: Z-value for scalar input; numpy array of Z-values for
        array-like input. Entries with |r| == 1 yield 0.
    """
    r = np.asarray(corr_r, dtype=float)
    degenerate = np.abs(r) == 1
    # Substitute 0 for the degenerate entries before calling arctanh so that
    # no inf (and no RuntimeWarning) is produced; arctanh(0) is 0.
    z = np.arctanh(np.where(degenerate, 0.0, r))
    if z.ndim == 0:
        # Scalar in, scalar out
        return 0 if degenerate else z[()]
    return z

def windowed_crosscorr(ts1, ts2, plot=False):
    """ Cross-correlate time series over time windows

    Both series are combined into one frame, grouped into consecutive
    ``window_size``-second windows by their index, and every window is passed
    to ``vectorized_crosscorr``. The result is a table with one row per
    window and one column per lag.

    :param ts1, ts2: pandas.Series sharing a datetime-like index
        (needed by ``pd.Grouper(freq=...)``)
    :param plot: if True, draw the table as a heatmap (and save it when
        SAVE_IMG is set)
    :return: tuple ``(sync, mean_lag)``; ``sync`` is the mean over windows of
        the absolute value of each window's largest Fisher-Z value,
        ``mean_lag`` is the mean over windows of the column label at which
        that largest value occurs
    """
    df_comb = ts1.to_frame()
    df_comb[ts2.name + '_subj2'] = ts2

    # Group in <window_size>-second intervals; one array of lagged
    # correlations per window, expanded into one row per window
    windows = df_comb.groupby(pd.Grouper(freq=f"{window_size}s"))
    rss = windows.apply(vectorized_crosscorr)
    rss = pd.DataFrame.from_dict(dict(zip(rss.index, rss.values)), orient='index')

    if plot:
        # Derive the axis from the data instead of hard-coding 100 columns, so
        # the last lag column is not clipped and other lag settings still fit
        n_lags = rss.shape[1]
        half = (n_lags - 1) // 2  # columns run from -half to +half lag steps
        f, ax = plt.subplots(figsize=(10, 5))
        sns.heatmap(rss, cmap='RdBu_r', ax=ax)
        ax.set(title='Windowed Time Lagged Cross Correlation', xlim=[0, n_lags],
               xlabel='Time-lag of cross-correlation', ylabel='Time Frame')
        tick_lags = [-half, -int(half / 2), 0, int(half / 2), half]
        # Heatmap cells are one unit wide; +0.5 puts each tick on a cell centre
        ax.set_xticks([lag + half + 0.5 for lag in tick_lags])
        ax.set_xticklabels(tick_lags)
        if SAVE_IMG: plt.savefig("out/img/windowed_time_lagged_crosscorr.png")

    # DataFrame.apply runs column by column here; the transform is element-wise,
    # so it is computed once and reused for both summary values
    z_scores = rss.apply(lambda col: [fisher_z(r) for r in col])
    sync = z_scores.max(axis=1).abs().mean()
    # NOTE(review): idxmax yields the column label (0 .. n_lags - 1), not a
    # signed lag; subtract (n_lags - 1) // 2 if a signed lag in steps is wanted.
    mean_lag = z_scores.idxmax(axis=1).mean()

    return sync, mean_lag

# Disabled: per-feature synchrony over every FaceShape column.
# NOTE(review): DataFrame.append was removed in pandas 2.0 — before re-enabling,
# collect the rows in a list and build the frame once.
# df_sync = pd.DataFrame(columns=["feature", "synchrony"])
# for i in range(0, len(df_subj1.columns)):
#     sync, mean_lag = windowed_crosscorr(df_subj1.iloc[:, i], df_subj2.iloc[:, i])
#     df_sync = df_sync.append({"feature": df_subj1.columns[i], "synchrony": sync, "lag": mean_lag}, ignore_index=True)
#
# df_sync = df_sync.set_index("feature")
# Boolean mask of the columns whose name starts with `prefix` (not used by the call below)
prefix_cols = df_subj1.columns.str.startswith(prefix)
# Windowed analysis of one FaceShape for both subjects, with the heatmap enabled
windowed_crosscorr(df_subj1["Mouth_LowerRight_Down"], df_subj2["Mouth_LowerRight_Down"], plot=True)

### Rolling Window Time Lagged Cross-Correlation

In [None]:


# TODO: the rolling-window analysis and its plot are missing from this cell.
# As written it only saves and shows the current pyplot figure — presumably a
# blank one, since nothing is drawn here.
if SAVE_IMG: plt.savefig('out/img/rolling_window_cross_corr.png')
plt.show()





<h3>Cross-Correlation between <i>all</i> signals</h3>
We can compute the Cross-Correlation of the signals of all FaceShapes.


In [None]:
# NOTE(review): this cell was annotated "Activate by removing md" — presumably
# it had been parked as a markdown cell; confirm it is meant to run.

# Lags in samples, symmetric around zero (np.arange excludes the stop value)
lags = np.arange(-200, 201, 1)

# Per FaceShape column: the lag with the strongest correlation and its magnitude
corr_lags = []
corr_vals = []

for col in df_subj1.columns:
    # Compute Cross-Correlation at different time lags
    rs = np.nan_to_num(
        [crosscorr(df_subj1[col], df_subj2[col], lag) for lag in lags])

    # Calculate maximum correlation and maximizing time lag: take whichever of
    # the largest positive and largest negative correlation is greater in magnitude
    max_rs, min_rs = np.abs(np.max(rs)), np.abs(np.min(rs))
    if max_rs >= min_rs:
        corr_val = max_rs
        corr_lag = lags[np.argmax(rs)]
    else:
        corr_val = min_rs
        corr_lag = lags[np.argmin(rs)]

    corr_lags.append(corr_lag)
    corr_vals.append(corr_val)

By looking at the distribution of time lags of all Cross-Correlations, we can assess the
synchrony of all facial expressions between both subjects. We use Fisher's Z transformation
to standardize the correlation values.

In [None]:
# One row per FaceShape: peak correlation magnitude and the lag where it occurs
df_corr = pd.DataFrame(
    {"Correlation Value": corr_vals, "Lag": corr_lags},
    columns=['Correlation Value', 'Lag'])

# Standardize the correlation magnitudes with Fisher's Z and attach the names
df_corr["Correlation Value"] = df_corr["Correlation Value"].map(lambda r: fisher_z(abs(r)))
df_corr["FaceShape"] = pd.Series(df_subj1.columns)

ranked = df_corr.sort_values(by=["Correlation Value"], ascending=False)
print(ranked)

ax = df_corr['Correlation Value'].plot.hist(bins=20)
z_values = df_corr['Correlation Value']
print("Synchrony as the average standardized correlation value:",
    sum(z_values) / len(z_values))

In [None]:
# Distribution of the lags (sample shifts) at which each FaceShape correlated most strongly
df_lags = pd.DataFrame({'Correlation Lag Values': corr_lags})
ax = df_lags.plot.hist(bins=20)

#### Highest correlations

In [None]:
# First five rows after ordering by standardized correlation, highest first
df_corr.sort_values(by="Correlation Value", ascending=False).head(5)