In [1]:
import numpy as np
import pandas as pd
import portion as P
import numpy as np
import pandas as pd
from scipy import interpolate
from p1d.persistence1d import RunPersistence

from analysis import compute_gaps, butter_lowpass_filter, butter_highpass_filter,normalize_angle


## Configuration

- Only retain segments where a complete 30-second conversation occurred.
- Exclude pairs 0-3 due to implementation errors.
- Exclude pair 7 since their avatar connection was lost midway, and only the voice remained.

In [2]:
# Root directory of the data set; the processing cell reads from its RawData/ subfolder.
folder_path = '../Data'

# Pair ids run from 0 to 30, and both timing lists below are indexed by pair id.
_n_pairs = 31

# Conversation start time per pair: 0 unless the pair is listed here.
_late_starts = {5: 30, 11: 30, 16: 30}
conversation_start_times = [_late_starts.get(pair, 0) for pair in range(_n_pairs)]

# Conversation end time per pair: 600 unless the pair is listed here.
_end_overrides = {
    11: 630, 14: 630, 16: 630,
    20: 630, 21: 480, 22: 630,
    28: 630,
}
conversation_end_times = [_end_overrides.get(pair, 600) for pair in range(_n_pairs)]

# Pair ids that the processing loop leaves out entirely.
skips = [7]

## Process

In [4]:
turns_all = pd.read_csv("processed/turns.csv")


def _nod_zero_crossing_times(raw_t, raw_pitch, t):
    """Return the times in `t` at which the simplified head-pitch signal changes sign.

    Parameters
    ----------
    raw_t : pandas.Series
        Sample times of the raw pose stream, on the same clock as `t`.
    raw_pitch : pandas.Series
        Raw head pitch values ('POSE.Head.rotation.x'), aligned with `raw_t`.
    t : numpy.ndarray
        Uniform time grid the signal is resampled onto.

    Returns
    -------
    numpy.ndarray
        Entries of `t` located just before each sign change.
    """
    # Estimated sampling rate: number of samples divided by the last timestamp.
    fs = len(raw_t) / raw_t.max()

    # Band-limit the pitch to the range that may correspond to nodding, as
    # suggested by previous research. The arguments 0.2 and 8 are presumably
    # cutoffs in Hz and the trailing 5 presumably the filter order
    # -- TODO confirm against analysis.py.
    pitch = raw_pitch.apply(normalize_angle)
    pitch = butter_lowpass_filter(butter_highpass_filter(pitch, 0.2, fs), 8, fs, 5)

    # Resample onto the common grid with nearest-neighbour lookup.
    resampled = interpolate.interp1d(raw_t, pitch, bounds_error=False,
                                     kind='nearest', fill_value='extrapolate')(t)

    # Keep only extrema whose persistence exceeds two standard deviations of
    # the resampled signal, to reduce jitter-induced nodding.
    # RunPersistence yields (index, persistence) pairs.
    persistence_threshold = resampled.std() * 2
    extrema_indices = [pair[0] for pair in RunPersistence(resampled)
                       if pair[1] > persistence_threshold]

    # Rebuild a step-like signal from the surviving extrema and locate its
    # sign changes.
    simplified = interpolate.interp1d(t[extrema_indices], resampled[extrema_indices],
                                      bounds_error=False, kind='nearest',
                                      fill_value='extrapolate')(t)
    crossing_indices = np.where(np.diff(np.sign(simplified)) != 0)[0]
    return t[crossing_indices]


def _union_of_spans(spans):
    """Return the union of closed [start, end] intervals, one per row of `spans`.

    Rows are looked up by the labels 0..len(spans)-1, so `spans` must carry a
    default integer index. An empty frame gives an empty interval.
    """
    return P.Interval(*[P.closed(spans.loc[k, 'start'], spans.loc[k, 'end'])
                        for k in range(len(spans))])


def _count_inside(times, interval):
    """Return how many entries of `times` lie inside `interval`."""
    return sum(1 for moment in times if moment in interval)


skipped_pairs = set(skips)  # built once instead of on every iteration
records = []                # one dict per (participant, 30-second segment)

for i in range(4, 31):

    if i in skipped_pairs:
        continue

    for p in ['a', 'b']:

        pid = f'{i}{p}'
        print(f'# {pid} processing')
        starttime = conversation_start_times[i]
        endtime = conversation_end_times[i]

        # load social connection responses
        voice_start_time = pd.read_csv(f'{folder_path}/RawData/{pid}_video_timestamp.csv', names=['idx', 'ts', 'freq'], header=None)['ts'][0]
        social_connection = pd.read_table(f'{folder_path}/RawData/{pid}_video_measure.tsv')

        # load turns
        turns = turns_all[turns_all.pid == pid].reset_index(drop=True)

        # load pose
        raw_pose_self = pd.read_table(f'{folder_path}/RawData/{pid}_self.tsv')
        raw_pose_partner = pd.read_table(f'{folder_path}/RawData/{pid}_remote.tsv')

        # synchronization: put both pose streams on one clock
        # (timestamps are scaled by 0.001, presumably milliseconds -> seconds)
        raw_pose_self['timestamp'] = raw_pose_self['POSE.timestamp']
        if pid in ['20a', '21a']:
            # Timestamp issue for these recordings: anchor on the first self
            # pose sample instead, minus the average offset 0.003 (sd=0.030).
            # NOTE(review): the original comment named "21a, 22a" while the
            # code checks '20a' and '21a' -- confirm which participants are meant.
            time_origin = raw_pose_self['timestamp'][0] * 0.001 - 0.003
        else:
            time_origin = voice_start_time
        raw_pose_self['t'] = raw_pose_self['timestamp'] * 0.001 - time_origin
        raw_pose_partner['t'] = raw_pose_partner['timestamp'] * 0.001 - time_origin

        # common 30 Hz grid and zero-crossing times of both heads on it
        t = np.arange(starttime, endtime, 1/30)
        zc_self = _nod_zero_crossing_times(raw_pose_self['t'], raw_pose_self['POSE.Head.rotation.x'], t)
        zc_partner = _nod_zero_crossing_times(raw_pose_partner['t'], raw_pose_partner['POSE.Head.rotation.x'], t)

        # Convert start and end time to 30-second segment indices
        start, end = int(starttime) // 30, int(endtime) // 30

        for j in range(start, end):
            t_seg_start, t_seg_end = j * 30, (j + 1) * 30

            # rating
            rating_seg = social_connection[(social_connection.TimeStamp >= t_seg_start)
                & (social_connection.TimeStamp < t_seg_end)]['SocialConnection']

            # turns that start inside this segment, and the gaps between them
            turns_seg = turns[(turns.start >= t_seg_start) & (turns.start < t_seg_end)].copy().reset_index(drop=True)
            self_turns_seg = turns_seg[(turns_seg.speaker == 'self')].copy().reset_index(drop=True)
            partner_turns_seg = turns_seg[(turns_seg.speaker == 'partner')].copy().reset_index(drop=True)
            gaps_seg = compute_gaps(turns_seg)

            # zero-crossings inside this segment
            zc_self_seg = zc_self[(zc_self >= t_seg_start) & (zc_self < t_seg_end)]
            zc_partner_seg = zc_partner[(zc_partner >= t_seg_start) & (zc_partner < t_seg_end)]

            # time spans covered by turns and by gaps
            turn_durations = _union_of_spans(turns_seg)
            self_turn_durations = _union_of_spans(self_turns_seg)
            partner_turn_durations = _union_of_spans(partner_turns_seg)
            gap_durations = _union_of_spans(gaps_seg)

            turn_self_nodding_count = _count_inside(zc_self_seg, turn_durations)
            turn_partner_nodding_count = _count_inside(zc_partner_seg, turn_durations)
            gap_self_nodding_count = _count_inside(zc_self_seg, gap_durations)
            gap_partner_nodding_count = _count_inside(zc_partner_seg, gap_durations)

            self_turn_self_nodding_count = _count_inside(zc_self_seg, self_turn_durations)
            partner_turn_self_nodding_count = _count_inside(zc_self_seg, partner_turn_durations)
            self_turn_partner_nodding_count = _count_inside(zc_partner_seg, self_turn_durations)
            partner_turn_partner_nodding_count = _count_inside(zc_partner_seg, partner_turn_durations)

            # Every nod count is the number of zero crossings divided by two.
            records.append(dict(Pid=pid,
                Session=pid[:-1],
                Segment=j,
                StartTime=t_seg_start,
                EndTime=t_seg_end,

                SocialConnection=rating_seg.mean(),
                SocialConnectionStd=rating_seg.std(),

                NoddingCount = len(zc_self_seg)/2 + len(zc_partner_seg)/2,
                NoddingCountSelf = len(zc_self_seg)/2,
                NoddingCountPartner = len(zc_partner_seg)/2,

                TurnNod = turn_self_nodding_count/2 + turn_partner_nodding_count/2,
                TurnSelfNod = turn_self_nodding_count/2,
                TurnPartnerNod = turn_partner_nodding_count/2,
                GapNod = gap_self_nodding_count/2 + gap_partner_nodding_count/2,
                GapSelfNod = gap_self_nodding_count/2,
                GapPartnerNod = gap_partner_nodding_count/2,

                SelfTurnSelfNod = self_turn_self_nodding_count/2,
                SelfTurnPartnerNod = self_turn_partner_nodding_count/2,
                SelfTurnAllNod = self_turn_self_nodding_count/2 + self_turn_partner_nodding_count/2,
                PartnerTurnSelfNod = partner_turn_self_nodding_count/2,
                PartnerTurnPartnerNod = partner_turn_partner_nodding_count/2,
                PartnerTurnAllNod = partner_turn_self_nodding_count/2 + partner_turn_partner_nodding_count/2,
            ))

# Build the table once from all rows rather than concatenating inside the loop.
bins_all = pd.DataFrame(records)


# 4a processing
# 4b processing
# 5a processing
# 5b processing
# 6a processing
# 6b processing
# 8a processing
# 8b processing
# 9a processing
# 9b processing
# 10a processing
# 10b processing
# 11a processing
# 11b processing
# 12a processing
# 12b processing
# 13a processing
# 13b processing
# 14a processing
# 14b processing
# 15a processing
# 15b processing
# 16a processing
# 16b processing
# 17a processing
# 17b processing
# 18a processing
# 18b processing
# 19a processing
# 19b processing
# 20a processing
# 20b processing
# 21a processing
# 21b processing
# 22a processing
# 22b processing
# 23a processing
# 23b processing
# 24a processing
# 24b processing
# 25a processing
# 25b processing
# 26a processing
# 26b processing
# 27a processing
# 27b processing
# 28a processing
# 28b processing
# 29a processing
# 29b processing
# 30a processing
# 30b processing


In [6]:
# Save the per-segment table (one row per participant and 30-second segment).
# The DataFrame index is written too, as the first unnamed CSV column.
bins_all.to_csv("processed/nodding_within.csv")

In [11]:
# Average every numeric column per participant, then expose the participant
# id under a second, lower-case `pid` column before saving.
bins_across = (
    bins_all
    .groupby(by='Pid', as_index=False)
    .mean(numeric_only=True)
    .assign(pid=lambda per_participant: per_participant['Pid'])
)
bins_across.to_csv("processed/nodding_across.csv")

# Descriptive Statistics

In [7]:
# Render floats with three decimal places in every table displayed below.
pd.options.display.float_format = '{:.3f}'.format


In [8]:
# Summary statistics of the numeric columns, transposed so that each
# column of the per-segment table becomes one row of the output.
bins_all.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Segment,1038.0,9.615,5.795,0.0,5.0,10.0,15.0,20.0
StartTime,1038.0,288.439,173.853,0.0,150.0,300.0,450.0,600.0
EndTime,1038.0,318.439,173.853,30.0,180.0,330.0,480.0,630.0
SocialConnection,1028.0,55.037,23.411,-1.0,37.438,59.963,74.658,100.0
SocialConnectionStd,1028.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
NoddingCount,1038.0,10.418,3.852,0.0,8.0,10.5,13.0,23.5
NoddingCountSelf,1038.0,5.214,2.785,0.0,3.0,5.0,7.0,17.5
NoddingCountPartner,1038.0,5.204,2.792,0.0,3.0,5.0,7.0,18.5
TurnNod,1038.0,7.982,3.506,0.0,5.5,8.0,10.0,20.5
TurnSelfNod,1038.0,3.987,2.409,0.0,2.0,4.0,5.5,15.0
