How did participants’ reports of connection change when long gaps naturally occurred in their conversations?

We compute average connection ratings for every instance of a long gap across all the conversations. As a comparison, we also compute average connection ratings in 2-second increments for the 6 seconds immediately preceding the long gap and the 6 seconds immediately following the long gap.

To make comparisons across conversations, we compute connection difference scores for every time-point by subtracting the connection ratings during the long gap from the connection ratings at each time-point. 

In [1]:
import pandas as pd 
import numpy as np
import os.path
import glob
from scipy import stats
import statsmodels

base_dir = os.path.dirname(os.getcwd())

In [2]:
def find_video(file):
    """Build the two candidate video names for a turn-taking file.

    The file is expected to be named ``<id1>_<id2>.csv``. The key read by
    assign_identity is checked against both orderings of the pair, so both
    ``id1_id2`` and ``id2_id1`` are returned.

    Only the base name of ``file`` is parsed, so underscores or
    platform-specific separators in the directory part of the path cannot
    leak into the IDs.

    Parameters
    ----------
    file : str
        Path to a CSV file.

    Returns
    -------
    tuple of (str, str)
        ``'<id1>_<id2>'`` and ``'<id2>_<id1>'``.
    """
    # Drop the directory, then everything from '.csv' onwards.
    stem = os.path.basename(file).split('.csv')[0]
    parts = stem.split('_')
    # First and last underscore-separated pieces are the two subject IDs.
    id1, id2 = parts[0], parts[-1]
    video_name_1 = id1 + '_' + id2
    video_name_2 = id2 + '_' + id1
    return video_name_1, video_name_2


def assign_identity(file):
    """Use the key to determine which subject ID corresponds to S1
    (speaker 1) and which corresponds to S2 (speaker 2).

    The key is read from ``<base_dir>/Data/subject_id_key.csv`` and must
    have the columns ``video_name``, ``S1`` and ``S2``. The video name
    derived from ``file`` is matched in either ID order.

    Parameters
    ----------
    file : str
        Path to a conversation CSV named ``<id1>_<id2>.csv``.

    Returns
    -------
    tuple
        ``(subID_S1, subID_S2)`` taken from the first matching key row.

    Raises
    ------
    ValueError
        If neither ordering of the video name appears in the key. The
        previous behaviour was to return None, which made callers fail
        with an opaque tuple-unpacking error.
    """
    key = pd.read_csv(os.path.join(base_dir, 'Data', 'subject_id_key.csv'))
    video_name_1, video_name_2 = find_video(file)

    # Rows whose video_name equals either ordering of the two IDs.
    matches = key.loc[key['video_name'].isin([video_name_1, video_name_2])]
    if matches.empty:
        raise ValueError(
            "No entry for '{}' or '{}' in subject_id_key.csv (file: {})".format(
                video_name_1, video_name_2, file))

    # Keep the first match, as the original top-to-bottom scan did.
    subID_S1 = matches['S1'].iloc[0]
    subID_S2 = matches['S2'].iloc[0]
    return subID_S1, subID_S2

## Strangers

In [3]:
# Build a wide-format table with one row per (long gap, participant): the
# participant's mean connection rating during the gap, in each of the three
# 2000-ms windows before it, and in each of the three 2000-ms windows after
# it. The table is written to Analyses/long_gap_connection_strangers.csv.
long_gap_connection = pd.DataFrame()
# NOTE(review): the frame was just created empty, so this fillna has nothing to fill.
long_gap_connection = long_gap_connection.fillna(0)
# Index of the next output row to write.
counter = 0

# Every CSV in Analyses/turn_taking/strangers_pure (presumably one per conversation).
flist = glob.glob(os.path.join(base_dir, 'Analyses', 'turn_taking', 'strangers_pure', '*.csv'))

for file in flist:
    
    # Look up which subject ID is S1 and which is S2 for this file.
    subID_S1, subID_S2 = assign_identity(file)
    # File name without directory or '.csv'; stored below as the dyad label.
    convo = file.split('/')[-1].split('.csv')[0]
    
    data = pd.read_csv(file)
    
    # Rows whose gap_length exceeds 2000 (presumably msec, like the *_msec columns).
    long_gaps = data.loc[data['gap_length'] > 2000]
    
    if len(long_gaps) > 0:
        
        # Ratings files are named <ID>_<ID>.csv; connection_1 is written out
        # under subID_S1 and connection_2 under subID_S2 further down.
        connection_1 = pd.read_csv(os.path.join(base_dir, 'Data', 'continuous_connection_ratings', 'strangers', '{}_{}.csv'.format(subID_S1, subID_S2)))
        connection_2 = pd.read_csv(os.path.join(base_dir, 'Data', 'continuous_connection_ratings', 'strangers', '{}_{}.csv'.format(subID_S2, subID_S1)))
        
        # Put rating timestamps on the same scale as the turn timestamps
        # (presumably adjustedTime is in seconds -- TODO confirm).
        connection_1['time_msec'] = connection_1['adjustedTime'] * 1000
        connection_2['time_msec'] = connection_2['adjustedTime'] * 1000
        
        for i in list(long_gaps.index):
            
            # Row i-1 supplies the start of the gap, so row 0 cannot be used.
            if (i > 0):
                
                # Keep only gaps that start after 6000 ms and whose following
                # turn starts before 594000 ms. The lower bound leaves room for
                # the three 2000-ms windows before the gap; the upper bound
                # presumably does the same for the windows after it, assuming
                # 600000-ms conversations -- TODO confirm.
                if (data.at[i-1, 'turn_end_msec'] > 6000) & (data.at[i, 'turn_start_msec'] < 594000): # might change and just give NaNs for ones that are too close

                    # the long gap: from the end of the previous turn to the start of turn i
                    start_of_gap = data.at[i-1, 'turn_end_msec']
                    end_of_gap = data.at[i, 'turn_start_msec']
                    
                    # Mean rating over samples strictly inside the window
                    # (both boundaries are excluded, here and in every window below).
                    connection_1_long_gap_subset = connection_1.loc[(connection_1['time_msec'] > start_of_gap) & (connection_1['time_msec'] < end_of_gap)].reset_index(drop=True)
                    connection_1_during_long_gap = np.mean(connection_1_long_gap_subset['Rating'])
                    
                    connection_2_long_gap_subset = connection_2.loc[(connection_2['time_msec'] > start_of_gap) & (connection_2['time_msec'] < end_of_gap)].reset_index(drop=True)
                    connection_2_during_long_gap = np.mean(connection_2_long_gap_subset['Rating'])

                    # 1 chunk before long gap: the 2000 ms ending at the start of the gap

                    start_connection_before_1 = start_of_gap - 2000
                    end_connection_before_1 = start_of_gap
                    
                    connection_1_before_1_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_before_1) & (connection_1['time_msec'] < end_connection_before_1)].reset_index(drop=True)
                    connection_1_before_long_gap_1 = np.mean(connection_1_before_1_subset['Rating'])
                    
                    connection_2_before_1_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_before_1) & (connection_2['time_msec'] < end_connection_before_1)].reset_index(drop=True)
                    connection_2_before_long_gap_1 = np.mean(connection_2_before_1_subset['Rating'])

                    # 2 chunks before long gap: 4000 to 2000 ms before the gap starts

                    start_connection_before_2 = start_of_gap - 4000 
                    end_connection_before_2 = start_of_gap - 2000
                    
                    connection_1_before_2_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_before_2) & (connection_1['time_msec'] < end_connection_before_2)].reset_index(drop=True)
                    connection_1_before_long_gap_2 = np.mean(connection_1_before_2_subset['Rating'])
                    
                    connection_2_before_2_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_before_2) & (connection_2['time_msec'] < end_connection_before_2)].reset_index(drop=True)
                    connection_2_before_long_gap_2 = np.mean(connection_2_before_2_subset['Rating'])

                    # 3 chunks before long gap: 6000 to 4000 ms before the gap starts

                    start_connection_before_3 = start_of_gap - 6000
                    end_connection_before_3 = start_of_gap - 4000

                    connection_1_before_3_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_before_3) & (connection_1['time_msec'] < end_connection_before_3)].reset_index(drop=True)
                    connection_1_before_long_gap_3 = np.mean(connection_1_before_3_subset['Rating'])
                    
                    connection_2_before_3_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_before_3) & (connection_2['time_msec'] < end_connection_before_3)].reset_index(drop=True)
                    connection_2_before_long_gap_3 = np.mean(connection_2_before_3_subset['Rating'])
                    

                    # 1 chunk after long gap: the 2000 ms starting at the end of the gap

                    start_connection_after_1 = end_of_gap
                    end_connection_after_1 = end_of_gap + 2000
                    
                    connection_1_after_1_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_after_1) & (connection_1['time_msec'] < end_connection_after_1)].reset_index(drop=True)
                    connection_1_after_long_gap_1 = np.mean(connection_1_after_1_subset['Rating'])
                    
                    connection_2_after_1_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_after_1) & (connection_2['time_msec'] < end_connection_after_1)].reset_index(drop=True)
                    connection_2_after_long_gap_1 = np.mean(connection_2_after_1_subset['Rating'])

                    # 2 chunks after long gap: 2000 to 4000 ms after the gap ends

                    start_connection_after_2 = end_of_gap + 2000 
                    end_connection_after_2 = end_of_gap + 4000
                    
                    connection_1_after_2_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_after_2) & (connection_1['time_msec'] < end_connection_after_2)].reset_index(drop=True)
                    connection_1_after_long_gap_2 = np.mean(connection_1_after_2_subset['Rating'])
                    
                    connection_2_after_2_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_after_2) & (connection_2['time_msec'] < end_connection_after_2)].reset_index(drop=True)
                    connection_2_after_long_gap_2 = np.mean(connection_2_after_2_subset['Rating'])

                    # 3 chunks after long gap: 4000 to 6000 ms after the gap ends

                    start_connection_after_3 = end_of_gap + 4000
                    end_connection_after_3 = end_of_gap + 6000
                    
                    connection_1_after_3_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_after_3) & (connection_1['time_msec'] < end_connection_after_3)].reset_index(drop=True)
                    connection_1_after_long_gap_3 = np.mean(connection_1_after_3_subset['Rating'])
                    
                    connection_2_after_3_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_after_3) & (connection_2['time_msec'] < end_connection_after_3)].reset_index(drop=True)
                    connection_2_after_long_gap_3 = np.mean(connection_2_after_3_subset['Rating'])


                    # Output row for S1 (values from connection_1).
                    long_gap_connection.at[counter, 'subID'] = subID_S1
                    long_gap_connection.at[counter, 'partnerID'] = subID_S2
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'connection_3_before'] = connection_1_before_long_gap_3
                    long_gap_connection.at[counter, 'connection_2_before'] = connection_1_before_long_gap_2
                    long_gap_connection.at[counter, 'connection_1_before'] = connection_1_before_long_gap_1
                    long_gap_connection.at[counter, 'connection_long_gap'] = connection_1_during_long_gap
                    long_gap_connection.at[counter, 'connection_1_after'] = connection_1_after_long_gap_1
                    long_gap_connection.at[counter, 'connection_2_after'] = connection_1_after_long_gap_2
                    long_gap_connection.at[counter, 'connection_3_after'] = connection_1_after_long_gap_3
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    # 'self' when the turn that follows the gap (row i) is S1's, else 'other'.
                    speaker = data.at[i, 'speaker']
                    if speaker == 'S1':
                        long_gap_connection.at[counter, 'speaker'] = 'self'
                    else:
                        long_gap_connection.at[counter, 'speaker'] = 'other'

                    counter += 1

                    # Output row for S2 (values from connection_2), same gap.
                    long_gap_connection.at[counter, 'subID'] = subID_S2
                    long_gap_connection.at[counter, 'partnerID'] = subID_S1
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'connection_3_before'] = connection_2_before_long_gap_3
                    long_gap_connection.at[counter, 'connection_2_before'] = connection_2_before_long_gap_2
                    long_gap_connection.at[counter, 'connection_1_before'] = connection_2_before_long_gap_1
                    long_gap_connection.at[counter, 'connection_long_gap'] = connection_2_during_long_gap
                    long_gap_connection.at[counter, 'connection_1_after'] = connection_2_after_long_gap_1
                    long_gap_connection.at[counter, 'connection_2_after'] = connection_2_after_long_gap_2
                    long_gap_connection.at[counter, 'connection_3_after'] = connection_2_after_long_gap_3
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    # 'self' when the turn that follows the gap (row i) is S2's, else 'other'.
                    speaker = data.at[i, 'speaker']
                    if speaker == 'S2':
                        long_gap_connection.at[counter, 'speaker'] = 'self'
                    else:
                        long_gap_connection.at[counter, 'speaker'] = 'other'

                    counter += 1
        
# Difference scores: rating at each time-point minus the rating during the long gap.
# NOTE(review): these lookups need the connection_* columns, which only exist
# once at least one row has been written in the loop above.
long_gap_connection['change_in_connection_3_before'] = long_gap_connection['connection_3_before'] - long_gap_connection['connection_long_gap']
long_gap_connection['change_in_connection_2_before'] = long_gap_connection['connection_2_before'] - long_gap_connection['connection_long_gap']
long_gap_connection['change_in_connection_1_before'] = long_gap_connection['connection_1_before'] - long_gap_connection['connection_long_gap']
long_gap_connection['change_in_connection_1_after'] = long_gap_connection['connection_1_after'] - long_gap_connection['connection_long_gap']
long_gap_connection['change_in_connection_2_after'] = long_gap_connection['connection_2_after'] - long_gap_connection['connection_long_gap']
long_gap_connection['change_in_connection_3_after'] = long_gap_connection['connection_3_after'] - long_gap_connection['connection_long_gap']

# Save the wide-format strangers table (no index column).
long_gap_connection.to_csv(os.path.join(base_dir, 'Analyses',
                              'long_gap_connection_strangers.csv'),
                        encoding='utf-8', index=False)


## Friends

In [4]:
# Build a wide-format table with one row per (long gap, participant): the
# participant's mean connection rating during the gap, in each of the three
# 2000-ms windows before it, and in each of the three 2000-ms windows after
# it. The table is written to Analyses/long_gap_connection_friends.csv.
long_gap_connection_friends = pd.DataFrame()
# NOTE(review): the frame was just created empty, so this fillna has nothing to fill.
long_gap_connection_friends = long_gap_connection_friends.fillna(0)
# Index of the next output row to write.
counter = 0

# Every CSV in Analyses/turn_taking/friends (presumably one per conversation).
flist = glob.glob(os.path.join(base_dir, 'Analyses', 'turn_taking', 'friends', '*.csv'))

for file in flist:
    
    # Look up which subject ID is S1 and which is S2 for this file.
    subID_S1, subID_S2 = assign_identity(file)
    # File name without directory or '.csv'; stored below as the dyad label.
    convo = file.split('/')[-1].split('.csv')[0]
    
    data = pd.read_csv(file)
    
    # Rows whose gap_length exceeds 2000 (presumably msec, like the *_msec columns).
    long_gaps = data.loc[data['gap_length'] > 2000]
    
    if len(long_gaps) > 0:
        
        # Ratings files are named <ID>_<ID>.csv; connection_1 is written out
        # under subID_S1 and connection_2 under subID_S2 further down.
        connection_1 = pd.read_csv(os.path.join(base_dir, 'Data', 'continuous_connection_ratings', 'friends', '{}_{}.csv'.format(subID_S1, subID_S2)))
        connection_2 = pd.read_csv(os.path.join(base_dir, 'Data', 'continuous_connection_ratings', 'friends', '{}_{}.csv'.format(subID_S2, subID_S1)))
        
        # Put rating timestamps on the same scale as the turn timestamps
        # (presumably adjustedTime is in seconds -- TODO confirm).
        connection_1['time_msec'] = connection_1['adjustedTime'] * 1000
        connection_2['time_msec'] = connection_2['adjustedTime'] * 1000
        
        for i in list(long_gaps.index):
            
            # Row i-1 supplies the start of the gap, so row 0 cannot be used.
            if (i > 0):
                
                # Keep only gaps that start after 6000 ms and whose following
                # turn starts before 594000 ms. The lower bound leaves room for
                # the three 2000-ms windows before the gap; the upper bound
                # presumably does the same for the windows after it, assuming
                # 600000-ms conversations -- TODO confirm.
                if (data.at[i-1, 'turn_end_msec'] > 6000) & (data.at[i, 'turn_start_msec'] < 594000): # might change and just give NaNs for ones that are too close

                    # the long gap: from the end of the previous turn to the start of turn i
                    start_of_gap = data.at[i-1, 'turn_end_msec']
                    end_of_gap = data.at[i, 'turn_start_msec']
                    
                    # Mean rating over samples strictly inside the window
                    # (both boundaries are excluded, here and in every window below).
                    connection_1_long_gap_subset = connection_1.loc[(connection_1['time_msec'] > start_of_gap) & (connection_1['time_msec'] < end_of_gap)].reset_index(drop=True)
                    connection_1_during_long_gap = np.mean(connection_1_long_gap_subset['Rating'])
                    
                    connection_2_long_gap_subset = connection_2.loc[(connection_2['time_msec'] > start_of_gap) & (connection_2['time_msec'] < end_of_gap)].reset_index(drop=True)
                    connection_2_during_long_gap = np.mean(connection_2_long_gap_subset['Rating'])

                    # 1 chunk before long gap: the 2000 ms ending at the start of the gap

                    start_connection_before_1 = start_of_gap - 2000
                    end_connection_before_1 = start_of_gap
                    
                    connection_1_before_1_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_before_1) & (connection_1['time_msec'] < end_connection_before_1)].reset_index(drop=True)
                    connection_1_before_long_gap_1 = np.mean(connection_1_before_1_subset['Rating'])
                    
                    connection_2_before_1_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_before_1) & (connection_2['time_msec'] < end_connection_before_1)].reset_index(drop=True)
                    connection_2_before_long_gap_1 = np.mean(connection_2_before_1_subset['Rating'])

                    # 2 chunks before long gap: 4000 to 2000 ms before the gap starts

                    start_connection_before_2 = start_of_gap - 4000 
                    end_connection_before_2 = start_of_gap - 2000
                    
                    connection_1_before_2_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_before_2) & (connection_1['time_msec'] < end_connection_before_2)].reset_index(drop=True)
                    connection_1_before_long_gap_2 = np.mean(connection_1_before_2_subset['Rating'])
                    
                    connection_2_before_2_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_before_2) & (connection_2['time_msec'] < end_connection_before_2)].reset_index(drop=True)
                    connection_2_before_long_gap_2 = np.mean(connection_2_before_2_subset['Rating'])

                    # 3 chunks before long gap: 6000 to 4000 ms before the gap starts

                    start_connection_before_3 = start_of_gap - 6000
                    end_connection_before_3 = start_of_gap - 4000

                    connection_1_before_3_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_before_3) & (connection_1['time_msec'] < end_connection_before_3)].reset_index(drop=True)
                    connection_1_before_long_gap_3 = np.mean(connection_1_before_3_subset['Rating'])
                    
                    connection_2_before_3_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_before_3) & (connection_2['time_msec'] < end_connection_before_3)].reset_index(drop=True)
                    connection_2_before_long_gap_3 = np.mean(connection_2_before_3_subset['Rating'])
                    

                    # 1 chunk after long gap: the 2000 ms starting at the end of the gap

                    start_connection_after_1 = end_of_gap
                    end_connection_after_1 = end_of_gap + 2000
                    
                    connection_1_after_1_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_after_1) & (connection_1['time_msec'] < end_connection_after_1)].reset_index(drop=True)
                    connection_1_after_long_gap_1 = np.mean(connection_1_after_1_subset['Rating'])
                    
                    connection_2_after_1_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_after_1) & (connection_2['time_msec'] < end_connection_after_1)].reset_index(drop=True)
                    connection_2_after_long_gap_1 = np.mean(connection_2_after_1_subset['Rating'])

                    # 2 chunks after long gap: 2000 to 4000 ms after the gap ends

                    start_connection_after_2 = end_of_gap + 2000 
                    end_connection_after_2 = end_of_gap + 4000
                    
                    connection_1_after_2_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_after_2) & (connection_1['time_msec'] < end_connection_after_2)].reset_index(drop=True)
                    connection_1_after_long_gap_2 = np.mean(connection_1_after_2_subset['Rating'])
                    
                    connection_2_after_2_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_after_2) & (connection_2['time_msec'] < end_connection_after_2)].reset_index(drop=True)
                    connection_2_after_long_gap_2 = np.mean(connection_2_after_2_subset['Rating'])

                    # 3 chunks after long gap: 4000 to 6000 ms after the gap ends

                    start_connection_after_3 = end_of_gap + 4000
                    end_connection_after_3 = end_of_gap + 6000
                    
                    connection_1_after_3_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_after_3) & (connection_1['time_msec'] < end_connection_after_3)].reset_index(drop=True)
                    connection_1_after_long_gap_3 = np.mean(connection_1_after_3_subset['Rating'])
                    
                    connection_2_after_3_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_after_3) & (connection_2['time_msec'] < end_connection_after_3)].reset_index(drop=True)
                    connection_2_after_long_gap_3 = np.mean(connection_2_after_3_subset['Rating'])


                    # Output row for S1 (values from connection_1).
                    long_gap_connection_friends.at[counter, 'subID'] = subID_S1
                    long_gap_connection_friends.at[counter, 'partnerID'] = subID_S2
                    long_gap_connection_friends.at[counter, 'dyad'] = convo
                    long_gap_connection_friends.at[counter, 'connection_3_before'] = connection_1_before_long_gap_3
                    long_gap_connection_friends.at[counter, 'connection_2_before'] = connection_1_before_long_gap_2
                    long_gap_connection_friends.at[counter, 'connection_1_before'] = connection_1_before_long_gap_1
                    long_gap_connection_friends.at[counter, 'connection_long_gap'] = connection_1_during_long_gap
                    long_gap_connection_friends.at[counter, 'connection_1_after'] = connection_1_after_long_gap_1
                    long_gap_connection_friends.at[counter, 'connection_2_after'] = connection_1_after_long_gap_2
                    long_gap_connection_friends.at[counter, 'connection_3_after'] = connection_1_after_long_gap_3
                    long_gap_connection_friends.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection_friends.at[counter, 'turn_num'] = i
                    
                    # 'self' when the turn that follows the gap (row i) is S1's, else 'other'.
                    speaker = data.at[i, 'speaker']
                    if speaker == 'S1':
                        long_gap_connection_friends.at[counter, 'speaker'] = 'self'
                    else:
                        long_gap_connection_friends.at[counter, 'speaker'] = 'other'

                    counter += 1

                    # Output row for S2 (values from connection_2), same gap.
                    long_gap_connection_friends.at[counter, 'subID'] = subID_S2
                    long_gap_connection_friends.at[counter, 'partnerID'] = subID_S1
                    long_gap_connection_friends.at[counter, 'dyad'] = convo
                    long_gap_connection_friends.at[counter, 'connection_3_before'] = connection_2_before_long_gap_3
                    long_gap_connection_friends.at[counter, 'connection_2_before'] = connection_2_before_long_gap_2
                    long_gap_connection_friends.at[counter, 'connection_1_before'] = connection_2_before_long_gap_1
                    long_gap_connection_friends.at[counter, 'connection_long_gap'] = connection_2_during_long_gap
                    long_gap_connection_friends.at[counter, 'connection_1_after'] = connection_2_after_long_gap_1
                    long_gap_connection_friends.at[counter, 'connection_2_after'] = connection_2_after_long_gap_2
                    long_gap_connection_friends.at[counter, 'connection_3_after'] = connection_2_after_long_gap_3
                    long_gap_connection_friends.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection_friends.at[counter, 'turn_num'] = i
                    
                    # 'self' when the turn that follows the gap (row i) is S2's, else 'other'.
                    speaker = data.at[i, 'speaker']
                    if speaker == 'S2':
                        long_gap_connection_friends.at[counter, 'speaker'] = 'self'
                    else:
                        long_gap_connection_friends.at[counter, 'speaker'] = 'other'

                    counter += 1
        
# Difference scores: rating at each time-point minus the rating during the long gap.
# NOTE(review): these lookups need the connection_* columns, which only exist
# once at least one row has been written in the loop above.
long_gap_connection_friends['change_in_connection_3_before'] = long_gap_connection_friends['connection_3_before'] - long_gap_connection_friends['connection_long_gap']
long_gap_connection_friends['change_in_connection_2_before'] = long_gap_connection_friends['connection_2_before'] - long_gap_connection_friends['connection_long_gap']
long_gap_connection_friends['change_in_connection_1_before'] = long_gap_connection_friends['connection_1_before'] - long_gap_connection_friends['connection_long_gap']
long_gap_connection_friends['change_in_connection_1_after'] = long_gap_connection_friends['connection_1_after'] - long_gap_connection_friends['connection_long_gap']
long_gap_connection_friends['change_in_connection_2_after'] = long_gap_connection_friends['connection_2_after'] - long_gap_connection_friends['connection_long_gap']
long_gap_connection_friends['change_in_connection_3_after'] = long_gap_connection_friends['connection_3_after'] - long_gap_connection_friends['connection_long_gap']

# Save the wide-format friends table (no index column).
long_gap_connection_friends.to_csv(os.path.join(base_dir, 'Analyses',
                              'long_gap_connection_friends.csv'),
                        encoding='utf-8', index=False)


## combine stranger and friend data

In [5]:
# Label each table with its condition so rows stay identifiable once combined.
long_gap_connection['condition'] = 'strangers'
long_gap_connection_friends['condition'] = 'friends'

# Stack the two tables row-wise (strangers first, then friends). This used to
# be an outer merge on every shared column, which only behaved like a
# concatenation because 'condition' differs in every row; concat states the
# intent directly and keeps the original row order.
long_gaps_all = pd.concat([long_gap_connection, long_gap_connection_friends],
                          ignore_index=True)

# Save the combined table (no index column).
long_gaps_all.to_csv(os.path.join(base_dir, 'Analyses',
                              'long_gap_connection_all.csv'),
                        encoding='utf-8', index=False)

# Output a long-form version for plotting purposes

In [12]:
long_gap_connection = pd.DataFrame()
long_gap_connection = long_gap_connection.fillna(0)
counter = 0

flist = glob.glob(os.path.join(base_dir, 'Analyses', 'turn_taking', 'strangers_pure', '*.csv'))

for file in flist:
    
    subID_S1, subID_S2 = assign_identity(file)
    convo = file.split('/')[-1].split('.csv')[0]
    
    data = pd.read_csv(file)
    
    long_gaps = data.loc[data['gap_length'] > 2000]
    
    if len(long_gaps) > 0:
        
        connection_1 = pd.read_csv(os.path.join(base_dir, 'Data', 'continuous_connection_ratings', 'strangers', '{}_{}.csv'.format(subID_S1, subID_S2)))
        connection_2 = pd.read_csv(os.path.join(base_dir, 'Data', 'continuous_connection_ratings', 'strangers', '{}_{}.csv'.format(subID_S2, subID_S1)))
        
        connection_1['time_msec'] = connection_1['adjustedTime'] * 1000
        connection_2['time_msec'] = connection_2['adjustedTime'] * 1000
        
        for i in list(long_gaps.index): 
            
            if (i > 0):
                
                if (data.at[i-1, 'turn_end_msec'] > 6000) & (data.at[i, 'turn_start_msec'] < 594000): # might change and just give NaNs for ones that are too close

                    # speaker info 
                    speaker = data.at[i, 'speaker']
                    if speaker == 'S1':
                        speaker_S1 = 'self'
                        speaker_S2 = 'other'
                    else:
                        speaker_S1 = 'other'
                        speaker_S2 = 'self'
                    
                    # the long gap
                    
                    start_of_gap = data.at[i-1, 'turn_end_msec']
                    end_of_gap = data.at[i, 'turn_start_msec']
                    
                    connection_1_long_gap_subset = connection_1.loc[(connection_1['time_msec'] > start_of_gap) & (connection_1['time_msec'] < end_of_gap)].reset_index(drop=True)
                    connection_1_during_long_gap = np.mean(connection_1_long_gap_subset['Rating'])
                    
                    connection_2_long_gap_subset = connection_2.loc[(connection_2['time_msec'] > start_of_gap) & (connection_2['time_msec'] < end_of_gap)].reset_index(drop=True)
                    connection_2_during_long_gap = np.mean(connection_2_long_gap_subset['Rating'])
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S1
                    long_gap_connection.at[counter, 'partnerID'] = subID_S2
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = 'long_gap'
                    long_gap_connection.at[counter, 'connection'] = connection_1_during_long_gap
                    long_gap_connection.at[counter, 'connection_change'] = connection_1_during_long_gap - connection_1_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S1
                    long_gap_connection.at[counter, 'turn_num'] = i

                    counter += 1
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S2
                    long_gap_connection.at[counter, 'partnerID'] = subID_S1
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = 'long_gap'
                    long_gap_connection.at[counter, 'connection'] = connection_2_during_long_gap
                    long_gap_connection.at[counter, 'connection_change'] = connection_2_during_long_gap - connection_2_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S2
                    long_gap_connection.at[counter, 'turn_num'] = i

                    counter += 1
                    
                    # 1 chunk before long gap

                    start_connection_before_1 = start_of_gap - 2000
                    end_connection_before_1 = start_of_gap
                    
                    connection_1_before_1_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_before_1) & (connection_1['time_msec'] < end_connection_before_1)].reset_index(drop=True)
                    connection_1_before_long_gap_1 = np.mean(connection_1_before_1_subset['Rating'])
                    
                    connection_2_before_1_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_before_1) & (connection_2['time_msec'] < end_connection_before_1)].reset_index(drop=True)
                    connection_2_before_long_gap_1 = np.mean(connection_2_before_1_subset['Rating'])
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S1
                    long_gap_connection.at[counter, 'partnerID'] = subID_S2
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '1_before'
                    long_gap_connection.at[counter, 'connection'] = connection_1_before_long_gap_1
                    long_gap_connection.at[counter, 'connection_change'] = connection_1_before_long_gap_1 - connection_1_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S1
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S2
                    long_gap_connection.at[counter, 'partnerID'] = subID_S1
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '1_before'
                    long_gap_connection.at[counter, 'connection'] = connection_2_before_long_gap_1
                    long_gap_connection.at[counter, 'connection_change'] = connection_2_before_long_gap_1 - connection_2_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S2
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1

                    # 2 chunks before long gap

                    start_connection_before_2 = start_of_gap - 4000 
                    end_connection_before_2 = start_of_gap - 2000
                    
                    connection_1_before_2_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_before_2) & (connection_1['time_msec'] < end_connection_before_2)].reset_index(drop=True)
                    connection_1_before_long_gap_2 = np.mean(connection_1_before_2_subset['Rating'])
                    
                    connection_2_before_2_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_before_2) & (connection_2['time_msec'] < end_connection_before_2)].reset_index(drop=True)
                    connection_2_before_long_gap_2 = np.mean(connection_2_before_2_subset['Rating'])
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S1
                    long_gap_connection.at[counter, 'partnerID'] = subID_S2
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '2_before'
                    long_gap_connection.at[counter, 'connection'] = connection_1_before_long_gap_2
                    long_gap_connection.at[counter, 'connection_change'] = connection_1_before_long_gap_2 - connection_1_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S1
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S2
                    long_gap_connection.at[counter, 'partnerID'] = subID_S1
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '2_before'
                    long_gap_connection.at[counter, 'connection'] = connection_2_before_long_gap_2
                    long_gap_connection.at[counter, 'connection_change'] = connection_2_before_long_gap_2 - connection_2_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S2
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1

                    # 3 chunks before long gap

                    start_connection_before_3 = start_of_gap - 6000
                    end_connection_before_3 = start_of_gap - 4000

                    connection_1_before_3_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_before_3) & (connection_1['time_msec'] < end_connection_before_3)].reset_index(drop=True)
                    connection_1_before_long_gap_3 = np.mean(connection_1_before_3_subset['Rating'])
                    
                    connection_2_before_3_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_before_3) & (connection_2['time_msec'] < end_connection_before_3)].reset_index(drop=True)
                    connection_2_before_long_gap_3 = np.mean(connection_2_before_3_subset['Rating'])
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S1
                    long_gap_connection.at[counter, 'partnerID'] = subID_S2
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '3_before'
                    long_gap_connection.at[counter, 'connection'] = connection_1_before_long_gap_3
                    long_gap_connection.at[counter, 'connection_change'] = connection_1_before_long_gap_3 - connection_1_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S1
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S2
                    long_gap_connection.at[counter, 'partnerID'] = subID_S1
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '3_before'
                    long_gap_connection.at[counter, 'connection'] = connection_2_before_long_gap_3
                    long_gap_connection.at[counter, 'connection_change'] = connection_2_before_long_gap_3 - connection_2_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S2
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1

                    # 1 chunk after long gap 

                    start_connection_after_1 = end_of_gap
                    end_connection_after_1 = end_of_gap + 2000
                    
                    connection_1_after_1_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_after_1) & (connection_1['time_msec'] < end_connection_after_1)].reset_index(drop=True)
                    connection_1_after_long_gap_1 = np.mean(connection_1_after_1_subset['Rating'])
                    
                    connection_2_after_1_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_after_1) & (connection_2['time_msec'] < end_connection_after_1)].reset_index(drop=True)
                    connection_2_after_long_gap_1 = np.mean(connection_2_after_1_subset['Rating'])
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S1
                    long_gap_connection.at[counter, 'partnerID'] = subID_S2
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '1_after'
                    long_gap_connection.at[counter, 'connection'] = connection_1_after_long_gap_1
                    long_gap_connection.at[counter, 'connection_change'] = connection_1_after_long_gap_1 - connection_1_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S1
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S2
                    long_gap_connection.at[counter, 'partnerID'] = subID_S1
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '1_after'
                    long_gap_connection.at[counter, 'connection'] = connection_2_after_long_gap_1
                    long_gap_connection.at[counter, 'connection_change'] = connection_2_after_long_gap_1 - connection_2_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S2
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1

                    # 2 chunks after long gap 

                    start_connection_after_2 = end_of_gap + 2000 
                    end_connection_after_2 = end_of_gap + 4000
                    
                    connection_1_after_2_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_after_2) & (connection_1['time_msec'] < end_connection_after_2)].reset_index(drop=True)
                    connection_1_after_long_gap_2 = np.mean(connection_1_after_2_subset['Rating'])
                    
                    connection_2_after_2_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_after_2) & (connection_2['time_msec'] < end_connection_after_2)].reset_index(drop=True)
                    connection_2_after_long_gap_2 = np.mean(connection_2_after_2_subset['Rating'])
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S1
                    long_gap_connection.at[counter, 'partnerID'] = subID_S2
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '2_after'
                    long_gap_connection.at[counter, 'connection'] = connection_1_after_long_gap_2
                    long_gap_connection.at[counter, 'connection_change'] = connection_1_after_long_gap_2 - connection_1_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S1
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S2
                    long_gap_connection.at[counter, 'partnerID'] = subID_S1
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '2_after'
                    long_gap_connection.at[counter, 'connection'] = connection_2_after_long_gap_2
                    long_gap_connection.at[counter, 'connection_change'] = connection_2_after_long_gap_2 - connection_2_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S2
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1

                    # 3 chunks after long gap 

                    start_connection_after_3 = end_of_gap + 4000
                    end_connection_after_3 = end_of_gap + 6000
                    
                    connection_1_after_3_subset = connection_1.loc[(connection_1['time_msec'] > start_connection_after_3) & (connection_1['time_msec'] < end_connection_after_3)].reset_index(drop=True)
                    connection_1_after_long_gap_3 = np.mean(connection_1_after_3_subset['Rating'])
                    
                    connection_2_after_3_subset = connection_2.loc[(connection_2['time_msec'] > start_connection_after_3) & (connection_2['time_msec'] < end_connection_after_3)].reset_index(drop=True)
                    connection_2_after_long_gap_3 = np.mean(connection_2_after_3_subset['Rating'])

                    long_gap_connection.at[counter, 'subID'] = subID_S1
                    long_gap_connection.at[counter, 'partnerID'] = subID_S2
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '3_after'
                    long_gap_connection.at[counter, 'connection'] = connection_1_after_long_gap_3
                    long_gap_connection.at[counter, 'connection_change'] = connection_1_after_long_gap_3 - connection_1_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S1
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1
                    
                    long_gap_connection.at[counter, 'subID'] = subID_S2
                    long_gap_connection.at[counter, 'partnerID'] = subID_S1
                    long_gap_connection.at[counter, 'dyad'] = convo
                    long_gap_connection.at[counter, 'timepoint'] = '3_after'
                    long_gap_connection.at[counter, 'connection'] = connection_2_after_long_gap_3
                    long_gap_connection.at[counter, 'connection_change'] = connection_2_after_long_gap_3 - connection_2_during_long_gap
                    long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
                    long_gap_connection.at[counter, 'speaker'] = speaker_S2
                    long_gap_connection.at[counter, 'turn_num'] = i
                    
                    counter += 1
                    
# Save the long-format table built above (one row per participant per
# timepoint per long gap) for the strangers condition. index=False keeps
# the row labels out of the CSV.
long_gap_connection.to_csv(os.path.join(base_dir, 'Analyses',
                              'long_gap_connection_all_long_format_strangers.csv'),
                        encoding='utf-8', index=False)
                    

In [13]:
def _mean_rating_in_window(ratings, window_start, window_end):
    """Average the 'Rating' samples strictly inside (window_start, window_end).

    ratings is a continuous-connection table with 'time_msec' and 'Rating'
    columns. Samples that fall exactly on either boundary are excluded.
    """
    inside = (ratings['time_msec'] > window_start) & (ratings['time_msec'] < window_end)
    return np.mean(ratings.loc[inside, 'Rating'])


def _long_gap_windows(start_of_gap, end_of_gap):
    """Return the (timepoint label, window start, window end) triples for one gap.

    The first entry is the gap itself; it is followed by three consecutive
    2000-msec windows immediately before the gap and three immediately
    after it. The list order is the order in which rows are written.
    """
    return [
        ('long_gap', start_of_gap, end_of_gap),
        ('1_before', start_of_gap - 2000, start_of_gap),
        ('2_before', start_of_gap - 4000, start_of_gap - 2000),
        ('3_before', start_of_gap - 6000, start_of_gap - 4000),
        ('1_after', end_of_gap, end_of_gap + 2000),
        ('2_after', end_of_gap + 2000, end_of_gap + 4000),
        ('3_after', end_of_gap + 4000, end_of_gap + 6000),
    ]


# Long-format output for the friends condition: one row per participant,
# per timepoint, per long gap.
long_gap_connection_friends = pd.DataFrame()
counter = 0  # label of the next row to write

flist = glob.glob(os.path.join(base_dir, 'Analyses', 'turn_taking', 'friends', '*.csv'))

for file in flist:

    subID_S1, subID_S2 = assign_identity(file)
    convo = file.split('/')[-1].split('.csv')[0]

    data = pd.read_csv(file)

    # A "long gap" is a turn whose preceding gap exceeds 2000
    # (presumably milliseconds, like the *_msec columns -- TODO confirm).
    long_gaps = data.loc[data['gap_length'] > 2000]

    # Only load the rating files for conversations that contain a long gap.
    if len(long_gaps) == 0:
        continue

    ratings_dir = os.path.join(base_dir, 'Data', 'continuous_connection_ratings', 'friends')
    connection_1 = pd.read_csv(os.path.join(ratings_dir, '{}_{}.csv'.format(subID_S1, subID_S2)))
    connection_2 = pd.read_csv(os.path.join(ratings_dir, '{}_{}.csv'.format(subID_S2, subID_S1)))

    # Put rating timestamps on the same scale as the turn-taking columns.
    connection_1['time_msec'] = connection_1['adjustedTime'] * 1000
    connection_2['time_msec'] = connection_2['adjustedTime'] * 1000

    for i in list(long_gaps.index):

        # The gap is bounded by the previous turn, so the first turn has none.
        if i <= 0:
            continue

        start_of_gap = data.at[i-1, 'turn_end_msec']
        end_of_gap = data.at[i, 'turn_start_msec']

        # Skip gaps that start within the first 6000 msec or end after
        # 594000 msec, so every before/after window can be sampled
        # (presumably conversations last 600000 msec -- TODO confirm).
        # might change and just give NaNs for ones that are too close
        if not (start_of_gap > 6000 and end_of_gap < 594000):
            continue

        # 'self' marks the participant who speaks the turn that ends the gap.
        if data.at[i, 'speaker'] == 'S1':
            speaker_S1, speaker_S2 = 'self', 'other'
        else:
            speaker_S1, speaker_S2 = 'other', 'self'

        gap_length = data.at[i, 'gap_length']

        # (subject, partner, that subject's ratings, speaker role); S1 first.
        raters = [
            (subID_S1, subID_S2, connection_1, speaker_S1),
            (subID_S2, subID_S1, connection_2, speaker_S2),
        ]

        # Each rater's mean connection during the gap is the reference
        # that every timepoint's connection_change is measured against.
        baselines = [_mean_rating_in_window(ratings, start_of_gap, end_of_gap)
                     for _, _, ratings, _ in raters]

        for timepoint, window_start, window_end in _long_gap_windows(start_of_gap, end_of_gap):
            for (sub_id, partner_id, ratings, speaker_role), baseline in zip(raters, baselines):

                connection = _mean_rating_in_window(ratings, window_start, window_end)

                # Dict order fixes the column order of the output table.
                row = {
                    'subID': sub_id,
                    'partnerID': partner_id,
                    'dyad': convo,
                    'timepoint': timepoint,
                    'connection': connection,
                    'connection_change': connection - baseline,
                    'gap_length': gap_length,
                    'speaker': speaker_role,
                    'turn_num': i,
                }
                for column, value in row.items():
                    long_gap_connection_friends.at[counter, column] = value

                counter += 1

long_gap_connection_friends.to_csv(os.path.join(base_dir, 'Analyses',
                              'long_gap_connection_all_long_format_friends.csv'),
                        encoding='utf-8', index=False)
                    

In [14]:
# Tag every row with the condition it came from so the two tables can be
# told apart once they are combined.
long_gap_connection['condition'] = 'strangers'
long_gap_connection_friends['condition'] = 'friends'

# Stack the two tables row-wise. concat keeps each table's rows in their
# written order (strangers first, then friends) and does not require the
# shared columns to have matching dtypes, which a join on every shared
# column would.
long_gaps_all = pd.concat([long_gap_connection, long_gap_connection_friends],
                          ignore_index=True, sort=False)

long_gaps_all.to_csv(os.path.join(base_dir, 'Analyses',
                              'long_gap_connection_all_long_format.csv'),
                        encoding='utf-8', index=False)