In two-person conversations, people typically take turns: after one person stops speaking, the other begins. Occasionally, however, the person who spoke last is also the one who speaks next. We call this a ‘speaker-stay’ (as opposed to a ‘speaker-switch’).

Here we examine how often the turns on either side of a long gap (a gap with `gap_length` > 2000 in the code below) are a speaker-switch vs. a speaker-stay, in our stranger and friend datasets.

In [1]:
import pandas as pd 
import numpy as np
import os.path
import glob
from scipy import stats
import statsmodels

base_dir = os.path.dirname(os.getcwd())

In [2]:
def find_video(file):
    """Return both possible video names for a conversation CSV path.

    The file name is expected to look like ``<id1>_..._<id2>.csv``. The
    first and last underscore-separated pieces of the file name are taken
    as the two subject IDs. Both orderings are returned because the key
    used by ``assign_identity`` may list the pair either way round.

    Parameters
    ----------
    file : str
        Path to a conversation CSV. Only the final path component is used.

    Returns
    -------
    tuple of str
        ``(id1_id2, id2_id1)``.
    """
    # os.path.basename copes with the platform's separator, whereas
    # splitting on '/' returns the whole path on Windows.
    file_name = os.path.basename(file)
    id1 = file_name.split('_')[0]
    id2 = file_name.split('_')[-1].split('.csv')[0]
    video_name_1 = id1 + '_' + id2
    video_name_2 = id2 + '_' + id1
    return video_name_1, video_name_2


def assign_identity(file):
    """Look up which subject ID is speaker 1 (S1) and which is speaker 2 (S2).

    Reads ``Data/subject_id_key.csv`` under ``base_dir`` and takes the first
    row whose ``video_name`` equals either ordering of the two IDs that
    ``find_video`` parses from ``file``.

    Parameters
    ----------
    file : str
        Path to a conversation CSV named after the two subject IDs.

    Returns
    -------
    tuple
        ``(subID_S1, subID_S2)`` read from the ``S1`` and ``S2`` columns of
        the matching key row.

    Raises
    ------
    ValueError
        If no row of the key matches either video name. Without this the
        function would fall through and return ``None``, which only fails
        later with an opaque unpacking error at the call site.
    """
    key = pd.read_csv(os.path.join(base_dir, 'Data', 'subject_id_key.csv'))
    video_name_1, video_name_2 = find_video(file)

    matches = key.loc[key['video_name'].isin([video_name_1, video_name_2])]
    if matches.empty:
        raise ValueError(
            'No entry for {} or {} in subject_id_key.csv'.format(
                video_name_1, video_name_2))

    # Index each column first so the values keep their own column dtype.
    subID_S1 = matches['S1'].iloc[0]
    subID_S2 = matches['S2'].iloc[0]
    return subID_S1, subID_S2

In [3]:
# Build a long-format table for the stranger conversations with two rows per
# long gap (one per rater), holding each rater's mean connection rating in
# three windows before the gap, during the gap, and in three windows after
# it. The table is then written to CSV.

LONG_GAP_MSEC = 2000             # gaps longer than this count as "long"
WINDOW_MSEC = 2000               # width of each before/after window
EARLIEST_GAP_START_MSEC = 6000   # leaves room for three windows before the gap
LATEST_GAP_END_MSEC = 594000     # presumably 6 s before the end of a 600 s recording -- TODO confirm


def _mean_rating_between(ratings, window_start, window_end):
    """Mean 'Rating' of samples with window_start < time_msec < window_end.

    Both bounds are exclusive. An empty window gives NaN.
    """
    in_window = (ratings['time_msec'] > window_start) & (ratings['time_msec'] < window_end)
    return np.mean(ratings.loc[in_window, 'Rating'])


def _gap_windows(start_of_gap, end_of_gap):
    """Return (column, window_start, window_end) for the seven windows.

    The order is the order in which the columns are written to the table.
    """
    return [
        ('connection_3_before', start_of_gap - 3 * WINDOW_MSEC, start_of_gap - 2 * WINDOW_MSEC),
        ('connection_2_before', start_of_gap - 2 * WINDOW_MSEC, start_of_gap - WINDOW_MSEC),
        ('connection_1_before', start_of_gap - WINDOW_MSEC, start_of_gap),
        ('connection_long_gap', start_of_gap, end_of_gap),
        ('connection_1_after', end_of_gap, end_of_gap + WINDOW_MSEC),
        ('connection_2_after', end_of_gap + WINDOW_MSEC, end_of_gap + 2 * WINDOW_MSEC),
        ('connection_3_after', end_of_gap + 2 * WINDOW_MSEC, end_of_gap + 3 * WINDOW_MSEC),
    ]


long_gap_connection = pd.DataFrame()
counter = 0

flist = glob.glob(os.path.join(base_dir, 'Analyses', 'turn_taking', 'strangers_pure', '*.csv'))
ratings_dir = os.path.join(base_dir, 'Data', 'continuous_connection_ratings', 'strangers')

for file in flist:

    subID_S1, subID_S2 = assign_identity(file)
    # basename instead of split('/') so the dyad label is right on any OS
    convo = os.path.basename(file).split('.csv')[0]

    data = pd.read_csv(file)

    long_gaps = data.loc[data['gap_length'] > LONG_GAP_MSEC]
    if long_gaps.empty:
        # no long gaps: the rating files are not needed for this conversation
        continue

    # connection_1 is S1's rating file, connection_2 is S2's
    connection_1 = pd.read_csv(os.path.join(ratings_dir, '{}_{}.csv'.format(subID_S1, subID_S2)))
    connection_2 = pd.read_csv(os.path.join(ratings_dir, '{}_{}.csv'.format(subID_S2, subID_S1)))

    connection_1['time_msec'] = connection_1['adjustedTime'] * 1000
    connection_2['time_msec'] = connection_2['adjustedTime'] * 1000

    for i in long_gaps.index:

        # the first turn has no preceding turn to define the gap
        if i <= 0:
            continue

        # the long gap runs from the end of the previous turn to the start of this one
        start_of_gap = data.at[i - 1, 'turn_end_msec']
        end_of_gap = data.at[i, 'turn_start_msec']

        # skip gaps too close to either edge of the conversation
        # (might change and just give NaNs for ones that are too close)
        if not (start_of_gap > EARLIEST_GAP_START_MSEC and end_of_gap < LATEST_GAP_END_MSEC):
            continue

        # check if there was a speaker switch across the gap
        speaker_current = data.at[i, 'speaker']
        speaker_previous = data.at[i - 1, 'speaker']
        speaker_switch = 'no' if speaker_current == speaker_previous else 'yes'

        windows = _gap_windows(start_of_gap, end_of_gap)

        # one row per rater; 'speaker' says whether the rater is the person
        # who spoke after the gap
        for sub_id, partner_id, ratings, own_label in (
                (subID_S1, subID_S2, connection_1, 'S1'),
                (subID_S2, subID_S1, connection_2, 'S2')):

            long_gap_connection.at[counter, 'subID'] = sub_id
            long_gap_connection.at[counter, 'partnerID'] = partner_id
            long_gap_connection.at[counter, 'dyad'] = convo
            for column, window_start, window_end in windows:
                long_gap_connection.at[counter, column] = _mean_rating_between(ratings, window_start, window_end)
            long_gap_connection.at[counter, 'gap_length'] = data.at[i, 'gap_length']
            long_gap_connection.at[counter, 'speaker'] = 'self' if speaker_current == own_label else 'other'
            long_gap_connection.at[counter, 'speaker_switch'] = speaker_switch

            counter += 1


# change in connection in each window relative to the long gap itself
for column in ('connection_3_before', 'connection_2_before', 'connection_1_before',
               'connection_1_after', 'connection_2_after', 'connection_3_after'):
    long_gap_connection['change_in_' + column] = long_gap_connection[column] - long_gap_connection['connection_long_gap']

long_gap_connection.to_csv(os.path.join(base_dir, 'Analyses',
                              'long_gap_connection_strangers_switches.csv'),
                        encoding='utf-8', index=False)


In [4]:
# Build a long-format table for the friend conversations with two rows per
# long gap (one per rater), holding each rater's mean connection rating in
# three windows before the gap, during the gap, and in three windows after
# it. The table is then written to CSV.

LONG_GAP_MSEC = 2000             # gaps longer than this count as "long"
WINDOW_MSEC = 2000               # width of each before/after window
EARLIEST_GAP_START_MSEC = 6000   # leaves room for three windows before the gap
LATEST_GAP_END_MSEC = 594000     # presumably 6 s before the end of a 600 s recording -- TODO confirm


def _mean_rating_between(ratings, window_start, window_end):
    """Mean 'Rating' of samples with window_start < time_msec < window_end.

    Both bounds are exclusive. An empty window gives NaN.
    """
    in_window = (ratings['time_msec'] > window_start) & (ratings['time_msec'] < window_end)
    return np.mean(ratings.loc[in_window, 'Rating'])


def _gap_windows(start_of_gap, end_of_gap):
    """Return (column, window_start, window_end) for the seven windows.

    The order is the order in which the columns are written to the table.
    """
    return [
        ('connection_3_before', start_of_gap - 3 * WINDOW_MSEC, start_of_gap - 2 * WINDOW_MSEC),
        ('connection_2_before', start_of_gap - 2 * WINDOW_MSEC, start_of_gap - WINDOW_MSEC),
        ('connection_1_before', start_of_gap - WINDOW_MSEC, start_of_gap),
        ('connection_long_gap', start_of_gap, end_of_gap),
        ('connection_1_after', end_of_gap, end_of_gap + WINDOW_MSEC),
        ('connection_2_after', end_of_gap + WINDOW_MSEC, end_of_gap + 2 * WINDOW_MSEC),
        ('connection_3_after', end_of_gap + 2 * WINDOW_MSEC, end_of_gap + 3 * WINDOW_MSEC),
    ]


long_gap_connection_friends = pd.DataFrame()
counter = 0

flist = glob.glob(os.path.join(base_dir, 'Analyses', 'turn_taking', 'friends', '*.csv'))
ratings_dir = os.path.join(base_dir, 'Data', 'continuous_connection_ratings', 'friends')

for file in flist:

    subID_S1, subID_S2 = assign_identity(file)
    # basename instead of split('/') so the dyad label is right on any OS
    convo = os.path.basename(file).split('.csv')[0]

    data = pd.read_csv(file)

    long_gaps = data.loc[data['gap_length'] > LONG_GAP_MSEC]
    if long_gaps.empty:
        # no long gaps: the rating files are not needed for this conversation
        continue

    # connection_1 is S1's rating file, connection_2 is S2's
    connection_1 = pd.read_csv(os.path.join(ratings_dir, '{}_{}.csv'.format(subID_S1, subID_S2)))
    connection_2 = pd.read_csv(os.path.join(ratings_dir, '{}_{}.csv'.format(subID_S2, subID_S1)))

    connection_1['time_msec'] = connection_1['adjustedTime'] * 1000
    connection_2['time_msec'] = connection_2['adjustedTime'] * 1000

    for i in long_gaps.index:

        # the first turn has no preceding turn to define the gap
        if i <= 0:
            continue

        # the long gap runs from the end of the previous turn to the start of this one
        start_of_gap = data.at[i - 1, 'turn_end_msec']
        end_of_gap = data.at[i, 'turn_start_msec']

        # skip gaps too close to either edge of the conversation
        # (might change and just give NaNs for ones that are too close)
        if not (start_of_gap > EARLIEST_GAP_START_MSEC and end_of_gap < LATEST_GAP_END_MSEC):
            continue

        # check if there was a speaker switch across the gap
        speaker_current = data.at[i, 'speaker']
        speaker_previous = data.at[i - 1, 'speaker']
        speaker_switch = 'no' if speaker_current == speaker_previous else 'yes'

        windows = _gap_windows(start_of_gap, end_of_gap)

        # one row per rater; 'speaker' says whether the rater is the person
        # who spoke after the gap
        for sub_id, partner_id, ratings, own_label in (
                (subID_S1, subID_S2, connection_1, 'S1'),
                (subID_S2, subID_S1, connection_2, 'S2')):

            long_gap_connection_friends.at[counter, 'subID'] = sub_id
            long_gap_connection_friends.at[counter, 'partnerID'] = partner_id
            long_gap_connection_friends.at[counter, 'dyad'] = convo
            for column, window_start, window_end in windows:
                long_gap_connection_friends.at[counter, column] = _mean_rating_between(ratings, window_start, window_end)
            long_gap_connection_friends.at[counter, 'gap_length'] = data.at[i, 'gap_length']
            long_gap_connection_friends.at[counter, 'speaker'] = 'self' if speaker_current == own_label else 'other'
            long_gap_connection_friends.at[counter, 'speaker_switch'] = speaker_switch

            counter += 1


# change in connection in each window relative to the long gap itself
for column in ('connection_3_before', 'connection_2_before', 'connection_1_before',
               'connection_1_after', 'connection_2_after', 'connection_3_after'):
    long_gap_connection_friends['change_in_' + column] = long_gap_connection_friends[column] - long_gap_connection_friends['connection_long_gap']

long_gap_connection_friends.to_csv(os.path.join(base_dir, 'Analyses',
                              'long_gap_connection_friends_switches.csv'),
                        encoding='utf-8', index=False)


In [5]:
# Tag each table with its conversation type, stack the two tables into one
# long-format table, and save it.
long_gap_connection['condition'] = 'strangers'
long_gap_connection_friends['condition'] = 'friends'

# The two tables share no rows (their 'condition' values differ), so this is
# a plain row-wise stack. concat says so directly and keeps the rows in
# their original order (strangers first, then friends), whereas an outer
# merge on every shared column may reorder rows by key and can fail if a
# column's dtype differs between the tables.
long_gaps_all = pd.concat([long_gap_connection, long_gap_connection_friends],
                          ignore_index=True)

long_gaps_all.to_csv(os.path.join(base_dir, 'Analyses',
                              'long_gap_connection_all_switches.csv'),
                        encoding='utf-8', index=False)

# Get frequencies (overall and for each conversation type)

In [6]:
# Split the stranger rows by whether the speaker changed across the gap.
switched_mask = long_gap_connection['speaker_switch'] == 'yes'
stayed_mask = long_gap_connection['speaker_switch'] == 'no'
strangers_switch = long_gap_connection[switched_mask].reset_index(drop=True)
strangers_no_switch = long_gap_connection[stayed_mask].reset_index(drop=True)

# Every long gap has two rows (one per rater), so halve the row counts to
# get the number of gaps.
n_switch_gaps = len(strangers_switch) / 2
n_stay_gaps = len(strangers_no_switch) / 2
n_gaps = len(long_gap_connection) / 2

print(n_switch_gaps)
print(n_stay_gaps)
print(n_stay_gaps / n_gaps)  # proportion of long gaps that were speaker-stays


248.0
14.0
0.05343511450381679


In [7]:
# Split the friend rows by whether the speaker changed across the gap.
switched_mask = long_gap_connection_friends['speaker_switch'] == 'yes'
stayed_mask = long_gap_connection_friends['speaker_switch'] == 'no'
friends_switch = long_gap_connection_friends[switched_mask].reset_index(drop=True)
friends_no_switch = long_gap_connection_friends[stayed_mask].reset_index(drop=True)

# Every long gap has two rows (one per rater), so halve the row counts to
# get the number of gaps.
n_switch_gaps = len(friends_switch) / 2
n_stay_gaps = len(friends_no_switch) / 2
n_gaps = len(long_gap_connection_friends) / 2

print(n_switch_gaps)
print(n_stay_gaps)
print(n_stay_gaps / n_gaps)  # proportion of long gaps that were speaker-stays


183.0
19.0
0.09405940594059406


In [8]:
# Overall proportion of long gaps that were speaker-stays, pooled over both
# conversation types. It is computed from the tables rather than from
# hand-copied counts so it cannot drift out of sync with the data. Every gap
# has two rows in each table, so the factor of two cancels in the ratio.
(len(strangers_no_switch) + len(friends_no_switch)) / (len(long_gap_connection) + len(long_gap_connection_friends))

0.07112068965517242