As a preliminary investigation into the content of what participants tend to say near a long gap, we examined the text of the turns immediately preceding and immediately following each gap. 

Gaps were categorized by length: > 2000 ms (long gaps) and ≤ 2000 ms (all other gaps).

We coded for (i) the number of words (as a proxy for minimal responses) and (ii) the presence of a question mark (as a proxy for question asking). 

In [1]:
import pandas as pd 
import numpy as np
import os.path
import glob
from scipy import stats
import statsmodels

import matplotlib.pyplot as plt
import pandas as pd
%matplotlib inline
import seaborn as sns

# Parent of the current working directory; the 'Analyses' folder read and
# written by the cells below is expected directly beneath it.
base_dir = os.path.dirname(os.getcwd())

# Directory holding the transcript text (one CSV per conversation, inside
# 'strangers' and 'friends' subfolders). The transcripts are not publicly
# available, so the author's local path is only a default: set the
# TURN_TAKING_TRANSCRIPT_DIR environment variable to point somewhere else.
transcript_dir = os.environ.get(
    'TURN_TAKING_TRANSCRIPT_DIR',
    '/Users/emma/Dropbox/Manuscripts/clicking/data/turn_taking',
)
    

# Extract information from gaps > 2000ms (long gaps)

In [2]:
# Code the turn before and the turn after every long gap (> 2000 ms) in the
# strangers conversations: one output row per (gap, position).
file = os.path.join(base_dir, 'Analyses', 'long_gap_connection_strangers.csv')
data = pd.read_csv(file)

# The gap list may contain the same (dyad, turn_num) pair more than once.
# Keeping only the last occurrence codes each gap exactly once and yields the
# same rows, in the same order, as de-duplicating the output with keep='last'.
long_gaps = data.drop_duplicates(['dyad', 'turn_num'], keep='last')

transcripts = {}  # dyad -> transcript DataFrame, so each file is read once
rows = []         # one dict per output row; turned into a DataFrame at the end

for convo, turn in zip(long_gaps['dyad'], long_gaps['turn_num']):

    if convo not in transcripts:
        transcripts[convo] = pd.read_csv(
            os.path.join(transcript_dir, 'strangers', '{}.csv'.format(convo)))
    text_file = transcripts[convo]

    # Transcript row `turn` is the turn after the gap; row `turn - 1` is the
    # turn before it.
    for position, text in (('before', text_file.at[turn - 1, 'text']),
                           ('after', text_file.at[turn, 'text'])):
        rows.append({
            'convo': convo,
            'turn_num': turn,
            'position': position,
            'text': text,
            # A question mark anywhere in the turn is the proxy for a question.
            'question': 'yes' if '?' in text else 'no',
            # Whitespace-separated token count.
            'word_count': len(text.split()),
        })

# Building the frame once from the collected records avoids the quadratic cost
# of growing it cell by cell and keeps the integer columns as integers.
df_strangers = pd.DataFrame(
    rows,
    columns=['convo', 'turn_num', 'position', 'text', 'question', 'word_count'])
df_strangers['condition'] = 'strangers'
df_strangers['gap'] = 'long'


In [3]:
# Code the turn before and the turn after every long gap (> 2000 ms) in the
# friends conversations: one output row per (gap, position).
file = os.path.join(base_dir, 'Analyses', 'long_gap_connection_friends.csv')
data = pd.read_csv(file)

# The gap list may contain the same (dyad, turn_num) pair more than once.
# Keeping only the last occurrence codes each gap exactly once and yields the
# same rows, in the same order, as de-duplicating the output with keep='last'.
long_gaps = data.drop_duplicates(['dyad', 'turn_num'], keep='last')

transcripts = {}  # dyad -> transcript DataFrame, so each file is read once
rows = []         # one dict per output row; turned into a DataFrame at the end

for convo, turn in zip(long_gaps['dyad'], long_gaps['turn_num']):

    if convo not in transcripts:
        transcripts[convo] = pd.read_csv(
            os.path.join(transcript_dir, 'friends', '{}.csv'.format(convo)))
    text_file = transcripts[convo]

    # Transcript row `turn` is the turn after the gap; row `turn - 1` is the
    # turn before it.
    for position, text in (('before', text_file.at[turn - 1, 'text']),
                           ('after', text_file.at[turn, 'text'])):
        rows.append({
            'convo': convo,
            'turn_num': turn,
            'position': position,
            'text': text,
            # A question mark anywhere in the turn is the proxy for a question.
            'question': 'yes' if '?' in text else 'no',
            # Whitespace-separated token count.
            'word_count': len(text.split()),
        })

# Building the frame once from the collected records avoids the quadratic cost
# of growing it cell by cell and keeps the integer columns as integers.
df_friends = pd.DataFrame(
    rows,
    columns=['convo', 'turn_num', 'position', 'text', 'question', 'word_count'])
df_friends['condition'] = 'friends'
df_friends['gap'] = 'long'


In [4]:
# Stack the long-gap rows from both conditions (strangers first, then friends)
# into a single frame with a fresh 0..n-1 index.
df_all = pd.concat([df_strangers, df_friends], ignore_index=True)

# Extract information from gaps ≤ 2000ms

In [6]:
# Code the turn before and the turn after every gap of at most 2000 ms in the
# strangers conversations that appear in the long-gap list.
file = os.path.join(base_dir, 'Analyses', 'long_gap_connection_strangers.csv')
data = pd.read_csv(file)

rows = []  # one dict per output row; turned into a DataFrame at the end

# Visit each conversation once. The long-gap list has one row per long gap, so
# looping over its rows would emit every comparison turn of a conversation as
# many times as that conversation appears in the list.
for convo in data['dyad'].unique():

    text_file = pd.read_csv(
        os.path.join(transcript_dir, 'strangers', '{}.csv'.format(convo)))

    # Comparison gaps are those <= 2000 ms, the complement of the > 2000 ms
    # long gaps. Rows with a missing latency compare False and are excluded.
    not_long_gaps = text_file.loc[text_file['turn_latency'] <= 2000]

    for turn in not_long_gaps.index:

        # Only transcript rows with index > 1 are coded.
        # NOTE(review): row 1 also has a preceding turn (row 0); confirm that
        # excluding it is intentional.
        if turn <= 1:
            continue

        # Transcript row `turn` is the turn after the gap; row `turn - 1` is
        # the turn before it.
        for position, text in (('before', text_file.at[turn - 1, 'text']),
                               ('after', text_file.at[turn, 'text'])):
            rows.append({
                'convo': convo,
                'turn_num': turn,
                'position': position,
                'text': text,
                # A question mark anywhere in the turn marks a question.
                'question': 'yes' if '?' in text else 'no',
                # Whitespace-separated token count.
                'word_count': len(text.split()),
            })

# Building the frame once from the collected records avoids the quadratic cost
# of growing it cell by cell and keeps the integer columns as integers.
df_strangers_comparision = pd.DataFrame(
    rows,
    columns=['convo', 'turn_num', 'position', 'text', 'question', 'word_count'])
df_strangers_comparision['condition'] = 'strangers'
df_strangers_comparision['gap'] = 'not_long'


In [11]:
# Code the turn before and the turn after every gap of at most 2000 ms in the
# friends conversations that appear in the long-gap list.
file = os.path.join(base_dir, 'Analyses', 'long_gap_connection_friends.csv')
data = pd.read_csv(file)

rows = []  # one dict per output row; turned into a DataFrame at the end

# Visit each conversation once. The long-gap list has one row per long gap, so
# looping over its rows would emit every comparison turn of a conversation as
# many times as that conversation appears in the list.
for convo in data['dyad'].unique():

    text_file = pd.read_csv(
        os.path.join(transcript_dir, 'friends', '{}.csv'.format(convo)))

    # Comparison gaps are those <= 2000 ms, the complement of the > 2000 ms
    # long gaps. Rows with a missing latency compare False and are excluded.
    not_long_gaps = text_file.loc[text_file['turn_latency'] <= 2000]

    for turn in not_long_gaps.index:

        # Only transcript rows with index > 1 are coded.
        # NOTE(review): row 1 also has a preceding turn (row 0); confirm that
        # excluding it is intentional.
        if turn <= 1:
            continue

        # Transcript row `turn` is the turn after the gap; row `turn - 1` is
        # the turn before it.
        for position, text in (('before', text_file.at[turn - 1, 'text']),
                               ('after', text_file.at[turn, 'text'])):
            rows.append({
                'convo': convo,
                'turn_num': turn,
                'position': position,
                'text': text,
                # A question mark anywhere in the turn marks a question.
                'question': 'yes' if '?' in text else 'no',
                # Whitespace-separated token count.
                'word_count': len(text.split()),
            })

# Building the frame once from the collected records avoids the quadratic cost
# of growing it cell by cell and keeps the integer columns as integers.
df_friends_comparision = pd.DataFrame(
    rows,
    columns=['convo', 'turn_num', 'position', 'text', 'question', 'word_count'])
df_friends_comparision['condition'] = 'friends'
df_friends_comparision['gap'] = 'not_long'


In [12]:
# Stack the not-long-gap rows for both conditions (strangers first), then
# append them to the long-gap rows to form the full table of coded turns.
comparison_frames = [df_strangers_comparision, df_friends_comparision]
df_all_comparison = pd.concat(comparison_frames, ignore_index=True)
df_total = pd.concat([df_all, df_all_comparison], ignore_index=True)

# Write the combined table, without the row index, into the Analyses folder.
output_path = os.path.join(base_dir, 'Analyses', 'semantic_content_all.csv')
df_total.to_csv(output_path, encoding='utf-8', index=False)


Note that the 'text' variable has been removed from the .csv file in this repo because it may contain identifying information about participants.