In [1]:
import numpy as np
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use seaborn's plain "white" style for all figures.
sns.set_style("white")

# Parent of the current working directory; later cells build their
# 'Analyses' and 'Figures' paths relative to this folder.
base_dir = os.path.dirname(os.getcwd())

# Count the number of long gaps in each conversation

Record the number of long gaps and the total number of turns in each conversation so that the frequency of long gaps can be computed. Several cutoffs are used to define a long gap; the main text defines gaps over 2000 ms as long.

In [2]:
# Cutoffs (same units as the `gap_length` column) above which a gap counts as "long".
cutoffs = [500, 750, 1000, 1250, 1500, 1750, 2000,
          2250, 2500, 2750, 3000]

# Sub-folders of Analyses/turn_taking; the folder name is stored as the condition label.
folders = ['strangers_pure', 'friends']


def _long_gap_stats(gap_lengths, cutoffs):
    """Count the gaps longer than each cutoff and their share of all turns.

    Parameters
    ----------
    gap_lengths : pandas.Series
        One gap length per turn of a single conversation.
    cutoffs : iterable of numbers
        Thresholds; a gap is "long" when it is strictly greater than the cutoff.

    Returns
    -------
    dict
        ``n_long_<cutoff>`` (count, stored as float) and ``freq_long_<cutoff>``
        (count divided by the number of turns) for every cutoff, in cutoff
        order. The frequency is NaN when the conversation has no turns, so an
        empty file no longer raises ZeroDivisionError.
    """
    n_total = len(gap_lengths)
    stats = {}
    for cutoff in cutoffs:
        # NaN gap lengths compare as False and are therefore not counted as long.
        n_long = int((gap_lengths > cutoff).sum())
        stats['n_long_{}'.format(cutoff)] = float(n_long)
        stats['freq_long_{}'.format(cutoff)] = n_long / n_total if n_total else np.nan
    return stats


# Collect one dict per output row and build the DataFrame once at the end,
# instead of growing it cell-by-cell with `.at`.
records = []

for folder in folders:

    # Sorted so the row order of the output is reproducible (glob order is arbitrary).
    flist = sorted(glob.glob(os.path.join(base_dir, 'Analyses', 'turn_taking', folder, '*.csv')))

    for file in flist:

        # Conversation name = file name without directory or extension.
        # os.path handles the platform's path separator, unlike split('/').
        name = os.path.splitext(os.path.basename(file))[0]

        # First and last underscore-separated tokens of the file name are
        # taken as the two participant IDs.
        id_parts = name.split('_')
        id_1, id_2 = id_parts[0], id_parts[-1]

        data = pd.read_csv(file)
        gap_stats = _long_gap_stats(data['gap_length'], cutoffs)

        # Each conversation yields two rows, one per participant, with the
        # subject/partner roles swapped and identical gap statistics.
        for sub_id, partner_id in ((id_1, id_2), (id_2, id_1)):
            record = {
                'convo': name,
                'subID': sub_id,
                'partnerID': partner_id,
                # Kept as float: the previous `.at`-based construction is expected
                # to have produced float columns, so the CSV format stays the same.
                'n_total': float(len(data)),
                'condition': folder,
            }
            record.update(gap_stats)
            records.append(record)

# Explicit column list keeps the column order fixed and still gives a
# well-formed (header-only) table when no input files are found.
columns = ['convo', 'subID', 'partnerID', 'n_total', 'condition']
for cutoff in cutoffs:
    columns += ['n_long_{}'.format(cutoff), 'freq_long_{}'.format(cutoff)]

long_gaps = pd.DataFrame(records, columns=columns)

long_gaps.to_csv(os.path.join(base_dir, 'Analyses', 'long_gap_counts.csv'), encoding='utf-8', index=False)


# How frequent are long gaps at each threshold?

In [81]:
# Same thresholds as used when the long-gap counts were computed: 500 to 3000 in steps of 250.
cutoffs = list(range(500, 3001, 250))

# Split the per-participant table by condition.
strangers = long_gaps[long_gaps['condition'] == 'strangers_pure'].reset_index(drop=True)
friends = long_gaps[long_gaps['condition'] == 'friends'].reset_index(drop=True)

# Every conversation appears in two rows (one per participant), so sums over
# rows are halved to get per-conversation totals.
n_turns_total_strangers = strangers['n_total'].sum() / 2
n_turns_total_friends = friends['n_total'].sum() / 2

for cutoff in cutoffs:

    count_col = 'n_long_{}'.format(cutoff)
    n_turns_cutoff_strangers = strangers[count_col].sum() / 2
    n_turns_cutoff_friends = friends[count_col].sum() / 2

    # Per cutoff: number of long gaps and their proportion of all turns, for each condition.
    print(cutoff)
    summary = (
        ('strangers', n_turns_cutoff_strangers, n_turns_total_strangers),
        ('friends', n_turns_cutoff_friends, n_turns_total_friends),
    )
    for label, n_long, n_all in summary:
        print(label)
        print(n_long)
        print(n_long / n_all)
    print('_________')
    
    

500
strangers
8508.0
0.21996432172496705
friends
2812.0
0.2940192388122125
_________
750
strangers
4197.0
0.1085084929806872
friends
1717.0
0.17952739439565035
_________
1000
strangers
2316.0
0.05987745288140851
friends
1122.0
0.11731493099121706
_________
1250
strangers
1315.0
0.03399777657126606
friends
741.0
0.0774780426599749
_________
1500
strangers
755.0
0.019519635978179375
friends
476.0
0.049769970723546636
_________
1750
strangers
455.0
0.011763489231882933
friends
343.0
0.03586365537432037
_________
2000
strangers
274.0
0.007083947361617415
friends
218.0
0.022793810121288165
_________
2250
strangers
178.0
0.004601980402802555
friends
150.0
0.015683814303638646
_________
2500
strangers
122.0
0.0031541663434938856
friends
108.0
0.011292346298619825
_________
2750
strangers
84.0
0.0021717210889630034
friends
85.0
0.008887494772061898
_________
3000
strangers
66.0
0.0017063522841852168
friends
70.0
0.007319113341698034
_________


# Plot individual distributions

In [193]:
# One gap-length histogram per strangers conversation, saved as a PDF named after the conversation.
# Sorted so files are processed in a reproducible order.
flist = sorted(glob.glob(os.path.join(base_dir, 'Analyses', 'turn_taking', 'strangers_pure', '*.csv')))

# Create the output folder up front; savefig raises if the directory does not exist.
out_dir = os.path.join(base_dir, 'Figures', 'individual_distributions', 'strangers')
os.makedirs(out_dir, exist_ok=True)

for file in flist:

    # File name without directory or extension; os.path handles the
    # platform's path separator, unlike split('/').
    name = os.path.splitext(os.path.basename(file))[0]

    data = pd.read_csv(file)

    # Explicit figure/axes instead of relying on pyplot's implicit "current" axes.
    fig, ax = plt.subplots()

    # Two histograms on the same axes with 100-wide bins: one over
    # [-2000, 2000] and one over [2000, 5000] in a contrasting colour.
    sns.histplot(x=data['gap_length'], color='#f9c74f', alpha=1, kde=False,
                 binwidth=100, binrange=(-2000, 2000), ax=ax)
    sns.histplot(x=data['gap_length'], color='#f94144', alpha=1, kde=False,
                 binwidth=100, binrange=(2000, 5000), ax=ax)
    sns.despine(ax=ax)
    ax.set_title(name)

    ax.set_xlabel('gap lengths (ms)', fontsize=15, labelpad=10)
    ax.set_ylabel('count', fontsize=15, labelpad=10)
    ax.tick_params(labelsize=12)
    ax.set(xlim=(-2000, 5000))
    # Fixed y-range so every figure in this folder shares the same scale.
    ax.set(ylim=(0, 22))

    # Dotted vertical line at the boundary between the two histograms.
    ax.axvline(x=2000, color='#277da1', linestyle='dotted')

    fig.tight_layout()

    fig.savefig(os.path.join(out_dir, '{}.pdf'.format(name)), dpi=300)
    # Close each figure explicitly so figures do not accumulate in memory across the loop.
    plt.close(fig)
    

In [189]:
# One gap-length histogram per friends conversation, saved as a PDF named after the conversation.
# Sorted so files are processed in a reproducible order.
flist = sorted(glob.glob(os.path.join(base_dir, 'Analyses', 'turn_taking', 'friends', '*.csv')))

# Create the output folder up front; savefig raises if the directory does not exist.
out_dir = os.path.join(base_dir, 'Figures', 'individual_distributions', 'friends')
os.makedirs(out_dir, exist_ok=True)

for file in flist:

    # File name without directory or extension; os.path handles the
    # platform's path separator, unlike split('/').
    name = os.path.splitext(os.path.basename(file))[0]

    data = pd.read_csv(file)

    # Explicit figure/axes instead of relying on pyplot's implicit "current" axes.
    fig, ax = plt.subplots()

    # Two histograms on the same axes with 100-wide bins: one over
    # [-2000, 2000] and one over [2000, 5000] in a contrasting colour.
    sns.histplot(x=data['gap_length'], color='#f9c74f', alpha=1, kde=False,
                 binwidth=100, binrange=(-2000, 2000), ax=ax)
    sns.histplot(x=data['gap_length'], color='#f94144', alpha=1, kde=False,
                 binwidth=100, binrange=(2000, 5000), ax=ax)
    sns.despine(ax=ax)
    ax.set_title(name)

    ax.set_xlabel('gap lengths (ms)', fontsize=15, labelpad=10)
    ax.set_ylabel('count', fontsize=15, labelpad=10)
    ax.tick_params(labelsize=12)
    # Only the x-range is fixed here; the y-axis is left to autoscale per figure.
    ax.set(xlim=(-2000, 5000))

    # Dotted vertical line at the boundary between the two histograms.
    ax.axvline(x=2000, color='#277da1', linestyle='dotted')

    fig.tight_layout()

    fig.savefig(os.path.join(out_dir, '{}.pdf'.format(name)), dpi=300)
    # Close each figure explicitly so figures do not accumulate in memory across the loop.
    plt.close(fig)
    