In [38]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

# Load the raw experiment table; the first CSV column becomes the row index.
df = pd.read_csv('raw_experiments.csv', index_col=0)
# Print the column index so the available measures and questionnaire items are visible.
print(df.columns)

Index(['experiment', 'human_role', 'map', 'pid', 'game_time',
       'is_time_success', 'user_num_of_uter', 'user_mean_uter',
       'user_total_uter', 'user_num_of_en', 'user_num_of_es',
       'user_num_of_mix', 'user_num_of_inter_cs', '% entrainment - all dialog',
       '% entrainment - on bot inter-sentential cs', 'bot_num_of_uter',
       'bot_mean_uter', 'bot_total_uter', 'bot_num_of_en', 'bot_num_of_es',
       'bot_num_of_mix', 'bot_num_of_inter_cs',
       'How much did you enjoy the task?',
       'How difficult was it to communicate with your partner?',
       'How successful do you think you were at completing the task?',
       'How difficult was it to understand your partner's instructions?',
       'How likely is your partner to be a fluent speaker of English?',
       'How likely is your partner to be a fluent speaker of Spanish?',
       'How likely do you think it is that your partner is bilingual?',
       'Please rate your partner according to the following attribu

In [39]:
def get_experiment_df(exp: "list[str] | str", role = 'all'):
    """Return the rows of the global ``df`` that belong to the given experiment(s).

    Args:
        exp: Experiment name(s) to keep, matched against the ``experiment``
            column. A single name may also be passed as a plain string.
        role: Value of the ``human_role`` column to keep, or ``'all'``
            (the default) to keep every role.

    Returns:
        An independent copy of the matching rows, so callers can add or modify
        columns without touching ``df`` or triggering
        ``SettingWithCopyWarning``.
    """
    # ``Series.isin`` rejects a bare string, so wrap a single name in a list.
    if isinstance(exp, str):
        exp = [exp]

    mask = df['experiment'].isin(exp)
    if role != 'all':
        mask = mask & (df['human_role'] == role)
    return df[mask].copy()

def fix_lng_cols(text):
    """Normalise a free-text language answer to a short code.

    Args:
        text: The raw answer. Anything that is not a string (NaN, ``None``,
            numbers, ...) is treated as a missing answer.

    Returns:
        ``'ENG'`` for English (including a few known misspellings),
        ``'ES'`` for ``'spanish'`` or ``'portuguese'``,
        ``'BOTH'`` when the answer mentions both ``'eng'`` and ``'spa'``,
        and ``'NONE'`` for everything else.
    """
    # The previous check only caught Python floats (presumably pandas NaN);
    # None, pd.NA, ints or numpy scalars would crash on ``.strip()``.
    if not isinstance(text, str):
        return 'NONE'

    text = text.strip().lower()
    if text in ('english', 'enlgish', 'englis', 'eng'):
        return 'ENG'
    # NOTE(review): Portuguese is folded into 'ES' - confirm this is intended.
    if text in ('spanish', 'portuguese'):
        return 'ES'
    if 'eng' in text and 'spa' in text:
        return 'BOTH'
    return 'NONE'

def remove_rename_columns(df, remove=True):
    """Prepare an experiment frame for plotting.

    Steps, in order:
      1. If ``remove`` is true, drop ``human_role``, ``experiment``, ``map``,
         ``pid`` and every column whose name contains ``'bot_'``.
      2. Rename four questionnaire columns to short ``Q_*`` names (raises if
         any of them is missing).
      3. Add per-user percentage columns derived from the utterance counts.
      4. Scale the two entrainment columns by 100 and cast ``Q_mix_friends``
         and ``Q_enjoy_mix`` to ``int``.
      5. Add ``lng_1`` / ``lng_2`` by passing the two free-text language
         columns through ``fix_lng_cols``.

    Returns the transformed frame.
    """
    short_names = {
        'How much did you enjoy the task?': 'Q_enjoy',
        "How successful do you think you were at completing the task?": 'Q_successful',
        "How likely would you be to mix languages in the following contexts? Interacting with friends": "Q_mix_friends",
        'Do you enjoy mixing languages in conversation?': "Q_enjoy_mix",
    }

    if remove:
        bot_cols = [name for name in df.columns if 'bot_' in name]
        df = df.drop(['human_role', 'experiment', 'map', 'pid'] + bot_cols, axis=1)

    df = df.rename(columns=short_names, errors="raise")

    # Share of each utterance type among all of the user's utterances.
    n_utterances = df["user_num_of_uter"]
    for pct_col, count_col in (("English %", "user_num_of_en"),
                               ("Spanish %", "user_num_of_es"),
                               ("Mix %", "user_num_of_mix")):
        df[pct_col] = df[count_col] / n_utterances * 100
    # This one is normalised by (utterances - 1) rather than by the utterance count.
    df["Inter-Sentential CS %"] = df["user_num_of_inter_cs"] / (n_utterances - 1) * 100

    # Fractions -> percentages.
    for frac_col in ('% entrainment - on bot inter-sentential cs',
                     '% entrainment - all dialog'):
        df[frac_col] = df[frac_col] * 100

    for answer_col in ('Q_mix_friends', 'Q_enjoy_mix'):
        df[answer_col] = df[answer_col].astype(int)

    language_sources = (
        ('lng_1', 'Enter your native language, or the language you are providing answers for, here:'),
        ('lng_2', 'Enter your most proficient second language here:'),
    )
    for code_col, raw_col in language_sources:
        df[code_col] = df[raw_col].apply(fix_lng_cols)

    return df


In [40]:
def plot_corr_matt(df, title):
    """Draw the correlation matrix of ``df``'s numeric columns as a heat map.

    Args:
        df: Source DataFrame. Only numeric and boolean columns take part in
            the correlation; every other column is ignored.
        title: Text placed before ``" - Correlation Matrix"`` in the figure title.
    """
    # Correlate an explicitly numeric subset and take the tick labels from the
    # resulting matrix. A bare ``df.corr()`` raises on string columns in
    # pandas >= 2.0, and labelling from a separately selected column list can
    # leave the ticks out of step with the matrix rows/columns.
    corr = df.select_dtypes(include=['number', 'bool']).corr()
    labels = corr.columns
    positions = range(len(labels))

    f = plt.figure(figsize=(15, 15))
    plt.matshow(corr, fignum=f.number)
    plt.xticks(positions, labels, fontsize=14, rotation=45)
    plt.yticks(positions, labels, fontsize=14)
    cb = plt.colorbar()
    cb.ax.tick_params(labelsize=14)
    plt.title(f'{title} - Correlation Matrix', fontsize=16)

    
def plot_2d_scatter(df, title, x, y, color, size):
    """Build a Plotly Express scatter plot from ``df`` and show it.

    ``x`` and ``y`` are passed as the axes, ``color`` and ``size`` as the
    marker colour and marker size arguments, and ``title`` as the figure title.
    """
    scatter_kwargs = dict(x=x, y=y, color=color, size=size, title=title)
    px.scatter(df, **scatter_kwargs).show()

In [4]:
# Experiment conditions to analyse; the all-experiments cell further down reuses this list.
experiments = ['Baseline', 'Random CS', 'Short-context CS', 'Alignment CS', 'Adversarial CS']
# Column used as the x-axis by the (currently disabled) scatter plots below.
feature = 'user_num_of_mix'#"% entrainment - all dialog"
# Build the cleaned frame for each experiment in turn. With every plotting call
# commented out, the loop only rebinds `exp_df` and displays nothing.
for exp in experiments:
    exp_df = get_experiment_df([exp])
    exp_df = remove_rename_columns(exp_df)
#     plot_corr_matt(exp_df, exp)
    
# Disabled: per-role correlation matrices and scatter plots.
#     for role in ['navigator', 'instructor']:
#         exp_df = get_experiment_df([exp], role)
#         exp_df = remove_rename_columns(exp_df)
#         plot_corr_matt(exp_df, f'{exp} - {role}')
        
#         if role == 'navigator':
#             plot_2d_scatter(df=exp_df,
#             x=feature,
#             y="dist_score",
#             color='is_time_success',
#             size='game_time',
#             title=f'{exp} - {role}')
        
        
#         plot_2d_scatter(df=exp_df,
#         x=feature,
#         y="Q_enjoy",
#         color='is_time_success',
#         size='game_time',
#         title=f'{exp} - {role}')

In [124]:
# Cleaned frame covering every experiment. remove=False keeps the identifying
# and bot columns, so the plots below can still be coloured by experiment.
df_copy = remove_rename_columns(get_experiment_df(experiments), False)
print(df_copy.shape)


# plot_corr_matt(df_copy, 'all')
# One scatter plot per (x, y) pairing, all sharing the same colour/size encoding.
for x_col, y_col in (("Q_mix_friends", "Mix %"),
                     ("Q_enjoy_mix", "Mix %"),
                     ("Q_enjoy_mix", "lng_1")):
    plot_2d_scatter(df=df_copy,
                    x=x_col,
                    y=y_col,
                    color='experiment',
                    size='game_time',
                    title='all')

# Disabled: per-role variants of the plots above.
# for role in ['navigator', 'instructor']:
#     exp_df = get_experiment_df(experiments, role)
#     exp_df = remove_rename_columns(exp_df, False)
# #     plot_corr_matt(exp_df, f'all - {role}')

#     if role == 'navigator':
#         plot_2d_scatter(df=exp_df,
#         x="% entrainment - all dialog",
#         y="dist_score",
#         color='experiment',
#         size='game_time',
#         title=f'all - {role}')


#     plot_2d_scatter(df=exp_df,
#     x="% entrainment - all dialog",
#     y="Q_enjoy",
#     color='experiment',
#     size='game_time',
#     title=f'all - {role}')

(593, 89)


In [129]:
# Disabled: the same tally on the raw native-language answers.
# df['Enter your native language, or the language you are providing answers for, here:'].value_counts()
# Tally of the normalised second-language codes over all rows of df_copy.
df_copy['lng_2'].value_counts()

ES      346
ENG     207
NONE     40
Name: lng_2, dtype: int64

In [133]:
# Grouped bar chart with experiment on the x-axis and one colour per lng_2 code.
# NOTE(review): no `y` is passed, so bar heights rely on plotly's default for
# the missing dimension - confirm the chart shows the intended per-group counts.
px.bar(df_copy,
             x='experiment',
             color='lng_2',
#                y='Q_enjoy',
               barmode="group"
            ).show()

# Grouped histogram with lng_2 on the x-axis and one colour per experiment.
# NOTE(review): histfunc='avg' is set but `y` is commented out, so there is no
# column to average - confirm what plotly actually displays here.
px.histogram(df_copy,
             x='lng_2',
#              y='Q_enjoy',
             color='experiment',
             text_auto='.2f',
             histfunc='avg',
             barmode="group",
            ).show()
# Disabled: duplicate of the bar chart above.
# px.bar(df_copy,
#              x='experiment',
#              color='lng_2',
#                barmode="group"
#             ).show()

Experiment comparison

In [44]:
# Each group of columns is averaged per experiment and drawn as its own
# grouped bar chart, in the order listed.
feature_groups = [
    ['English %', 'Spanish %', 'Mix %', "Inter-Sentential CS %"],
    ['% entrainment - all dialog','% entrainment - on bot inter-sentential cs'],
    ['Q_successful', 'Q_enjoy', 'Q_mix_friends', 'Q_enjoy_mix'],
    ['is_time_success', 'dist_score'],
]
# `features` is the loop variable so it ends up bound to the last group.
for features in feature_groups:
    px.histogram(df_copy,
                 x='experiment',
                 y=features,
                 text_auto='.2f',
                 histfunc='avg',
                 barmode="group",
                ).show()