In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats
from scipy.stats import chi2_contingency

from functions import *

import warnings
warnings.filterwarnings('ignore')

In [33]:
# Read the survey export that every later cell works from.
survey_csv = 'data/survey_data.csv'
data = pd.read_csv(survey_csv)

In [None]:
# Number of distinct participants in the data set.
n_users = data['user_id'].nunique()
print(f'Unique user_ids: {n_users}\n')

# Mean number of rows per treatment group (rows, not necessarily users).
rows_per_treatment = data.groupby("treatment").size()
print(f'Average group size: {rows_per_treatment.mean()}\n')

# Mean number of rows per (treatment, topic) combination.
rows_per_treatment_topic = data.groupby(["treatment", "topic"]).size()
print(f'Average group size by topic: {rows_per_treatment_topic.mean()}\n')

### Demographics

In [None]:
# Build a de-duplicated demographics table, plot the four demographic
# distributions split by treatment group, and run a chi-squared test of
# independence between treatment and each demographic variable.

# One row per distinct (user, treatment, demographics) combination.
# .copy() detaches the frame from `data` before its columns are rewritten.
demographics_subset = (
    data[['user_id', 'treatment', 'age', 'gender', 'education', 'politics']]
    .drop_duplicates()
    .copy()
)

# German survey answers -> English display labels. The dict order doubles as
# the category order on the x-axis. Answers that are not a key of the dict
# become NaN in .map() and therefore drop out of plots and tests.
education_dict = {
    'Hochschulabschluss': 'University degree',
    'Abitur': 'High school diploma',
    'Realschulabschluss': 'Intermediate secondary\n school certificate',
    'Hauptschulabschluss': 'Basic secondary\n school certificate',
    'Kein Abschluss': 'No degree',
    'Keine Angabe': 'No information'
}

politics_dict = {
    'Sehr stark': 'Very strong',
    'stark': 'Strong',
    'mittelmäßig': 'Moderate',
    'weniger stark': 'Weak',
    'überhaupt nicht': 'Not at all'
}

gender_dict = {
    'Männlich': 'Male',
    'Weiblich': 'Female',
    'Divers': 'Diverse',
    'Keine Angabe': 'No information'
}

# Translate and fix the category order in one step per column.
for column, translation in [('education', education_dict),
                            ('politics', politics_dict),
                            ('gender', gender_dict)]:
    demographics_subset[column] = pd.Categorical(
        demographics_subset[column].map(translation),
        categories=list(translation.values()),
    )

# Age groups are shown in alphabetical order of their string form.
age_as_text = demographics_subset['age'].astype(str)
demographics_subset['age'] = pd.Categorical(age_as_text, categories=sorted(age_as_text.unique()))

# Pin the hue order once so every panel uses the same colour per treatment.
# Without this, seaborn takes the order of first appearance in the frame, and
# the shared legend (built from the first panel) could mislabel other panels.
treatment_order = list(demographics_subset['treatment'].dropna().unique())

fontsize = 6

# Set the theme BEFORE creating the figure: rcParams only affect axes created
# afterwards, so the reverse order styles the figure differently on the first
# run than on a re-run.
sns.set_theme(style='whitegrid')

fig, axs = plt.subplots(2, 2, figsize=(16/2.54, 16/2.54))

panels = [
    ('age', 'Age'),
    ('gender', 'Gender'),
    ('education', 'Education'),
    ('politics', 'Interest in Politics'),
]

for ax, (column, title) in zip(axs.flatten(), panels):
    sns.countplot(data=demographics_subset, x=column, ax=ax,
                  hue='treatment', hue_order=treatment_order, palette='Greys')
    ax.set_title(title, size=fontsize, fontweight='bold')
    ax.set_xlabel('', size=fontsize)
    ax.set_ylabel('Count', size=fontsize)
    # Per-panel legends are replaced by one figure-level legend below.
    ax.get_legend().remove()

    # Rotate x tick labels and right-align them so they end under their bar.
    ax.tick_params(axis='x', rotation=45, labelsize=fontsize)
    ax.tick_params(axis='y', labelsize=fontsize)
    for label in ax.get_xticklabels():
        label.set_horizontalalignment('right')

    # Thin horizontal grid lines and thin frames.
    ax.grid(axis='y', linewidth=0.5)
    for spine in ax.spines.values():
        spine.set_linewidth(0.5)

# Single legend at the bottom, built from the first panel's hue entries.
handles, labels = axs[0, 0].get_legend_handles_labels()
labels = [label.capitalize() for label in labels]
fig.legend(handles, labels, loc='lower center', ncol=3, title='Treatment', fontsize=fontsize, title_fontsize=fontsize)

# Leave room at the bottom for the legend and between the rows for tick labels.
plt.subplots_adjust(left=0.075, right=0.95, top=0.975, bottom=0.15, hspace=0.35)

# Save before showing so the file is written from the fully drawn figure
# regardless of how the active backend handles figures after show().
fig.savefig('figures/demographics.pdf')

plt.show()

# Chi-squared test of independence: treatment x each demographic variable.
for column, heading in [('age', 'Age'),
                        ('gender', 'Gender'),
                        ('education', 'Education'),
                        ('politics', 'Politics')]:
    contingency_table = pd.crosstab(demographics_subset['treatment'], demographics_subset[column])
    chi2, p, dof, expected = chi2_contingency(contingency_table)
    print(heading)
    print(f'Chi-squared test: p-value = {p}')

### Political Topics

In [None]:
# Plot each participant's stance on the four political topics by treatment
# group and test each topic for independence from treatment.

# Panel column -> panel title, in plotting order (row-major over the 2x2 grid).
topic_titles = {
    'topic_1': 'Topic 1: Nuclear Power',
    'topic_2': 'Topic 2: Party Ban',
    'topic_3': 'Topic 3: Debt Brake',
    'topic_4': 'Topic 4: Speed Limit',
}

topics_subset = data[['user_id', 'treatment', *topic_titles]].drop_duplicates()

fontsize = 6

# 2x2 grid of countplots
fig, axes = plt.subplots(2, 2, figsize=(16/2.54, 16/2.54))

for panel, (column, title) in zip(axes.flatten(), topic_titles.items()):
    sns.countplot(ax=panel, data=topics_subset, x=column, hue='treatment', palette='Greys')
    panel.set_title(title, size=fontsize, fontweight='bold')
    # NOTE(review): labels are assigned by tick position; this presumes exactly
    # two categories with "disagree" sorting first - confirm against the data.
    panel.set_xticklabels(['Disagree', 'Agree'], size=fontsize)
    panel.set_xlabel('', size=fontsize)
    panel.set_ylabel('Count', size=fontsize)
    panel.yaxis.set_tick_params(labelsize=fontsize)
    # Per-panel legends are replaced by one figure-level legend below.
    panel.get_legend().remove()

# Legend entries are taken from the first panel and capitalized.
handles, labels = axes[0, 0].get_legend_handles_labels()
labels = [label.capitalize() for label in labels]

# Thin horizontal grid lines and thin subplot frames.
for ax in axes.flatten():
    ax.grid(axis='y', linewidth=0.5)
    for spine in ax.spines.values():
        spine.set_linewidth(0.5)

# tight_layout keeps the subplots inside the lower 94% of the figure height;
# the bottom margin is then set explicitly to make room for the legend.
plt.tight_layout(rect=[0, 0, 1, 0.94])
plt.subplots_adjust(bottom=0.1)

fig.legend(handles, labels, loc='lower center', ncol=3, title='Treatment', fontsize=fontsize, title_fontsize=fontsize)

plt.show()

# save plot as pdf
fig.savefig('figures/political_topics.pdf')

# Chi-squared test for independence, one per topic (printed in topic order).
for column in topic_titles:
    contingency_table = pd.crosstab(topics_subset[column], topics_subset['treatment'])
    chi2, p, dof, expected = chi2_contingency(contingency_table)
    print(f'Chi2: {chi2}, p: {p}')

### Political Dimensions

In [None]:
# Plot the four political-dimension ratings by treatment group, then compare
# the groups with a one-way ANOVA and a chi-squared test per dimension.

# (column, panel title, x-axis caption) in plotting order (row-major, 2x2).
dimension_panels = [
    ('dimension_1',
     'Dimension 1: Economic Liberalism vs. Welfare State',
     '1 = Prioritize Lower Taxes, 5 = Neutral, 9 = Prioritize Higher Welfare Spending'),
    ('dimension_2',
     'Dimension 2: Social Liberalism vs. Conservatism',
     '1 = Prioritize Individual Freedom, 5 = Neutral, 9 = Prioritize Traditional Values'),
    ('dimension_3',
     'Dimension 3: Climate Protection vs. Standard of Living',
     '1 = Prioritize Climate Protection, 5 = Neutral, 9 = Prioritize Standard of Living'),
    ('dimension_4',
     'Dimension 4: Internationalism vs. Nationalism',
     '1 = Prioritize International Cooperation, 5 = Neutral, 9 = Prioritize National Interests'),
]
dimension_columns = [column for column, _, _ in dimension_panels]
treatment_groups = ('targeted', 'non-targeted', 'false-targeted')

dimensions_subset = data[['user_id', 'treatment', *dimension_columns]].drop_duplicates()

fontsize = 6

# 2x2 grid of countplots
fig, axes = plt.subplots(2, 2, figsize=(16/2.54, 16/2.54))

for panel, (column, title, caption) in zip(axes.flatten(), dimension_panels):
    sns.countplot(ax=panel, data=dimensions_subset, x=column, hue='treatment', palette='Greys')
    panel.set_title(title, size=fontsize, fontweight='bold')
    # Tick labels are derived from tick POSITIONS (0-based) shifted by one.
    # NOTE(review): this is only correct if every scale value occurs in the
    # data, otherwise positions and scale values drift apart - confirm.
    panel.set_xticklabels([int(x + 1) for x in panel.get_xticks()], size=fontsize)
    panel.set_xlabel(caption, size=fontsize)
    panel.set_ylabel('Count', size=fontsize)
    panel.yaxis.set_tick_params(labelsize=fontsize)
    # Per-panel legends are replaced by one figure-level legend below.
    panel.get_legend().remove()

# Legend entries are taken from the first panel and capitalized.
handles, labels = axes[0, 0].get_legend_handles_labels()
labels = [label.capitalize() for label in labels]

# Thin horizontal grid lines and thin subplot frames.
for ax in axes.flatten():
    ax.grid(axis='y', linewidth=0.5)
    for spine in ax.spines.values():
        spine.set_linewidth(0.5)

# Margins: extra room at the bottom for the legend, and between the rows.
plt.subplots_adjust(left=0.075, right=0.95, top=0.975, bottom=0.125)
plt.subplots_adjust(hspace=0.35)

fig.legend(handles, labels, loc='lower center', ncol=3, title='Treatment', fontsize=fontsize, title_fontsize=fontsize)

plt.show()

# save plot as pdf
fig.savefig('figures/political_dimensions.pdf')

# One-way ANOVA per dimension across the three treatment groups.
for number, column in enumerate(dimension_columns, start=1):
    samples = [
        dimensions_subset.loc[dimensions_subset['treatment'] == group, column]
        for group in treatment_groups
    ]
    f_stat, p_value = stats.f_oneway(*samples)

    print(f"One-way ANOVA: Dimension {number}")
    print(f"F-Statistic: {f_stat}")
    print(f"P-Value: {p_value}")
    print()

# Chi-squared test for independence per dimension (rating x treatment).
for number, column in enumerate(dimension_columns, start=1):
    contingency_table = pd.crosstab(dimensions_subset[column], dimensions_subset['treatment'])
    chi2, p, dof, expected = chi2_contingency(contingency_table)
    print(f'Chi-squared test for independence: Dimension {number}')
    print(f'Chi2: {chi2}, p: {p}')
    print()


### Perceived Audience

In [None]:
# Plot how participants rated the intended audience of the message
# (post_treatment_1) per treatment group, then test whether the rating
# distribution depends on the treatment.

# Treatment value -> legend label. The dict order is also the hue order, so
# bar colours and legend entries cannot drift apart if the row order of
# `data` changes.
treatment_labels = {
    'targeted': 'Targeted',
    'false-targeted': 'False-Targeted',
    'non-targeted': 'Non-Targeted',
}

fig, ax = plt.subplots(figsize=(16/2.54, 8/2.54))

# stat='proportion' normalizes bar heights to sum to 1 across the whole plot,
# i.e. over all treatment groups together, not within each group.
sns.countplot(data=data, x='post_treatment_1', hue='treatment',
              hue_order=list(treatment_labels), stat='proportion',
              palette='Greys', ax=ax)

# Print means of post_treatment_1 by treatment (optional for debugging purposes)
print(data.groupby(['treatment'])['post_treatment_1'].mean())

# Rebuild the legend from the plotted hue entries with display labels.
handles, raw_labels = ax.get_legend_handles_labels()
legend = ax.legend(handles,
                   [treatment_labels.get(raw, raw) for raw in raw_labels],
                   title='Treatment Group',
                   loc='upper left',
                   fontsize=8,
                   title_fontsize=8)
legend.get_frame().set_linewidth(0.5)

# Shortened x tick labels, kept horizontal and centred under each bar group.
# NOTE(review): labels are assigned by tick position 0..8; this presumes all
# nine scale values occur in the data - confirm.
ax.set_xticks([0, 1, 2, 3, 4, 5, 6, 7, 8])
ax.set_xticklabels(
    [
        '1. Very similar', '2.', '3.', '4.', '5. General audience',
        '6.', '7.', '8.', '9. Very different'
    ],
    rotation=0, ha='center', size=8
)
ax.tick_params(axis='y', labelsize=8)

# Set labels for the axes
ax.set_xlabel('Perceived Audience of Message', size=8)
ax.set_ylabel('Proportion', size=8)

# Add horizontal gridlines for better comparison
ax.grid(axis='y', linestyle='-', alpha=0.7, linewidth=0.5)

# Set the linewidth for all sides of the frame
for spine in ax.spines.values():
    spine.set_linewidth(1)

fig.tight_layout()  # Ensure everything fits nicely

# Save before showing so the file is written from the fully drawn figure.
fig.savefig('figures/perceived_audience.pdf')

plt.show()

# Chi-squared test of independence between treatment and perceived audience.
# The table is built with crosstab so every row uses the same rating columns.
# Feeding per-group value_counts() to chi2_contingency is wrong: each Series
# is sorted by its own frequencies, so column k is a different rating in each
# row, and the rows differ in length when a group lacks a rating.
contingency_table = pd.crosstab(data['treatment'], data['post_treatment_1'])
chi2, p, dof, expected = chi2_contingency(contingency_table)

# Output the results
print(f"Chi2 Statistic: {chi2}")
print(f"P-Value: {p}")
print(f"Degrees of Freedom: {dof}")

### Results

In [None]:
# Plot the persuasive-impact outcome (treatment_var_1) per messaging strategy
# and topic, then compare the strategies pairwise with Welch t-tests.
# plot_messaging_strategies_by_topic, topics_dict, calculate_confidence_interval,
# colors and markers are not defined in this notebook; presumably they come
# from `from functions import *` - verify.
plot_messaging_strategies_by_topic(
    data,
    'treatment_var_1',
    ['False-Targeted', 'Non-Targeted', 'Targeted'],
    topics_dict,
    calculate_confidence_interval,
    'Estimate of Persuasive Impact (95% CI)',
    'figures/messaging_strategies_by_topic.pdf',
    x_limits=(1, 4.25),
    colors=colors,
    markers=markers,
)

outcome = 'treatment_var_1'

# Display label -> value of the `treatment` column.
treatment_values = {
    'Targeted': 'targeted',
    'Non-Targeted': 'non-targeted',
    'False-Targeted': 'false-targeted',
}

# Pairwise comparisons, reported in this order.
comparisons = [
    ('Targeted', 'Non-Targeted'),
    ('Targeted', 'False-Targeted'),
    ('Non-Targeted', 'False-Targeted'),
]

# NOTE(review): halving the two-sided p-value is only a valid one-sided
# p-value when the observed difference has the hypothesised sign (first group
# larger than second); otherwise the one-sided p is 1 - p/2. Confirm the
# direction of each hypothesis, or pass alternative='greater' to ttest_ind.
# `stats` (scipy.stats) is used because it is imported explicitly here.

# Overall comparison across all topics.
samples = {
    label: data.loc[data['treatment'] == value, outcome]
    for label, value in treatment_values.items()
}
for first, second in comparisons:
    p_one_sided = stats.ttest_ind(samples[first], samples[second], equal_var=False).pvalue / 2
    print(f"{first} vs. {second}: {samples[first].mean():.2f} vs. {samples[second].mean():.2f}, p-value = {p_one_sided:.4f}")
print("")

# The same comparisons within each topic.
for topic in data['topic'].unique():
    in_topic = data['topic'] == topic
    samples = {
        label: data.loc[(data['treatment'] == value) & in_topic, outcome]
        for label, value in treatment_values.items()
    }
    for first, second in comparisons:
        p_one_sided = stats.ttest_ind(samples[first], samples[second], equal_var=False).pvalue / 2
        print(f"{first} vs. {second} ({topic}): p-value = {p_one_sided:.4f}")
    print("")

In [None]:
# Plot the perceived message quality (treatment_var_2) per messaging strategy
# and topic, then compare the strategies pairwise with Welch t-tests.
# The plot call receives whatever `topics_dict` is in scope at this point
# (presumably the one from `from functions import *` on a fresh run - verify).
plot_messaging_strategies_by_topic(
    data,
    'treatment_var_2',
    ['False-Targeted', 'Non-Targeted', 'Targeted'],
    topics_dict,
    calculate_confidence_interval,
    'Perceived Quality of Messages (95% CI)',
    'figures/quality_of_messaging_strategies_by_topic.pdf',
    x_limits=(1, 6.5),
)

# German topic name -> English name used in the printed comparisons.
# This rebinds the module-level name `topics_dict` for all later cells.
topics_dict = {
    'Atomkraft': 'nuclear power',
    'Parteiverbot': 'party ban',
    'Schuldenbremse': 'debt brake',
    'Tempolimit': 'general speed limit'
}

outcome = 'treatment_var_2'

# Display label -> value of the `treatment` column.
treatment_values = {
    'Targeted': 'targeted',
    'Non-Targeted': 'non-targeted',
    'False-Targeted': 'false-targeted',
}

# Pairwise comparisons, reported in this order.
comparisons = [
    ('Targeted', 'Non-Targeted'),
    ('Targeted', 'False-Targeted'),
    ('Non-Targeted', 'False-Targeted'),
]

# NOTE(review): halving the two-sided p-value is only a valid one-sided
# p-value when the observed difference has the hypothesised sign; otherwise
# the one-sided p is 1 - p/2. Confirm the direction of each hypothesis.
# `stats` (scipy.stats) is used because it is imported explicitly here.

# Overall comparison across all topics.
samples = {
    label: data.loc[data['treatment'] == value, outcome]
    for label, value in treatment_values.items()
}
for first, second in comparisons:
    p_one_sided = stats.ttest_ind(samples[first], samples[second], equal_var=False).pvalue / 2
    print(f"{first} vs. {second} (overall: {samples[first].mean():.2f} vs. {samples[second].mean():.2f}, p = {p_one_sided:.4f})")
print("")

# The same comparisons within each topic.
for topic in data['topic'].unique():
    in_topic = data['topic'] == topic
    samples = {
        label: data.loc[(data['treatment'] == value) & in_topic, outcome]
        for label, value in treatment_values.items()
    }
    for first, second in comparisons:
        p_one_sided = stats.ttest_ind(samples[first], samples[second], equal_var=False).pvalue / 2
        print(f"{first} vs. {second} ({topics_dict[topic]}: {samples[first].mean():.2f} vs. {samples[second].mean():.2f}, p = {p_one_sided:.4f})")
    print("")

In [None]:
# Compare the length (in whitespace-separated words) of the shown arguments
# across messaging strategies, overall and per topic.

# Word count per argument. .str.len() yields NaN for a missing argument text
# instead of raising, which .apply(len) would do on a NaN.
data['argument_length'] = data['argument'].str.split().str.len()

# plot_messaging_strategies_by_topic and calculate_confidence_interval are not
# defined in this notebook (presumably from `from functions import *`).
plot_messaging_strategies_by_topic(
    data,
    'argument_length',
    ['False-Targeted', 'Non-Targeted', 'Targeted'],
    topics_dict,
    calculate_confidence_interval,
    'Argument Length in Words (95% CI)',
    'figures/argument_length_by_topic.pdf',
    x_limits=(100, 150),
)

outcome = 'argument_length'

# Display label -> value of the `treatment` column.
treatment_values = {
    'Targeted': 'targeted',
    'Non-Targeted': 'non-targeted',
    'False-Targeted': 'false-targeted',
}

# Pairwise comparisons, reported in this order.
comparisons = [
    ('Targeted', 'Non-Targeted'),
    ('Targeted', 'False-Targeted'),
    ('Non-Targeted', 'False-Targeted'),
]

# NOTE(review): the two-sided p-value is halved as for the outcome variables.
# That is only a valid one-sided p-value when the difference has the
# hypothesised sign; for a balance check on argument length a two-sided test
# may be what is wanted - confirm.
# `stats` (scipy.stats) is used because it is imported explicitly here.

# Overall comparison across all topics.
samples = {
    label: data.loc[data['treatment'] == value, outcome]
    for label, value in treatment_values.items()
}
for first, second in comparisons:
    p_one_sided = stats.ttest_ind(samples[first], samples[second], equal_var=False).pvalue / 2
    print(f"{first} vs. {second} (overall: {samples[first].mean():.2f} vs. {samples[second].mean():.2f}, p = {p_one_sided:.4f})")
print("")

# The same comparisons within each topic.
for topic in data['topic'].unique():
    in_topic = data['topic'] == topic
    samples = {
        label: data.loc[(data['treatment'] == value) & in_topic, outcome]
        for label, value in treatment_values.items()
    }
    for first, second in comparisons:
        p_one_sided = stats.ttest_ind(samples[first], samples[second], equal_var=False).pvalue / 2
        print(f"{first} vs. {second} ({topics_dict[topic]}: {samples[first].mean():.2f} vs. {samples[second].mean():.2f}, p = {p_one_sided:.4f})")
    print("")