In [None]:
# Reproduce Figure 3: The headline results
# Treatment effects in SD units, with approximate 95% CIs
#
# Each non-spacer row of `outcomes` becomes one horizontal bar with a CI
# whisker. Category headings are positioned from the rows actually drawn, so
# they stay aligned if outcomes or spacers are added or removed.

outcomes = [
    # (label, effect, ci_low, ci_high, category)
    ('Facebook minutes/day',    -1.30, -1.40, -1.20, 'Time Use'),
    ('Other social media',      -0.12, -0.22, -0.02, 'Time Use'),
    ('TV alone',                 0.20,  0.10,  0.30, 'Time Use'),
    ('Socializing offline',      0.16,  0.06,  0.26, 'Time Use'),
    ('', None, None, None, 'spacer'),
    ('News knowledge index',    -0.19, -0.30, -0.08, 'News & Politics'),
    ('Follows news',            -0.18, -0.29, -0.07, 'News & Politics'),
    ('Issue polarization',      -0.16, -0.27, -0.05, 'News & Politics'),
    ('Affective polarization',  -0.06, -0.17,  0.05, 'News & Politics'),
    ('Voter turnout',            0.07, -0.04,  0.18, 'News & Politics'),
    ('', None, None, None, 'spacer'),
    ('Happiness',                0.09,  0.00,  0.18, 'Well-being'),
    ('Life satisfaction',        0.08, -0.01,  0.17, 'Well-being'),
    ('Depression (lower=better)',-0.08, -0.17,  0.01, 'Well-being'),
    ('Loneliness',              -0.03, -0.12,  0.06, 'Well-being'),
    ('', None, None, None, 'spacer'),
    ('Post-exp Facebook use',   -0.61, -0.72, -0.50, 'Post-experiment'),
]

fig, ax = plt.subplots(figsize=(10, 12))

category_colors = {
    'Time Use': '#42A5F5',
    'News & Politics': '#EF5350',
    'Well-being': '#66BB6A',
    'Post-experiment': '#FFA726',
}

# Walk down the figure one row per outcome. Spacer rows draw nothing and only
# open an extra half-row gap between outcome families.
y_pos = 0
y_positions = []
category_top = {}  # category -> y of the first (top-most) row drawn for it
for label, effect, ci_lo, ci_hi, cat in outcomes:
    if cat == 'spacer':
        y_pos -= 0.5
        continue
    category_top.setdefault(cat, y_pos)
    color = category_colors[cat]
    ax.barh(y_pos, effect, height=0.6, color=color, alpha=0.8)
    # CI whisker: a horizontal line with a tick mark at each end.
    ax.plot([ci_lo, ci_hi], [y_pos, y_pos], color='black', linewidth=1.5)
    ax.plot(ci_lo, y_pos, 'k|', markersize=8)
    ax.plot(ci_hi, y_pos, 'k|', markersize=8)

    # Bold the significant ones (CI strictly excludes zero)
    sig = (ci_lo > 0) or (ci_hi < 0)
    weight = 'bold' if sig else 'normal'
    ax.text(-1.55, y_pos, label, va='center', ha='right', fontsize=11, fontweight=weight)
    # Numeric value just past the end of the bar, on the side the bar points to.
    ax.text(effect + 0.03 if effect > 0 else effect - 0.03, y_pos,
            f'{effect:+.2f}', va='center', ha='left' if effect > 0 else 'right',
            fontsize=9, color='#333')
    y_positions.append(y_pos)
    y_pos -= 1

ax.axvline(0, color='black', linewidth=1)
ax.set_xlabel('Treatment effect of deactivation (standard deviations)', fontsize=13)
ax.set_title('What happens when you quit Facebook for 4 weeks?\n(Reproducing Figure 3 from Allcott et al. 2020)',
             fontsize=15, pad=20)
ax.set_yticks([])
ax.set_xlim(-1.6, 0.5)

# Add category labels as headings half a row above each family's first outcome.
# The positions come from the layout loop above rather than hand-entered
# numbers, which previously put the last heading two rows below its bar.
for cat, top_y in category_top.items():
    ax.text(-1.58, top_y + 0.5, cat, fontsize=12, fontweight='bold', color=category_colors[cat],
            va='center', ha='right', style='italic')

plt.tight_layout()
plt.show()

print("Bold labels = statistically significant at 95% level")
print("\nThe story: quitting Facebook makes you less informed but also")
print("less polarized, slightly happier, and much less likely to go back.")

## Part 5: News Knowledge and Political Polarization

Deactivation made people **less informed** but also **less polarized**. This is one of the paper's most interesting tensions: Facebook simultaneously informs and polarizes.

Let's look at these effects more carefully.

In [None]:
# Deep dive: News & Politics outcomes
# 2x2 grid: two treatment-vs-control histograms, a bar chart of the published
# effect sizes, and a turnout comparison.

def _plot_group_hist(ax, column, bins, xlabel, title):
    """Overlay control and treatment density histograms of `column` on `ax`.

    Reads the notebook-level DataFrame `df` and splits it on `df.treatment`.
    A dashed vertical line marks each group's mean.

    Returns:
        (control, treated): the two Series that were plotted, so the caller
        can annotate them.
    """
    ctrl = df.loc[df.treatment==0, column]
    treat = df.loc[df.treatment==1, column]
    ax.hist(ctrl, bins=bins, alpha=0.6, color='#66BB6A', label='Control', density=True)
    ax.hist(treat, bins=bins, alpha=0.6, color='#EF5350', label='Treatment', density=True)
    ax.axvline(ctrl.mean(), color='#2E7D32', linestyle='--', linewidth=2)
    ax.axvline(treat.mean(), color='#C62828', linestyle='--', linewidth=2)
    ax.set_xlabel(xlabel)
    ax.set_title(title)
    ax.legend()
    return ctrl, treat


fig, axes = plt.subplots(2, 2, figsize=(14, 10))
bins = np.linspace(-3, 3, 40)

# Panel 1: News knowledge distributions
ax = axes[0, 0]
ctrl_nk, treat_nk = _plot_group_hist(ax, 'news_knowledge', bins,
                                     'News knowledge index (standardized)',
                                     'Deactivation reduced news knowledge')
# Effect size annotation: raw difference in group means
d = treat_nk.mean() - ctrl_nk.mean()
ax.annotate(f'Gap = {d:.2f} SD', xy=(treat_nk.mean(), 0.05),
            xytext=(treat_nk.mean()-0.8, 0.3), fontsize=12,
            arrowprops=dict(arrowstyle='->', color='black'))

# Panel 2: Issue polarization distributions
ax = axes[0, 1]
ctrl_ip, treat_ip = _plot_group_hist(ax, 'issue_polarization', bins,
                                     'Issue polarization index (standardized)',
                                     'Deactivation reduced issue polarization')

# Panel 3: The tradeoff, drawn as a bar chart of the published effects
# (hard-coded values, not estimated from `df`).
ax = axes[1, 0]
categories_bar = ['News\nknowledge', 'Follows\nnews', 'Issue\npolarization', 'Affective\npolarization']
vals = [-0.19, -0.18, -0.16, -0.06]
# Red = a cost of deactivation (less informed); green = a benefit (less polarized).
colors_bar = ['#EF5350', '#EF5350', '#66BB6A', '#66BB6A']
bars = ax.barh(categories_bar, vals, color=colors_bar, height=0.5, alpha=0.8)
ax.axvline(0, color='black', linewidth=0.5)
ax.set_xlabel('Treatment effect (SD)')
ax.set_title('The information-polarization tradeoff')
for bar, val in zip(bars, vals):
    # All values are negative, so the label goes just left of the bar's end.
    ax.text(val - 0.01, bar.get_y() + bar.get_height()/2, f'{val:+.2f}',
            va='center', ha='right', fontsize=11, fontweight='bold')

# Add a legend for good/bad (mpatches is imported in the notebook's import cell)
legend_elements = [mpatches.Patch(facecolor='#EF5350', alpha=0.8, label='Costs of deactivation'),
                   mpatches.Patch(facecolor='#66BB6A', alpha=0.8, label='Benefits of deactivation')]
ax.legend(handles=legend_elements, loc='lower left')

# Panel 4: Voter turnout (barely affected)
ax = axes[1, 1]
ctrl_vote = df.loc[df.treatment==0, 'voter_turnout'].mean()
treat_vote = df.loc[df.treatment==1, 'voter_turnout'].mean()
bars = ax.bar(['Control\n(kept Facebook)', 'Treatment\n(deactivated)'],
              [ctrl_vote, treat_vote],
              color=['#66BB6A', '#EF5350'], alpha=0.8, width=0.5)
ax.set_ylabel('Voter turnout rate')
ax.set_title('Deactivation had no significant effect\non voter turnout')
# NOTE: the y-axis starts at 0.5, so bar heights exaggerate the difference.
ax.set_ylim(0.5, 0.9)
for bar, val in zip(bars, [ctrl_vote, treat_vote]):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
            f'{val:.1%}', ha='center', fontsize=13, fontweight='bold')
ax.axhline(y=0.53, color='gray', linestyle=':', label='2018 US avg turnout')
ax.legend()

plt.suptitle('News, Politics, and Polarization', fontsize=16, y=1.02)
plt.tight_layout()
plt.show()

print("The core tension: Facebook keeps people informed AND polarized.")
print("Quitting reduces both. Is that a good tradeoff?")
print("\nNotice: affective polarization (disliking the other party) was NOT")
print("significantly reduced. Issue polarization (policy disagreement) was.")

## Part 6: Well-being Deep Dive

The well-being effects are positive but **small**. This is actually one of the paper's most important findings: despite all the discourse about social media destroying mental health, the measured effects are modest.

Let's visualize the well-being outcomes and think about what "small" means.

In [None]:
# Well-being outcomes: treatment vs control distributions
# One panel per outcome; the panel title reports the difference in means and
# whether a two-sample test rejects "no difference" at the 5% level.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

wellbeing_vars = [
    # (column in df, panel title, treatment colour, which direction is good)
    ('happiness', 'Happiness', '#66BB6A', 'higher = better'),
    ('life_satisfaction', 'Life satisfaction', '#42A5F5', 'higher = better'),
    ('depression_index', 'Depression index', '#EF5350', 'lower = better'),
    ('loneliness', 'Loneliness', '#FFA726', 'lower = better'),
]

# Same bin edges for every panel so the histograms are directly comparable.
bins = np.linspace(-3, 3, 35)

for ax, (var, title, color, direction) in zip(axes.flat, wellbeing_vars):
    ctrl = df.loc[df.treatment==0, var]
    treat = df.loc[df.treatment==1, var]

    ax.hist(ctrl, bins=bins, alpha=0.5, color='#9E9E9E', label='Control', density=True)
    ax.hist(treat, bins=bins, alpha=0.5, color=color, label='Treatment', density=True)

    # Means
    ax.axvline(ctrl.mean(), color='#616161', linestyle='--', linewidth=2)
    ax.axvline(treat.mean(), color=color, linestyle='--', linewidth=2)

    # Effect size and significance. Significance comes from a Welch t-test
    # (no equal-variance assumption) rather than from the size of the gap: a
    # fixed cutoff on |d| ignores sampling variability.
    d = treat.mean() - ctrl.mean()
    _, p_val = stats.ttest_ind(treat, ctrl, equal_var=False)
    sig = '*' if p_val < 0.05 else '(n.s.)'
    ax.set_title(f'{title} ({direction})\nEffect: {d:+.2f} SD {sig}', fontsize=12)
    ax.set_xlabel('Standardized score')
    ax.legend(fontsize=9)

plt.suptitle('Well-being outcomes: small but consistently positive effects',
             fontsize=15, y=1.02)
plt.tight_layout()
plt.show()

# Context comparison (reference values are hard-coded, not computed from df)
print("How big is 0.09 SD?")
print("=" * 50)
print("For comparison, published effect sizes:")
print("  Cognitive behavioral therapy for depression:  ~0.5-0.8 SD")
print("  Regular exercise on mood:                     ~0.3-0.5 SD")
print("  Facebook deactivation on happiness:           ~0.09 SD")
print("  Winning $1,000 on life satisfaction:           ~0.01 SD")
print()
print("The effect is real but small. The paper's own estimate:")
print("deactivation is worth about 0.11 SD of well-being,")
print("or roughly $30-50/month in equivalent compensation.")

## Part 7: Post-experiment Behavior

Here's a striking result: after the experiment ended and participants could go back to Facebook freely, the **treatment group used Facebook significantly less** than the control group.

This suggests that some of Facebook's hold on users comes from **habit**, not from ongoing enjoyment. Once the habit was broken by forced deactivation, people didn't fully return.

In [None]:
# Post-experiment Facebook use
# Three panels: mean minutes during vs. after the experiment, the
# post-experiment distribution by group, and stated valuation (WTA) next to
# the welfare-equivalent gain.
fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# Panel 1: During experiment vs post-experiment FB minutes
ax = axes[0]
ctrl_during = df.loc[df.treatment==0, 'facebook_minutes']
treat_during = df.loc[df.treatment==1, 'facebook_minutes']
ctrl_post = df.loc[df.treatment==0, 'post_fb_minutes']
treat_post = df.loc[df.treatment==1, 'post_fb_minutes']

x = np.arange(2)
width = 0.3
# Grouped bars: control to the left of each tick, treatment to the right.
ax.bar(x - width/2, [ctrl_during.mean(), ctrl_post.mean()], width,
       label='Control', color='#66BB6A', alpha=0.8)
ax.bar(x + width/2, [treat_during.mean(), treat_post.mean()], width,
       label='Treatment', color='#EF5350', alpha=0.8)
ax.set_xticks(x)
ax.set_xticklabels(['During\nexperiment', 'After\nexperiment'])
ax.set_ylabel('Facebook minutes/day')
ax.set_title('Facebook use: during vs. after')
ax.legend()

# Annotate the gap (treatment minus control, so a negative number of minutes)
post_gap = treat_post.mean() - ctrl_post.mean()
ax.annotate(f'Gap persists!\n({post_gap:.0f} min/day)',
            xy=(1, treat_post.mean()), xytext=(1.3, treat_post.mean() + 15),
            fontsize=11, arrowprops=dict(arrowstyle='->', color='black'))

# Panel 2: Distribution of post-experiment FB use
ax = axes[1]
bins = np.linspace(0, 200, 40)
ax.hist(ctrl_post, bins=bins, alpha=0.6, color='#66BB6A', label='Control', density=True)
ax.hist(treat_post, bins=bins, alpha=0.6, color='#EF5350', label='Treatment', density=True)
ax.axvline(ctrl_post.mean(), color='#2E7D32', linestyle='--', linewidth=2)
ax.axvline(treat_post.mean(), color='#C62828', linestyle='--', linewidth=2)
ax.set_xlabel('Facebook minutes/day (post-experiment)')
ax.set_title('Treatment group stayed away')
ax.legend()

# Panel 3: The "revealed preference" puzzle
ax = axes[2]
# WTA vs actual welfare gain
labels = ['Users say FB is\nworth to them\n(WTA)', 'Actual welfare\ngain from\ndeactivation']
values = [102, 40]  # ~$102 WTA median vs ~$40/month welfare equivalent
colors = ['#5C6BC0', '#66BB6A']
bars = ax.bar(labels, values, color=colors, width=0.5, alpha=0.8)
ax.set_ylabel('$/month equivalent')
ax.set_title('Revealed vs. experienced preference')
for bar, val in zip(bars, values):
    ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 2,
            f'~${val}', ha='center', fontsize=14, fontweight='bold')

# Arrow showing the gap: carries the WTA level across to the welfare-gain bar.
wta_level = values[0]
ax.annotate('', xy=(1, wta_level), xytext=(0, wta_level),
            arrowprops=dict(arrowstyle='<->', color='red', linewidth=2))
ax.text(0.5, wta_level + 4, 'People overvalue\ntheir own FB use!', ha='center',
        fontsize=11, color='red', fontweight='bold')
# The autoscaled y-limit stops just above the tallest bar, so the value label
# and the two-line annotation would spill into the panel title. Leave headroom.
ax.set_ylim(0, wta_level * 1.3)

plt.tight_layout()
plt.show()

print("Key finding: WTA (willingness to accept) was ~$100/month, but")
print("the actual well-being gain from quitting was only ~$40/month.")
print()
print("This gap suggests people OVERESTIMATE how much they'd miss Facebook.")
print("The paper calls this evidence of 'internality' (a wedge between")
print("predicted and experienced utility), possibly driven by habit or addiction.")

## Part 8: Running Your Own Analysis

Let's estimate the treatment effects ourselves using OLS regression. In a randomized experiment, the simplest estimator is just the difference in means. OLS with controls for baseline covariates improves precision.

The key equation:

$$Y_i = \alpha + \tau \cdot T_i + X_i'\beta + \epsilon_i$$

where $T_i$ is the treatment indicator and $\tau$ is the Average Treatment Effect (ATE).

In [None]:
from scipy.stats import ttest_ind
import statsmodels.api as sm

# Method 1: Simple difference in means (valid because randomization!)
# For each outcome we report the treatment-minus-control difference, its
# standard error, and a Welch two-sample t-test.
print("METHOD 1: Difference in Means")
print("=" * 60)
print(f"{'Outcome':<25} {'Diff':>7} {'SE':>7} {'t-stat':>7} {'p-val':>8}  Sig?")
print("-" * 60)

outcomes_to_test = [
    # (column in df, row label in the printed table)
    ('facebook_minutes', 'Facebook min/day'),
    ('news_knowledge', 'News knowledge'),
    ('issue_polarization', 'Issue polarization'),
    ('affective_polarization', 'Affective polar.'),
    ('happiness', 'Happiness'),
    ('life_satisfaction', 'Life satisfaction'),
    ('depression_index', 'Depression'),
    ('post_fb_minutes', 'Post-exp FB min'),
]

for var, label in outcomes_to_test:
    treat = df.loc[df.treatment==1, var]
    ctrl = df.loc[df.treatment==0, var]
    diff = treat.mean() - ctrl.mean()
    # Standard error of a difference in means with unequal variances. Computing
    # it directly (instead of backing it out as diff / t) keeps it well defined
    # even when the difference, and hence the t-statistic, is exactly zero.
    se = np.sqrt(treat.var(ddof=1) / len(treat) + ctrl.var(ddof=1) / len(ctrl))
    # equal_var=False selects Welch's test, whose statistic equals diff / se.
    t_stat, p_val = ttest_ind(treat, ctrl, equal_var=False)
    sig = '***' if p_val < 0.01 else '**' if p_val < 0.05 else '*' if p_val < 0.1 else ''
    print(f"  {label:<23} {diff:>+7.3f} {se:>7.3f} {t_stat:>7.2f} {p_val:>8.4f}  {sig}")

print()
print("*** p<0.01, ** p<0.05, * p<0.1")

# Method 2: OLS with demographic controls (more precise)
print("\n\nMETHOD 2: OLS with demographic controls (happiness)")
print("=" * 60)

# Regress happiness on the treatment dummy plus baseline covariates; the
# coefficient on `treatment` is the estimated average treatment effect.
X = df[['treatment', 'female', 'age_under_30', 'college', 'democrat', 'republican']]
X = sm.add_constant(X)
y = df['happiness']
model = sm.OLS(y, X).fit()

raw_diff = (df.loc[df.treatment==1, 'happiness'].mean()
            - df.loc[df.treatment==0, 'happiness'].mean())

print(model.summary().tables[1])
print("\nTreatment effect on happiness:")
print(f"  Without controls: {raw_diff:.4f}")
print(f"  With controls:    {model.params['treatment']:.4f}")
# NOTE: in this notebook's synthetic data the covariates are generated
# independently of the outcomes, so the precision gain will be negligible here.
print("  (Controls tighten the SE but shouldn't change the point estimate much)")

## Part 9: Connecting the Readings

This week's readings form a coherent picture. Let's map the connections.

| | Allcott et al. 2020 | Allcott et al. 2024 | Chmel et al. 2025 |
|---|---|---|---|
| **Design** | Deactivation RCT | Deactivation + feed manipulation | Observational + natural experiment |
| **Platform** | Facebook | Facebook + Instagram | Multiple |
| **N** | 1,661 | 23,000+ (3 papers combined) | Varies |
| **Key manipulation** | Deactivate Facebook for 4 weeks | Remove SM / change feed content | SM creators as political actors |
| **Time period** | Oct 2018 (midterms) | Sep-Nov 2020 (presidential) | Recent |
| **Key finding** | Small well-being gain, less informed, less polarized | No effect on polarization, affective polarization, beliefs | Creators shape political views |

### The intellectual arc

1. **Allcott 2020** asks: what happens when you *remove* social media entirely?
2. **Allcott 2024** asks: is it the *platform* or the *content* that matters? (Answer: the specific content features like reshares and algorithmic ranking had surprisingly small effects on measured political outcomes)
3. **Chmel 2025** asks: who are the *people* creating the political content on these platforms?

Together they suggest: social media's political effects may be less about algorithmic manipulation and more about the ecosystem of creators and the habits they reinforce.

In [None]:
# Visualize the "theory of change" across all three papers
# A hand-laid-out diagram on a 10 x 8 canvas with the axes hidden: one box per
# paper along the top, a summary box at the bottom, and arrows linking them.
fig, ax = plt.subplots(figsize=(14, 7))
ax.set_xlim(0, 10)
ax.set_ylim(0, 8)
ax.axis('off')

PAPER_BOX_BOTTOM = 5.5   # y of the lower edge of every paper box
PAPER_BOX_HEIGHT = 2
SUMMARY_BOX_TOP = 3.0    # y of the upper edge of the bottom box (0.5 + 2.5)

papers = [
    # (box left x, box width, text centre x, fill, edge/title colour,
    #  title, what the study does, headline result)
    (0.2, 3, 1.7, '#BBDEFB', '#1565C0', 'Allcott et al. 2020',
     'Remove Facebook\nentirely',
     'Result: less informed,\nless polarized, slightly happier'),
    (3.7, 3, 5.2, '#C8E6C9', '#2E7D32', 'Allcott et al. 2024',
     'Change the feed\n(algorithm, reshares)',
     'Result: surprisingly\nsmall effects'),
    (7.2, 2.6, 8.5, '#FFE0B2', '#E65100', 'Chmel et al. 2025',
     'Study the creators\nwho make the content',
     'Result: creators shape\npolitical attitudes'),
]

# Draw boxes for each paper: coloured rectangle, then title / approach / result.
for left, box_w, text_x, face, edge, paper_title, approach, result in papers:
    ax.add_patch(plt.Rectangle((left, PAPER_BOX_BOTTOM), box_w, PAPER_BOX_HEIGHT, fill=True,
                               facecolor=face, edgecolor=edge, linewidth=2,
                               alpha=0.8, zorder=2))
    ax.text(text_x, 7.0, paper_title, fontsize=12, fontweight='bold',
            ha='center', va='center', color=edge)
    ax.text(text_x, 6.3, approach, fontsize=11,
            ha='center', va='center')
    ax.text(text_x, 5.8, result, fontsize=9,
            ha='center', va='center', style='italic')

# Arrows between papers: from the right edge of one box to the left edge of the next.
for (left, box_w, *_rest), (next_left, *_next_rest) in zip(papers, papers[1:]):
    ax.annotate('', xy=(next_left, 6.5), xytext=(left + box_w, 6.5),
                arrowprops=dict(arrowstyle='->', linewidth=2, color='gray'))

# Bottom: the evolving question
ax.add_patch(plt.Rectangle((1, 0.5), 8, 2.5, fill=True, facecolor='#F3E5F5',
                            edgecolor='#6A1B9A', linewidth=2, alpha=0.7, zorder=2))
ax.text(5, 2.5, 'The evolving research question', fontsize=13, fontweight='bold',
        ha='center', va='center', color='#6A1B9A')
ax.text(5, 1.8, '2020: "Does social media affect welfare?"  (Yes, a little)',
        fontsize=11, ha='center', va='center')
ax.text(5, 1.3, '2024: "Is it the algorithm?"  (Not really)',
        fontsize=11, ha='center', va='center')
ax.text(5, 0.8, '2025: "Is it the people making content?"  (Looks like it)',
        fontsize=11, ha='center', va='center')

# Arrows from papers to bottom box, dropped from the centre of each paper box.
for x in [paper[2] for paper in papers]:
    ax.annotate('', xy=(x, SUMMARY_BOX_TOP), xytext=(x, PAPER_BOX_BOTTOM),
                arrowprops=dict(arrowstyle='->', linewidth=1.5, color='#9E9E9E',
                               connectionstyle='arc3,rad=0'))

# Title
ax.text(5, 7.8, "This week's readings: three experiments, one evolving question",
        fontsize=15, fontweight='bold', ha='center', va='center')

plt.tight_layout()
plt.show()

## Part 10: Exercises

Try modifying the code above to explore these questions:

### Exercise 1: Heterogeneous treatment effects
Do the well-being effects differ by gender or age? Modify the OLS regression to include an **interaction term** between `treatment` and `female` (or `age_under_30`).

```python
# Hint: create an interaction variable
df['treat_x_female'] = df['treatment'] * df['female']
# Then add it to the regression
```

### Exercise 2: Multiple testing
We tested 14 outcomes. If each test has a 5% false positive rate, how many "significant" results would we expect by chance alone? Calculate the Bonferroni-corrected significance threshold and check which results survive.

### Exercise 3: External validity
The sample is younger, more educated, and more Democratic than the US population. Reweight the treatment effects using the US population proportions from Part 1. Do the results change?

### Exercise 4: Consumer surplus
The paper estimates Facebook is worth ~$100/month to users (WTA) but deactivation only improves well-being by ~$40/month equivalent. Where does the other $60 go? Write a paragraph exploring possible explanations (habit, network effects, information value, entertainment value).

### Exercise 5: Connecting to Chmel et al. 2025
If social media creators are the primary channel through which platforms shape politics (Chmel's argument), what would you predict happens when you deactivate Facebook? Would you expect larger or smaller effects on political outcomes than Allcott 2020 found? Why?

---

*This notebook uses synthetic data generated to match the published statistics from Allcott et al. (2020). The original replication data is available at [openICPSR project 112081](https://www.openicpsr.org/openicpsr/project/112081) (free account required).*

## Part 4: The Big Picture (Reproducing Figure 3)

This is the paper's main result. Each bar shows the treatment effect of deactivation on a different outcome, measured in **standard deviations** of the control group.

Why standard deviations? Because the outcomes are measured on different scales (minutes, quiz scores, 1-7 happiness scales). Standardizing lets us compare apples to oranges.

In [None]:
# Visualize time substitution patterns
# Left: Facebook minutes by group. Middle: published effects on substitute
# activities. Right: approximate split of the freed-up hour.
fig, axes = plt.subplots(1, 3, figsize=(16, 5))

# Panel 1: Facebook minutes distribution by group
ax = axes[0]
treat_fb = df.loc[df.treatment==1, 'facebook_minutes']
ctrl_fb = df.loc[df.treatment==0, 'facebook_minutes']
fb_groups = [
    # (minutes, legend label, histogram colour, mean-line colour, text height
    #  as a fraction of the top of the y-axis)
    (ctrl_fb, 'Control', '#66BB6A', '#2E7D32', 0.9),
    (treat_fb, 'Treatment', '#EF5350', '#C62828', 0.8),
]
for minutes, group_label, fill, dark, frac in fb_groups:
    ax.hist(minutes, bins=30, alpha=0.6, color=fill, label=group_label, density=True)
for minutes, group_label, fill, dark, frac in fb_groups:
    ax.axvline(minutes.mean(), color=dark, linestyle='--', linewidth=2)
ax.set(xlabel='Facebook minutes/day', ylabel='Density', title='Facebook use plummeted')
ax.legend()
# Print each group's mean next to its dashed line.
y_top = ax.get_ylim()[1]
for minutes, group_label, fill, dark, frac in fb_groups:
    ax.text(minutes.mean()+2, y_top*frac, f'{minutes.mean():.0f} min',
            color=dark, fontsize=11)

# Panel 2: Where did the time go? (bar chart of substitutes)
ax = axes[1]
activities = ['Other social\nmedia', 'TV alone', 'Socializing\noffline']
te_values = [-0.12, 0.20, 0.16]
# Green for increases (and zero), red for decreases.
colors_bar = ['#66BB6A' if v >= 0 else '#EF5350' for v in te_values]
bars = ax.barh(activities, te_values, color=colors_bar, height=0.5, alpha=0.8)
ax.axvline(0, color='black', linewidth=0.5)
ax.set(xlabel='Treatment effect (standard deviations)',
       title='Where did the freed-up time go?')
for bar, val in zip(bars, te_values):
    # Put the label just beyond the end of the bar, on the side it points to.
    if val > 0:
        x_pos, ha = val + 0.01, 'left'
    else:
        x_pos, ha = val - 0.01, 'right'
    bar_mid = bar.get_y() + bar.get_height()/2
    ax.text(x_pos, bar_mid, f'{val:+.2f} SD', va='center', ha=ha, fontsize=11)

# Panel 3: The 60-minute pie
ax = axes[2]
time_alloc = [20, 16, 12, 12]  # approximate from paper
time_labels = ['TV alone\n(~20 min)', 'Socializing\n(~16 min)', 'Other online\n(~12 min)', 'Other offline\n(~12 min)']
time_colors = ['#FFA726', '#66BB6A', '#42A5F5', '#AB47BC']
pie_style = dict(autopct='%1.0f%%', startangle=90, textprops={'fontsize': 10})
wedges, texts, autotexts = ax.pie(time_alloc, labels=time_labels,
                                  colors=time_colors, **pie_style)
ax.set_title('How 60 freed-up minutes\nwere reallocated')

plt.tight_layout()
plt.show()

print("Surprise: quitting Facebook did NOT lead to more time on other social media.")
print("Instead, people watched more TV and spent more time with friends and family.")

## Part 3: What happened to their time?

When people quit Facebook, they freed up about **60 minutes per day**. Where did that time go?

This is a crucial question: if quitting Facebook just means more time on Instagram or TikTok, the effects might be very different than if it means more time with family.

In [None]:
# Generate synthetic individual-level data matching the paper's published statistics
# N = 1,661 in the impact evaluation (831 treatment, 830 control)

n_treat = 831
n_control = 830
n = n_treat + n_control

# Treatment indicator: the first n_treat rows are treated, the rest are controls.
treatment = np.array([1]*n_treat + [0]*n_control)

# Demographics (matching Table 2 proportions)
def gen_binary(p, n):
    """Draw `n` independent 0/1 indicators, each equal to 1 with probability `p`."""
    return np.random.binomial(1, p, n)

# Published treatment effects (in SD units) from Figure 3 / text:
# Negative = deactivation reduced it, Positive = deactivation increased it
# Outcomes flagged 'binary' are simulated as 0/1 draws whose success
# probability is control_mean (+ te_sd * control_sd for treated rows); all
# others are normal with the given control mean and SD.
effects = {
    # Time use
    'facebook_minutes':     {'control_mean': 74.5, 'control_sd': 45.0, 'te_sd': -1.30},
    'other_social_media':   {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': -0.12},
    'tv_alone':             {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': 0.20},
    'socializing_offline':  {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': 0.16},

    # News & politics
    'news_knowledge':       {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': -0.19},
    'follows_news':         {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': -0.18},
    'issue_polarization':   {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': -0.16},
    'affective_polarization':{'control_mean': 0.0, 'control_sd': 1.0,  'te_sd': -0.06},
    'voter_turnout':        {'control_mean': 0.72, 'control_sd': 0.45, 'te_sd': 0.07,
                             'binary': True},

    # Well-being
    'happiness':            {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': 0.09},
    'life_satisfaction':    {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': 0.08},
    'depression_index':     {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': -0.08},
    'loneliness':           {'control_mean': 0.0,  'control_sd': 1.0,  'te_sd': -0.03},

    # Post-experiment
    'post_fb_minutes':      {'control_mean': 74.5, 'control_sd': 45.0, 'te_sd': -0.61},
}

# Generate data
data = {'treatment': treatment}
data['female'] = gen_binary(0.57, n)
data['age_under_30'] = gen_binary(0.52, n)
data['college'] = gen_binary(0.51, n)
data['democrat'] = gen_binary(0.42, n)
# Party indicators are mutually exclusive, so Republicans are drawn only among
# non-Democrats. Dividing by the non-Democrat share keeps the overall
# Republican share at 13%.
data['republican'] = gen_binary(0.13 / (1 - 0.42), n) * (1 - data['democrat'])

for var, params in effects.items():
    cm = params['control_mean']
    cs = params['control_sd']
    te = params['te_sd'] * cs  # convert SD effect to raw units
    if params.get('binary', False):
        # Draw the 0/1 outcome directly so the control rate equals
        # control_mean. Thresholding a normal draw at 0.5 would give a control
        # rate of about 0.69 instead of the intended 0.72.
        data[var] = np.random.binomial(1, cm + treatment * te)
    else:
        noise = np.random.normal(0, cs, n)
        data[var] = cm + noise + treatment * te

# Clip facebook minutes to be non-negative (and at most 300 minutes/day).
# NOTE: the treated group's unclipped mean is 74.5 - 1.30 * 45 = 16 minutes
# with SD 45, so a large share of treated draws is raised to 0 and the realised
# effect on minutes is smaller in magnitude than -1.30 SD.
data['facebook_minutes'] = np.clip(data['facebook_minutes'], 0, 300)
data['post_fb_minutes'] = np.clip(data['post_fb_minutes'], 0, 300)

df = pd.DataFrame(data)
print(f"Synthetic dataset: {len(df)} participants ({df.treatment.sum()} treatment, {(1-df.treatment).sum():.0f} control)")
print(f"\nSample demographics:")
for col in ['female', 'age_under_30', 'college', 'democrat', 'republican']:
    print(f"  {col}: {df[col].mean():.1%}")
print(f"  Facebook minutes/day: {df.loc[df.treatment==0, 'facebook_minutes'].mean():.1f} (control group)")

## Part 2: Building the Synthetic Dataset

The original data is on [openICPSR](https://www.openicpsr.org/openicpsr/project/112081) (free account required). For this notebook, we build a synthetic dataset that is **calibrated to the published treatment effects** from the paper. Because the data are random draws, the estimates we recover will match the published effects only approximately (up to sampling noise). This is a common approach in teaching: we know the answer, and we want to see how the analysis recovers it.

### The four families of outcomes

The paper measures effects in four domains:

| Domain | What they measured | Effect of deactivation |
|--------|-------------------|----------------------|
| **Time use** | Minutes on Facebook, other activities | Freed up ~60 min/day |
| **News & politics** | News quiz, polarization, engagement | Less informed, less polarized |
| **Well-being** | Happiness, life satisfaction, depression | Slightly happier |
| **Post-experiment** | Did they go back to Facebook? | Used Facebook less afterward |

In [None]:
# Simulate a right-skewed WTA distribution, loosely patterned on the paper's.
# The paper reports that 61% had WTA <= $102; here that share is fixed by
# construction (1,661 of 2,743), so the histogram agrees with the funnel on the right.
n_total = 2743
n_randomized = 1661  # recruits with WTA <= $102, i.e. those who were randomized
WTA_CUTOFF = 102
WTA_CAP = 1000       # largest simulated valuation


def _lognormal_between(mean, sigma, size, low, high):
    """Return `size` lognormal draws that fall in the interval (low, high].

    Uses rejection sampling: draw, keep the values inside the interval, and
    repeat until enough have been collected.
    """
    kept = np.empty(0)
    while kept.size < size:
        draws = np.random.lognormal(mean=mean, sigma=sigma, size=size)
        kept = np.concatenate([kept, draws[(draws > low) & (draws <= high)]])
    return kept[:size]


wta = np.concatenate([
    _lognormal_between(3.8, 0.9, n_randomized, 0, WTA_CUTOFF),                  # those <= $102
    _lognormal_between(5.5, 0.8, n_total - n_randomized, WTA_CUTOFF, WTA_CAP),  # those > $102
])
# Floor tiny valuations at $1. The groups are sized exactly, so no after-the-fact
# rescaling is needed; rescaling values above the cutoff could not change the
# share below it anyway.
wta = np.clip(wta, 1, WTA_CAP)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

# Left: histogram of WTA
ax1.hist(wta, bins=50, color='#5C6BC0', alpha=0.8, edgecolor='white')
ax1.axvline(x=WTA_CUTOFF, color='red', linestyle='--', linewidth=2, label='$102 cutoff')
ax1.axvline(x=np.median(wta), color='orange', linestyle='--', linewidth=2, label=f'Median: ${np.median(wta):.0f}')
ax1.set_xlabel('Willingness to Accept ($)')
ax1.set_ylabel('Number of participants')
ax1.set_title('How much would you need to quit Facebook\nfor 4 weeks?')
ax1.legend()
ax1.set_xlim(0, 500)  # the long right tail beyond $500 is not shown

# Right: the randomization scheme
# '\u2264' is the "less than or equal to" sign (the label was previously mis-encoded).
labels = ['Total\nRecruited', 'WTA \u2264 $102\n(randomized)', 'Treatment\n(deactivate)', 'Control\n(keep FB)']
values = [n_total, n_randomized, 831, 830]
colors = ['#78909C', '#5C6BC0', '#EF5350', '#66BB6A']

bars = ax2.barh(range(4), values, color=colors, height=0.6)
ax2.set_yticks(range(4))
ax2.set_yticklabels(labels)
ax2.set_xlabel('Number of participants')
ax2.set_title('The randomization funnel')
ax2.invert_yaxis()  # first stage of the funnel at the top
for bar, val in zip(bars, values):
    ax2.text(bar.get_width() + 30, bar.get_y() + bar.get_height()/2,
             f'n = {val}', va='center', fontsize=12)

plt.tight_layout()
plt.show()

print(f"Only {n_randomized/n_total:.0%} of recruits were willing to deactivate for $102.")
print("These are the people who value Facebook LESS. Keep that in mind.")

### How much were people willing to accept?

Before randomization, the researchers asked: *"What's the minimum you'd accept to deactivate Facebook for 4 weeks?"*

The distribution of these valuations tells us something about how much people value Facebook.

In [None]:
# Table 2 from the paper: Sample vs. population demographics
# Grouped bars per category: experiment sample (left), Facebook users (centre,
# where available) and US population (right).
categories = ['Income\nunder $50K', 'College\neducated', 'Male', 'White', 'Age\nunder 30', 'Republican', 'Democrat']
sample_vals = [0.40, 0.51, 0.43, 0.68, 0.52, 0.13, 0.42]
fb_users =   [0.41, 0.33, None, None, None, None, None]  # limited public data
us_pop =     [0.52, 0.30, 0.49, 0.64, 0.21, 0.26, 0.31]

fig, ax = plt.subplots(figsize=(12, 6))
x = np.arange(len(categories))
width = 0.3

ax.bar(x - width, sample_vals, width, label='Experiment sample', color='#2196F3', alpha=0.85)
ax.bar(x + width, us_pop, width, label='US population', color='#9E9E9E', alpha=0.7)

# Add FB user bars where available
for i, val in enumerate(fb_users):
    if val is not None:
        ax.bar(x[i], val, width, color='#4CAF50', alpha=0.7)
# Empty bar whose only purpose is to create a single legend entry.
ax.bar([], [], width, color='#4CAF50', alpha=0.7, label='Facebook users')

ax.set_ylabel('Proportion')
ax.set_title('Who volunteered for a Facebook deactivation experiment?\n(Table 2 from Allcott et al. 2020)', fontsize=15)
ax.set_xticks(x)
ax.set_xticklabels(categories)
ax.legend(loc='upper right')
ax.set_ylim(0, 0.75)

# Annotate the key differences. The arrows point at the top of the *sample*
# bar, which sits one bar-width left of the tick; the centre slot is empty for
# these categories because no Facebook-user figure is available.
dem_idx = categories.index('Democrat')
young_idx = categories.index('Age\nunder 30')
ax.annotate('Much more\nDemocratic', xy=(x[dem_idx] - width, sample_vals[dem_idx]),
            xytext=(x[dem_idx], 0.60),
            fontsize=10, ha='center', color='#1565C0',
            arrowprops=dict(arrowstyle='->', color='#1565C0'))
ax.annotate('Much younger', xy=(x[young_idx] - width, sample_vals[young_idx]),
            xytext=(x[young_idx] + 0.5, 0.65),
            fontsize=10, ha='center', color='#1565C0',
            arrowprops=dict(arrowstyle='->', color='#1565C0'))

plt.tight_layout()
plt.show()

print("Key takeaway: The sample skews young, educated, female, and Democratic.")
print("This is important for interpreting the results (external validity).")

## Part 1: The Experimental Design

Before we look at results, let's understand who was in this experiment and how randomization works.

### Who were the participants?

The sample is **not** a random sample of Americans. It's a sample of Facebook users who:
1. Saw a Facebook ad about the study
2. Were willing to participate
3. Were willing to deactivate for $102 or less

This matters! Let's see how they compare to the general population.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from scipy import stats

# Notebook-wide plotting defaults and RNG seed; these apply to every cell that runs after this one.
sns.set_style('whitegrid')  # seaborn's "whitegrid" style for all figures
plt.rcParams['figure.figsize'] = (10, 6)  # default figure size in inches; cells that pass figsize override it
plt.rcParams['font.size'] = 13  # default matplotlib font size
np.random.seed(2018)  # seed NumPy's global RNG so the synthetic data is reproducible (2018 = the year of the experiment)

# The Welfare Effects of Social Media: Exploring the Facebook Deactivation Experiment

**Persuasion at Scale** (PSAM 3707 / UN 3707), Week 4

Based on: Allcott, Braghieri, Eichmeyer, and Gentzkow (2020). "The Welfare Effects of Social Media." *American Economic Review* 110(3): 629-676.

---

## What happened in this experiment?

In October 2018, right before the US midterm elections, researchers **paid Facebook users to deactivate their accounts for four weeks**.

- 2,743 users recruited via Facebook ads
- Those willing to deactivate for $102 or less were randomized
- ~830 in the Treatment group (paid to deactivate)
- ~830 in the Control group (kept using Facebook)
- Over 90% compliance with deactivation

**The big questions:** What happens when you take away someone's Facebook? Do they become happier? Less informed? Less politically polarized?