# Westwood et al. (2022) Replication - Part 2: Core Analysis & Figure 2

This notebook replicates **Figure 2** - the key finding:

> Disengaged survey respondents report 3-8x higher support for political violence than engaged respondents.

## Step 1: Setup

In [None]:
!pip install -q gdown

import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import gdown

# Use the seaborn "whitegrid" look. Matplotlib 3.6 renamed the bundled seaborn
# styles to 'seaborn-v0_8-*'; older releases only know 'seaborn-whitegrid'.
# Pick whichever name this installation offers instead of raising OSError.
# If neither is available the Matplotlib default style is kept.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

# Notebook-wide defaults: figure size (inches) and base font size.
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['font.size'] = 11

# Colors matching the paper
ORANGE = '#D55E00'  # Disengaged
BLUE = '#0072B2'    # Engaged

## Step 2: Download Data

In [None]:
# Google Drive locations of the two replication CSVs. Each key is used both
# as the local file name under /tmp and as the key in `data`.
urls = {
    "study14": "https://drive.google.com/uc?id=1gKIY11FaM5RmhhXTKx3wVcwGkMoTyTUM",
    "study25": "https://drive.google.com/uc?id=1VfZM3hSDzIIIVp2AUGC-RwOy-Fk2t_Fm",
}

# Download every CSV and load it into a DataFrame keyed by study name.
data = {}
for name, url in urls.items():
    csv_path = f"/tmp/{name}.csv"
    # gdown.download is expected to return the output path, or None when the
    # transfer fails (TODO confirm for the installed gdown version). Fail here
    # with a clear message rather than letting read_csv hit a missing file.
    if gdown.download(url, csv_path, quiet=True) is None:
        raise RuntimeError(f"Could not download {name} from {url}")
    data[name] = pd.read_csv(csv_path)
    print(f"Loaded {name}: {len(data[name]):,} rows")

## Step 3: Preprocess Study 1

Study 1 uses a **car-ramming vignette** with two stories:
- Story 1 (partisantreatment=1): Republican driver in Florida
- Story 2 (partisantreatment=2): Democrat driver in Oregon

In [None]:
def preprocess_study1(df):
    """Preprocess Study 1 (car-ramming vignette) following preprocess1.R.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export. Columns read: gc, experiment, Q10, Q11,
        partisantreatment, Q43, Q49, Q45, Q51, version.

    Returns
    -------
    pandas.DataFrame
        Copy of the rows with gc == 1 and experiment == 1, with four added
        columns:
        - pid: Q10, overwritten by 'Democrat' / 'Republican' where Q11 names
          that party.
        - engaged: 'Engaged' if the state question for the shown story was
          answered correctly, otherwise 'Disengaged'.
        - justified: 1.0 if the story's justification item equals
          'Justified', 0.0 for any other answer, NaN if the item was not
          answered or partisantreatment is neither 1 nor 2.
        - alignment: 'In-Party Driver', 'Out-Party Driver',
          'Apolitical Driver', or NaN when no rule matches.
    """
    # Filter by gc and experiment; copy so the caller's frame is not modified.
    study = df[(df['gc'] == 1) & (df['experiment'] == 1)].copy()

    # Recode party ID: Q11 overrides Q10 where it names a party.
    study['pid'] = study['Q10']
    study.loc[study['Q11'] == 'Democratic Party', 'pid'] = 'Democrat'
    study.loc[study['Q11'] == 'Republican Party', 'pid'] = 'Republican'

    story1 = study['partisantreatment'] == 1
    story2 = study['partisantreatment'] == 2

    # ENGAGEMENT CHECK (preprocess1.R lines 127-129)
    # Story 1 asks about Florida (Q43), Story 2 asks about Oregon (Q49).
    study['engaged'] = 'Disengaged'
    passed = (story1 & (study['Q43'] == 'Florida')) | (story2 & (study['Q49'] == 'Oregon'))
    study.loc[passed, 'engaged'] = 'Engaged'

    # Recode justified (preprocess1.R lines 140-143)
    # Story 1 answers live in Q45, Story 2 answers in Q51. Only rows that
    # actually answered are coded, so a skipped item stays NaN instead of
    # being counted as "not justified" (0.0).
    study['justified'] = np.nan
    for shown, question in ((story1, 'Q45'), (story2, 'Q51')):
        answered = shown & study[question].notna()
        study.loc[answered, 'justified'] = (
            study.loc[answered, question] == 'Justified'
        ).astype(float)

    # Alignment (preprocess1.R lines 150-164)
    # Start from an object-dtype column: writing strings into a float NaN
    # column is deprecated in pandas, and an all-NaN float column would not
    # support the .str accessor.
    # NOTE(review): the notebook text describes treatment 1 as the
    # Republican-driver story and treatment 2 as the Democrat-driver story,
    # yet the labels below mark a Democrat respondent under treatment 2 as
    # 'Out-Party Driver'. The mapping is kept exactly as it was; confirm it
    # against preprocess1.R.
    study['alignment'] = pd.Series(np.nan, index=study.index, dtype=object)
    political = study['version'] == 1
    label_rules = (
        (2, 'Democrat', 'Out-Party Driver'),
        (1, 'Democrat', 'In-Party Driver'),
        (2, 'Republican', 'In-Party Driver'),
        (1, 'Republican', 'Out-Party Driver'),
    )
    for treatment, party, label in label_rules:
        match = political & (study['partisantreatment'] == treatment) & (study['pid'] == party)
        study.loc[match, 'alignment'] = label
    study.loc[study['version'] == 2, 'alignment'] = 'Apolitical Driver'

    return study

# Run the Study 1 pipeline on the raw "study14" download, then report the
# resulting sample size and the engaged/disengaged split.
study1_raw = data['study14']
study1 = preprocess_study1(study1_raw)
print(f"Study 1: n = {len(study1):,}")
print(f"Engagement: {study1['engaged'].value_counts().to_dict()}")

## Step 4: Preprocess Study 2

Study 2 uses a **shooting vignette** with one story:
- Version 1 = Democrat shooter
- Version 2 = Republican shooter  
- Version 3 = Apolitical shooter

**Key difference from Study 1:** there is no `partisantreatment` column; `version` alone determines the shooter's party.

In [None]:
def preprocess_study2(df):
    """Preprocess Study 2 (shooting vignette) following preprocess2.R.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export. Columns read: gc, experiment, Q10, Q11, Q43, Q45,
        version.

    Returns
    -------
    pandas.DataFrame
        Copy of the rows with gc == 1 and experiment == 1, with four added
        columns:
        - pid: Q10, overwritten by 'Democrat' / 'Republican' where Q11 names
          that party.
        - engaged: 'Engaged' if Q43 == 'Iowa', otherwise 'Disengaged'.
        - justified: 1.0 if Q45 equals 'Justified', 0.0 for any other
          answer, NaN if Q45 was not answered.
        - alignment: 'In-Party Shooter', 'Out-Party Shooter',
          'Apolitical Shooter', or NaN when no rule matches.
    """
    # Filter by gc and experiment; copy so the caller's frame is not modified.
    study = df[(df['gc'] == 1) & (df['experiment'] == 1)].copy()

    # Recode party ID: Q11 overrides Q10 where it names a party.
    study['pid'] = study['Q10']
    study.loc[study['Q11'] == 'Democratic Party', 'pid'] = 'Democrat'
    study.loc[study['Q11'] == 'Republican Party', 'pid'] = 'Republican'

    # ENGAGEMENT CHECK (preprocess2.R line 118)
    # Q43 asks about Iowa (where shooting took place)
    study['engaged'] = 'Disengaged'
    study.loc[study['Q43'] == 'Iowa', 'engaged'] = 'Engaged'

    # Recode justified (preprocess2.R lines 132-134)
    # Single set of questions Q44/Q45/Q46; only Q45 is used here.
    # Only rows that answered Q45 are coded, so a skipped item stays NaN
    # instead of being counted as "not justified" (0.0).
    study['justified'] = np.nan
    answered = study['Q45'].notna()
    study.loc[answered, 'justified'] = (
        study.loc[answered, 'Q45'] == 'Justified'
    ).astype(float)

    # Alignment (preprocess2.R lines 141-150)
    # Start from an object-dtype column: writing strings into a float NaN
    # column is deprecated in pandas, and an all-NaN float column would not
    # support the .str accessor.
    # NOTE(review): the notebook text calls version 1 the "Democrat shooter"
    # and version 2 the "Republican shooter", yet the labels below mark a
    # Democrat respondent under version 1 as 'Out-Party Shooter'. The mapping
    # is kept exactly as it was; confirm it against preprocess2.R.
    study['alignment'] = pd.Series(np.nan, index=study.index, dtype=object)
    label_rules = (
        (1, 'Democrat', 'Out-Party Shooter'),
        (1, 'Republican', 'In-Party Shooter'),
        (2, 'Democrat', 'In-Party Shooter'),
        (2, 'Republican', 'Out-Party Shooter'),
    )
    for version, party, label in label_rules:
        match = (study['version'] == version) & (study['pid'] == party)
        study.loc[match, 'alignment'] = label
    # Version 3 = Apolitical
    study.loc[study['version'] == 3, 'alignment'] = 'Apolitical Shooter'

    return study

# Run the Study 2 pipeline on the raw "study25" download, then report the
# resulting sample size and the engaged/disengaged split.
study2_raw = data['study25']
study2 = preprocess_study2(study2_raw)
print(f"Study 2: n = {len(study2):,}")
print(f"Engagement: {study2['engaged'].value_counts().to_dict()}")

## Step 5: Calculate Group Statistics

In [None]:
def calc_group_stats(df):
    """Calculate mean, SE, and 95% CI for each alignment x engagement group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'alignment', 'engaged' and 'justified'.
        Rows with a missing 'alignment' or 'justified' are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per (alignment, engaged) group with at least two
        observations, in groupby (sorted-key) order. Columns: alignment,
        engaged, mean, se, ci_lower, ci_upper, n. The columns are present
        even when no group qualifies, so callers can index them safely.
    """
    columns = ['alignment', 'engaged', 'mean', 'se', 'ci_lower', 'ci_upper', 'n']
    results = []
    df_clean = df.dropna(subset=['alignment', 'justified'])

    for (alignment, engaged), group in df_clean.groupby(['alignment', 'engaged']):
        y = group['justified']
        n = len(y)
        # A sample standard deviation (ddof=1) needs at least two points.
        if n < 2:
            continue
        mean = y.mean()
        se = y.std(ddof=1) / np.sqrt(n)
        # Two-sided 95% interval from the t distribution with n-1 df.
        t_crit = stats.t.ppf(0.975, df=n-1)
        results.append({
            'alignment': alignment,
            'engaged': engaged,
            'mean': mean,
            'se': se,
            'ci_lower': mean - t_crit * se,
            'ci_upper': mean + t_crit * se,
            'n': n
        })
    # Passing `columns` fixes the schema (and column order) even when
    # `results` is empty.
    return pd.DataFrame(results, columns=columns)

# Summarise both studies with the same helper, then show each table under
# its own heading.
stats1, stats2 = (calc_group_stats(frame) for frame in (study1, study2))

for heading, table in (("Study 1 Statistics:", stats1),
                       ("\nStudy 2 Statistics:", stats2)):
    print(heading)
    display(table)

## Step 6: Reproduce FIGURE 2

In [None]:
def plot_figure2(stats_df, title, ax):
    """Draw one Figure 2 panel on `ax`.

    `stats_df` needs the columns alignment, engaged, mean, ci_lower and
    ci_upper. Each distinct alignment gets one row on the y axis; the
    'Engaged' estimate is drawn slightly above the row and the 'Disengaged'
    estimate slightly below it, each with a horizontal CI bar and a numeric
    label. Nothing is returned; `ax` is modified in place.
    """
    palette = {'Disengaged': ORANGE, 'Engaged': BLUE}
    row_labels = stats_df['alignment'].unique()
    row_of = dict(zip(row_labels, range(len(row_labels))))

    # (engagement status, vertical nudge away from the row's centre line)
    for status, nudge in (('Engaged', 0.15), ('Disengaged', -0.15)):
        rows = stats_df[stats_df['engaged'] == status]
        heights = [row_of[label] + nudge for label in rows['alignment']]

        # errorbar expects distances from the point, not absolute bounds.
        below = rows['mean'] - rows['ci_lower']
        above = rows['ci_upper'] - rows['mean']
        ax.errorbar(rows['mean'], heights,
                    xerr=[below, above],
                    fmt='o', markersize=10, capsize=5,
                    color=palette[status], label=status,
                    markerfacecolor='white', markeredgewidth=2)

        # Print the point estimate just above each marker.
        for estimate, height in zip(rows['mean'], heights):
            ax.annotate(f'{estimate:.2f}', (estimate, height), xytext=(0, 10),
                        textcoords='offset points', ha='center', fontsize=9,
                        color=palette[status])

    ax.set_yticks(range(len(row_labels)))
    ax.set_yticklabels(row_labels)
    ax.set_xlabel('Proportion Saying Suspect is Justified (95% CI)')
    ax.set_xlim(-0.05, 0.6)
    ax.set_title(title, fontweight='bold')
    ax.legend(loc='lower right')
    # Dashed reference line at zero.
    ax.axvline(x=0, color='gray', linestyle='--', alpha=0.5)

# Two stacked panels, one per study, sharing a single overall title.
fig, axes = plt.subplots(2, 1, figsize=(10, 8))
panels = [
    (stats1, 'Study 1 (Qualtrics) - Car Ramming Vignette'),
    (stats2, 'Study 2 (Qualtrics) - Shooting Vignette'),
]
for panel_ax, (panel_stats, panel_title) in zip(axes, panels):
    plot_figure2(panel_stats, panel_title, panel_ax)

# y > 1 lifts the overall title above the top panel's own title.
fig.suptitle('Figure 2: Support for Violence Among Engaged and Disengaged Respondents', 
             fontsize=14, fontweight='bold', y=1.02)
plt.tight_layout()
plt.show()

## Step 7: Key Results

In [None]:
# Headline comparison: share answering "justified" among engaged versus
# disengaged respondents, excluding the apolitical conditions.
banner = "="*60
print(banner)
print("KEY RESULT: Political Treatments Only")
print(banner)

for name, study_df in [('Study 1', study1), ('Study 2', study2)]:
    # na=False makes a missing alignment count as "not apolitical", so rows
    # without an alignment label stay in the political subset.
    is_apolitical = study_df['alignment'].str.contains('Apolitical', na=False)
    political = study_df[~is_apolitical]

    by_status = political['engaged']
    engaged = political.loc[by_status == 'Engaged', 'justified'].mean()
    disengaged = political.loc[by_status == 'Disengaged', 'justified'].mean()

    # Avoid dividing by zero when no engaged respondent answered "justified".
    if engaged > 0:
        ratio = disengaged / engaged
    else:
        ratio = float('inf')

    print(f"\n{name}:")
    print(f"  Engaged: {engaged:.1%}")
    print(f"  Disengaged: {disengaged:.1%}")
    print(f"  Ratio: {ratio:.1f}x inflation")

---

# Appendix: Diagnostic Figures

## A1: Engagement Rates

In [None]:
# A1: bar chart of how many respondents passed / failed the engagement
# check in each study, labelled with the count and the share.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

for ax, (name, study) in zip(axes, [('Study 1', study1), ('Study 2', study2)]):
    counts = study['engaged'].value_counts()
    total = counts.sum()

    # Blue for 'Engaged', orange for everything else.
    colors = []
    for status in counts.index:
        colors.append(BLUE if status == 'Engaged' else ORANGE)

    bars = ax.bar(counts.index, counts.values, color=colors)
    ax.set_title(f'{name}: Engagement Check Results', fontweight='bold')
    ax.set_ylabel('Count')

    # Put "count (percent)" on top of each bar, centred horizontally.
    for bar, val in zip(bars, counts.values):
        pct = val / total * 100
        bar_center = bar.get_x() + bar.get_width()/2
        ax.annotate(f'{val}\n({pct:.0f}%)', (bar_center, val),
                   ha='center', va='bottom', fontsize=10)

plt.suptitle('A1: How Many Passed the Engagement Check?', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.show()

## A2: The Key Finding - Simple Bar Chart

In [None]:
# A2: share answering "justified" by engagement status, political
# conditions only, with +/- 1.96 * SEM error bars.
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
a2_panels = [('Study 1 (Car Ramming)', study1), ('Study 2 (Shooting)', study2)]

for ax, (name, study) in zip(axes, a2_panels):
    # na=False keeps rows whose alignment is missing in the political subset.
    is_apolitical = study['alignment'].str.contains('Apolitical', na=False)
    political = study[~is_apolitical]
    means = political.groupby('engaged')['justified'].agg(['mean', 'sem'])

    colors = [ORANGE if status == 'Disengaged' else BLUE for status in means.index]
    half_width = 1.96*means['sem']
    bars = ax.bar(means.index, means['mean'], yerr=half_width,
                  color=colors, capsize=5)
    ax.set_title(name, fontweight='bold')
    ax.set_ylabel('Proportion Justified')
    ax.set_ylim(0, 0.5)

    # Percentage label slightly above the top of each bar.
    for bar, val in zip(bars, means['mean']):
        label_xy = (bar.get_x() + bar.get_width()/2, val + 0.03)
        ax.annotate(f'{val:.1%}', label_xy,
                   ha='center', fontsize=12, fontweight='bold')

plt.suptitle('A2: The Key Finding (Political Treatments Only)', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.show()

## A3: Sample Sizes by Condition

In [None]:
# A3: number of respondents in every alignment x engagement cell.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Look colors up by engagement label rather than by column position, so a
# study in which one category is absent still gets the right color.
engagement_colors = {'Disengaged': ORANGE, 'Engaged': BLUE}

for ax, (name, study) in zip(axes, [('Study 1', study1), ('Study 2', study2)]):
    # Rows: alignment; columns: engagement status; cells: respondent count.
    counts = study.dropna(subset=['alignment']).groupby(['alignment', 'engaged']).size().unstack(fill_value=0)
    bar_colors = [engagement_colors.get(status, 'gray') for status in counts.columns]
    counts.plot(kind='bar', ax=ax, color=bar_colors)
    ax.set_title(f'{name}: Sample Sizes', fontweight='bold')
    ax.set_ylabel('Count')
    ax.set_xlabel('')
    ax.legend(title='')
    ax.tick_params(axis='x', rotation=45)

plt.suptitle('A3: Sample Sizes by Condition', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.show()

## A4: Justified vs Unjustified by Engagement

In [None]:
# A4: stacked bars showing the justified / unjustified split within each
# engagement group (all conditions pooled).
fig, axes = plt.subplots(1, 2, figsize=(10, 4))

for ax, (name, study) in zip(axes, [('Study 1', study1), ('Study 2', study2)]):
    # Share of each `justified` value within every engagement group; missing
    # values are left out by value_counts.
    props = study.groupby('engaged')['justified'].value_counts(normalize=True).unstack(fill_value=0)
    # Force both outcome columns (0.0 then 1.0) to exist before renaming.
    # Without this, a study in which nobody (or everybody) answered
    # "justified" has a single column and the rename raises ValueError.
    props = props.reindex(columns=[0.0, 1.0], fill_value=0)
    props.columns = ['Unjustified', 'Justified']
    props[['Unjustified', 'Justified']].plot(kind='bar', stacked=True, ax=ax,
                                              color=['#56B4E9', '#E69F00'])
    ax.set_title(name, fontweight='bold')
    ax.set_ylabel('Proportion')
    ax.set_xlabel('')
    ax.legend(loc='upper right')
    ax.tick_params(axis='x', rotation=0)
    
    # Add percentage labels, centred vertically inside the "Justified"
    # segment, which is stacked on top of "Unjustified".
    for i, (idx, row) in enumerate(props.iterrows()):
        ax.annotate(f"{row['Justified']:.0%}", (i, row['Justified']/2 + row['Unjustified']),
                   ha='center', va='center', fontsize=10, color='black')

plt.suptitle('A4: Response Distribution by Engagement', fontsize=12, fontweight='bold')
plt.tight_layout()
plt.show()

## Summary

**Figure 2 shows the key finding:**

- **Blue (Engaged)**: ~10-12% say violence is justified
- **Orange (Disengaged)**: ~35-40% say violence is justified

This 3-8x gap explains why prior research reported ~20% support: those studies averaged over engaged and disengaged respondents together.

**Next:** Run notebook 03 for partial identification bounds