# In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Load the sentiment-labelled comments
file_path = "immigration_comments_final_sentiment.csv"
df = pd.read_csv(file_path)

# Parse dates; values that cannot be parsed become NaT instead of raising
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Filter to target cities (copy so the column assignments below write to an
# independent frame rather than to a slice of the loaded one)
cities = ['Chicago', 'Denver', 'New York City', 'Philadelphia', 'Minneapolis', 'Los Angeles']
df = df[df['City'].isin(cities)].copy()

# Truncate every date to the first day of its month. Converting the monthly
# Period straight back to a Timestamp keeps NaT as NaT and avoids formatting
# each value as a string only to parse it again.
df['Month'] = df['Date'].dt.to_period('M').dt.to_timestamp()

# Map stance codes to sentiment labels; codes not in the mapping become NaN
sentiment_labels = {-1: 'Negative', 0: 'Neutral', 1: 'Positive'}
df['Sentiment'] = df['Stance'].map(sentiment_labels)

# Aggregate sentiment prevalence:
#   Count = comments per (city, month, sentiment)
#   Total = comments per (city, month), counted without the Sentiment key
#   Prevalence = Count / Total
grouped = df.groupby(['City', 'Month', 'Sentiment']).size().reset_index(name='Count')
total_by_month_city = df.groupby(['City', 'Month']).size().reset_index(name='Total')
merged = pd.merge(grouped, total_by_month_city, on=['City', 'Month'])
merged['Prevalence'] = merged['Count'] / merged['Total']
merged = merged.sort_values(by='Month')

# Treatment dates per city; a city without an entry gets no marker line
treatment_dates = {
    city_name: pd.Timestamp(iso_date)
    for city_name, iso_date in (
        ('New York City', '2022-06-01'),
        ('Chicago', '2022-07-01'),
        ('Denver', '2022-11-01'),
    )
}

# Line colour used for each sentiment label
color_map = dict(zip(
    ('Negative', 'Neutral', 'Positive'),
    ('red', 'green', 'blue'),
))

# Plotting style, set one rc group at a time:
# serif text, black 1pt axes frame, white backgrounds, 300 dpi figures
plt.rc('font', family='serif')
plt.rc('axes', edgecolor='black', linewidth=1, labelsize=12, titlesize=14,
       facecolor='white')
plt.rc('legend', fontsize=10)
plt.rc(('xtick', 'ytick'), labelsize=10)
plt.rc('figure', facecolor='white', dpi=300)
plt.rc('savefig', facecolor='white', edgecolor='white')

# One plot per city: prevalence of each sentiment, smoothed with a centered
# 3-month rolling average, saved as sentiment_prevalence_<city>.png
for city in cities:
    city_data = merged[merged['City'] == city]

    # `merged` only has a row where a sentiment actually occurred, so a plain
    # row-based rolling window would skip months in which a sentiment had no
    # comments and would span more than three calendar months.
    # observed_months: months that appear in `merged` for this city.
    # calendar: every month start between the first and last observed month.
    observed_months = pd.DatetimeIndex(city_data['Month'].unique()).sort_values()
    if len(observed_months) > 0:
        calendar = pd.date_range(observed_months[0], observed_months[-1], freq='MS')
    else:
        calendar = observed_months  # nothing to plot; keep the empty index

    fig, ax = plt.subplots(figsize=(10, 5))
    for sentiment in ['Negative', 'Neutral', 'Positive']:
        prevalence = (
            city_data.loc[city_data['Sentiment'] == sentiment]
            .set_index('Month')['Prevalence']
            # Sentiment absent in a month the city has data for => prevalence 0
            .reindex(observed_months, fill_value=0.0)
            # Months with no rows for the city at all stay NaN (unknown)
            .reindex(calendar)
        )

        # Apply the 3-month rolling average on the gap-free calendar so the
        # window is three calendar months; min_periods=1 lets the window
        # ignore NaN months and shrink at the edges. Only observed months
        # are kept for plotting.
        smoothed = (
            prevalence.rolling(window=3, center=True, min_periods=1).mean()
            .reindex(observed_months)
        )

        ax.plot(smoothed.index, smoothed.values,
                label=sentiment, color=color_map[sentiment], linewidth=2)

    ax.set_title(f'Smoothed Sentiment Prevalence in {city}')
    ax.set_xlabel('Month')
    ax.set_ylabel('Proportion of Comments')
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
    plt.xticks(rotation=45)

    # Add treatment period line if available
    if city in treatment_dates:
        ax.axvline(treatment_dates[city], color='gray', linestyle='--', linewidth=1.5)

    ax.legend()
    ax.grid(True, which='both', linestyle='--', linewidth=0.5)
    plt.tight_layout()
    filename = f'sentiment_prevalence_{city.lower().replace(" ", "_")}.png'
    plt.savefig(filename)
    # Close this specific figure so figures do not accumulate across cities
    plt.close(fig)


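# NOTE(review): stray echo of the date-parsing line, apparently notebook output; not part of the script:
#   df['Date'] = pd.to_datetime(df['Date'], errors='coerce')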
