# Community Sentiment Analysis

In [None]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Hide warnings whose message begins with the text below; every other warning
# is still shown.  NOTE(review): this message is presumably emitted by the
# scipy.stats moment routines -- confirm against the cells that trigger it.
warnings.filterwarnings(
    action='ignore',
    message='Precision loss occurred in moment calculation',
)

# Apply seaborn's white-grid theme to the figures drawn in this notebook.
sns.set_style(style='whitegrid')

In [None]:
# Load the cleaned community survey; the path is relative to the notebook's
# working directory.
survey_csv = '../../1_data_collection/data/cleaned/community_survey_clean.csv'
df = pd.read_csv(survey_csv)

# Report the sample size (one row per loaded record).
print(f"Total community members: n={len(df)}")

# Trailing expression: the notebook renders the first rows as the cell output.
df.head()

## Descriptive Statistics

In [None]:
# Section banner.
rule = "=" * 70
print(rule)
print("VOTING BEHAVIOR")
print(rule)

# Frequency table of the recorded answers (value_counts omits missing values
# by default).
votes = df['voted_for_zakho']
vote_counts = votes.value_counts()
print(vote_counts)

# Percentages use ALL rows as the denominator, so rows with a missing answer
# count towards the total even though they do not appear in the table above.
n_rows = len(df)
yes_pct = (votes == 'yes').sum() / n_rows * 100
no_pct = (votes == 'no').sum() / n_rows * 100
print(f"\nVoted 'yes': {yes_pct:.1f}%")
print(f"Voted 'no': {no_pct:.1f}%")

In [None]:
# Section banner.
rule = "=" * 70
print(rule)
print("SENTIMENT SCORES (1-5 SCALE)")
print(rule)

# Survey columns holding the ratings, paired by position with display labels.
sentiment_vars = ['feel_support_zakho', 'football_stress_relief', 'proud_when_team_plays']
sentiment_labels = ['Support for Zakho', 'Football Stress Relief', 'Pride When Team Plays']

for column, title in zip(sentiment_vars, sentiment_labels):
    # Entries that cannot be parsed as numbers become NaN and are dropped, so
    # every statistic below is based on valid numeric responses only.
    scores = pd.to_numeric(df[column], errors='coerce').dropna()

    print(f"\n{title}:")

    mean_score = scores.mean()
    sample_sd = scores.std(ddof=1)  # sample SD (n - 1 in the denominator)
    print(f"  M={mean_score:.2f}, SD={sample_sd:.2f}")

    lowest = scores.min()
    highest = scores.max()
    print(f"  Range: {lowest:.0f}-{highest:.0f}")

    # Share of valid responses rated 4 or above.
    high_pct = (scores >= 4).sum() / len(scores) * 100
    print(f"  High scores (4-5): {high_pct:.1f}%")

In [None]:
# Section banner.
rule = "=" * 70
print(rule)
print("DEMOGRAPHIC DISTRIBUTION")
print(rule)

# (heading, column, sort_by_label): the first two tables keep value_counts'
# default ordering (most frequent first); age groups are ordered by their
# group label instead.
breakdowns = (
    ("\nGender:", 'gender', False),
    ("\nResidence Status:", 'residence_status', False),
    ("\nAge Groups:", 'age_group_comm', True),
)

for heading, column, sort_by_label in breakdowns:
    print(heading)
    counts = df[column].value_counts()
    if sort_by_label:
        counts = counts.sort_index()
    print(counts)