# Visualizations for Publication

Publication-quality visualizations following APA 7th edition guidelines.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings

# Publication styling: apply the seaborn theme first, then layer the
# matplotlib overrides on top so they are not reset by the theme.
sns.set_style('whitegrid')
sns.set_context('paper', font_scale=1.2)
plt.rcParams.update({
    'figure.dpi': 300,             # resolution of displayed figures
    'savefig.dpi': 300,            # resolution of exported figures
    'font.family': 'sans-serif',
    'font.sans-serif': ['Arial'],  # preferred sans-serif face
})

# Colorblind-friendly palette used by the plotting cells
colors = sns.color_palette('colorblind')

# Suppress every warning for the remainder of the session
warnings.filterwarnings('ignore')

## Load Data

In [None]:
# Locations of the cleaned survey exports (relative to this notebook)
girls_csv = '../../1_data_collection/data/cleaned/girls_survey_clean.csv'
community_csv = '../../1_data_collection/data/cleaned/community_survey_clean.csv'

# Girls survey: read and report the sample size
girls_df = pd.read_csv(girls_csv)
print(f"Girls sample: n={len(girls_df)}")

# Community survey: read and report the sample size
community_df = pd.read_csv(community_csv)
print(f"Community sample: n={len(community_df)}")

## Figure 1: Sample Characteristics

Demographic composition of study participants.

In [None]:
# Figure 1 is a 2x2 panel: girls' program participation (A) and age
# distribution (B) on the top row, community gender (C) and residence
# status (D) on the bottom row.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Fixed display order for the 'in_program' groups. value_counts() and
# groupby() order their output by frequency / sort order, so pinning the
# order here keeps labels, colors and data aligned in panels A and B.
program_order = ['yes', 'no']

# Girls: Program participation
ax1 = axes[0, 0]
participation = (
    girls_df['in_program'].value_counts().reindex(program_order, fill_value=0)
)
# Group sizes in the labels are computed from the data rather than typed in,
# so the figure cannot drift out of sync with the cleaned dataset.
participation_labels = [
    f"Participants (n={participation['yes']})",
    f"Non-participants (n={participation['no']})",
]
colors_part = [colors[0], colors[1]]
ax1.pie(participation.values, labels=participation_labels,
        autopct='%1.1f%%', startangle=90, colors=colors_part)
ax1.set_title('A. Program Participation', fontweight='bold', pad=20)

# Girls: Age distribution
ax2 = axes[0, 1]
# Rows are participation groups, columns are age groups. The reindex
# guarantees both rows exist (as zeros) even if one group is absent.
age_data = (
    girls_df.groupby(['in_program', 'age_group'])
    .size()
    .unstack(fill_value=0)
    .reindex(program_order, fill_value=0)
)
x = np.arange(len(age_data.columns))
width = 0.35  # bar width; the two groups sit side by side around each tick
ax2.bar(x - width/2, age_data.loc['yes'], width, label='Participants', color=colors[0])
ax2.bar(x + width/2, age_data.loc['no'], width, label='Non-participants', color=colors[1])
ax2.set_xlabel('Age Group')
ax2.set_ylabel('Count')
ax2.set_title('B. Age Distribution', fontweight='bold', pad=20)
ax2.set_xticks(x)
ax2.set_xticklabels(age_data.columns)
ax2.legend()
ax2.grid(axis='y', alpha=0.3)

# Community: Gender distribution
ax3 = axes[1, 0]
gender_counts = community_df['gender'].value_counts()
ax3.bar(gender_counts.index, gender_counts.values, color=colors[2:4])
ax3.set_xlabel('Gender')
ax3.set_ylabel('Count')
ax3.set_title('C. Community Gender Distribution', fontweight='bold', pad=20)
ax3.grid(axis='y', alpha=0.3)

# Community: Residence status
ax4 = axes[1, 1]
residence_counts = community_df['residence_status'].value_counts()
ax4.barh(residence_counts.index, residence_counts.values, color=colors[4:7])
ax4.set_xlabel('Count')
ax4.set_ylabel('Residence Status')
ax4.set_title('D. Community Residence Status', fontweight='bold', pad=20)
ax4.grid(axis='x', alpha=0.3)

plt.tight_layout()
plt.savefig('../figures/figure1_sample_characteristics.png', bbox_inches='tight', dpi=300)
plt.show()

print("Figure 1 saved: figures/figure1_sample_characteristics.png")