# Demographic Factors Analysis

This notebook analyzes demographic factors affecting student performance in Bangladesh, using a synthetic dataset generated with a fixed random seed:
- Gender-based performance analysis
- Socioeconomic status impact
- Geographic distribution analysis
- Age and grade level correlations
- Urban vs rural performance differences
- Institutional type comparisons
- Parental education and family income impact
- Combined demographic risk scoring

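**Key Demographic Variables:**
- Gender (Male/Female)
- Division and District
- Socioeconomic Status (Low/Medium/High)
- Institution Type (Government/Private/Madrasa)
- Age and Grade Level
- Urban/Rural Classification
- Mother's and Father's Education (No Education through University)
- Family Income (Very Low through Very High)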

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import scipy.stats as stats
from scipy.stats import chi2_contingency, mannwhitneyu, kruskal
import warnings
import sys
from pathlib import Path

# Add project root to Python path
sys.path.append('../..')
from src.data_processing.data_processor import DataProcessor

# Notebook-wide defaults: show every DataFrame column, use the seaborn
# matplotlib style sheet, and silence all warnings in cell output
pd.options.display.max_columns = None
plt.style.use('seaborn-v0_8')
warnings.filterwarnings('ignore')

# Default figure size and colour palette; applied after the style sheet so
# these values override whatever the style sheet set
plt.rcParams.update({'figure.figsize': (12, 8)})
sns.set_palette('Set2')

## 1. Data Loading and Demographic Variables Setup

In [None]:
# Load and process data
# NOTE(review): `processor` is not referenced by any code visible in this notebook;
# kept in case DataProcessor() has setup side effects or later cells use it — confirm.
processor = DataProcessor()

# Create comprehensive demographic dataset.
# The data is synthetic; the seed makes every run produce the same students.
np.random.seed(42)
n_students = 3000

# Bangladesh divisions and representative districts.
# Every district is listed under exactly one division: Rangpur belongs to Rangpur
# division only, so Rajshahi uses Natore as its fourth district instead. The list
# lengths are unchanged, so the random draws below are unaffected.
division_districts = {
    'Dhaka': ['Dhaka', 'Gazipur', 'Narayanganj', 'Manikganj'],
    'Chittagong': ['Chittagong', 'Cox\'s Bazar', 'Comilla', 'Feni'],
    'Khulna': ['Khulna', 'Jessore', 'Satkhira', 'Bagerhat'],
    'Rajshahi': ['Rajshahi', 'Natore', 'Bogra', 'Pabna'],
    'Sylhet': ['Sylhet', 'Moulvibazar', 'Habiganj', 'Sunamganj'],
    'Barishal': ['Barishal', 'Patuakhali', 'Bhola', 'Pirojpur'],
    'Rangpur': ['Rangpur', 'Dinajpur', 'Kurigram', 'Lalmonirhat'],
    'Mymensingh': ['Mymensingh', 'Netrokona', 'Jamalpur', 'Sherpur']
}

# Generate divisions and corresponding districts:
# a division is drawn uniformly per student, then a district uniformly within it
divisions = np.random.choice(list(division_districts.keys()), n_students)
districts = [np.random.choice(division_districts[div]) for div in divisions]

# Create demographic dataset.
# Columns are drawn independently of each other; the `p` lists are the sampling
# probabilities for the categories in the same order. The order of the columns
# below fixes the order of random draws, so reordering them changes the data.
demographic_data = pd.DataFrame({
    'student_id': [f'S{i:04d}' for i in range(1, n_students + 1)],
    'name': [f'Student {i}' for i in range(1, n_students + 1)],
    'division': divisions,
    'district': districts,
    'gender': np.random.choice(['Male', 'Female'], n_students, p=[0.52, 0.48]),
    'age': np.random.randint(12, 20, n_students),  # upper bound is exclusive: ages 12-19
    'grade_level': np.random.choice([6, 7, 8, 9, 10, 11, 12], n_students),
    'institution_type': np.random.choice(['Government', 'Private', 'Madrasa'], n_students, p=[0.65, 0.25, 0.10]),
    'socioeconomic_status': np.random.choice(['Low', 'Medium', 'High'], n_students, p=[0.45, 0.35, 0.20]),
    'area_type': np.random.choice(['Urban', 'Rural'], n_students, p=[0.35, 0.65]),
    'mother_education': np.random.choice(['No Education', 'Primary', 'Secondary', 'Higher Secondary', 'University'], n_students, p=[0.3, 0.25, 0.25, 0.15, 0.05]),
    'father_education': np.random.choice(['No Education', 'Primary', 'Secondary', 'Higher Secondary', 'University'], n_students, p=[0.25, 0.25, 0.25, 0.15, 0.10]),
    'family_income': np.random.choice(['Very Low', 'Low', 'Medium', 'High', 'Very High'], n_students, p=[0.25, 0.30, 0.25, 0.15, 0.05])
})

# Add performance variables influenced by demographics
# Gender effect (slight performance difference)
gender_effect = np.where(demographic_data['gender'] == 'Female', 0.1, 0)

# Socioeconomic effect
ses_mapping = {'Low': -0.3, 'Medium': 0, 'High': 0.4}
ses_effect = demographic_data['socioeconomic_status'].map(ses_mapping)

# Area type effect
area_effect = np.where(demographic_data['area_type'] == 'Urban', 0.2, -0.1)

# Institution type effect
inst_mapping = {'Government': 0, 'Private': 0.3, 'Madrasa': -0.2}
inst_effect = demographic_data['institution_type'].map(inst_mapping)

# Generate base performance with demographic influences
base_performance = np.random.normal(3.2, 0.8, n_students)
demographic_data['gpa'] = (base_performance + gender_effect + ses_effect + area_effect + inst_effect).clip(0, 5)

# Add subject scores
subjects = ['bangla', 'english', 'mathematics', 'science', 'social_studies']
for subject in subjects:
    subject_base = np.random.normal(3.2, 0.7, n_students)
    demographic_data[subject] = (subject_base + ses_effect * 0.5 + area_effect * 0.3).clip(0, 5)

# Add attendance with demographic influences
attendance_base = np.random.beta(8, 2, n_students)
demographic_data['attendance_rate'] = (attendance_base + ses_effect * 0.05 + area_effect * 0.02).clip(0, 1)

# Calculate derived metrics
demographic_data['days_present'] = (demographic_data['attendance_rate'] * 200).astype(int)
demographic_data['total_school_days'] = 200

print(f"Demographic data created: {demographic_data.shape}")
demographic_data.head()

## 2. Gender-based Performance Analysis

In [None]:
# Headcount and percentage share of each gender
gender_counts = demographic_data['gender'].value_counts()
print("Gender Distribution:")
print(gender_counts)
print(f"\nGender Distribution (%):")
print(gender_counts.div(len(demographic_data)).mul(100).round(1))

# GPA mean / spread / group size and mean attendance for each gender
gender_metrics = {
    'gpa': ['mean', 'std', 'count'],
    'attendance_rate': 'mean',
}
gender_performance = demographic_data.groupby('gender').agg(gender_metrics).round(3)

print("\nGender Performance Comparison:")
print(gender_performance)

# GPA samples of the two groups compared by the test below
is_male = demographic_data['gender'] == 'Male'
is_female = demographic_data['gender'] == 'Female'
male_gpa = demographic_data.loc[is_male, 'gpa']
female_gpa = demographic_data.loc[is_female, 'gpa']

# Two-sided Mann-Whitney U test (rank-based, no normality assumption)
stat, p_value = mannwhitneyu(male_gpa, female_gpa, alternative='two-sided')
print(f"\nMann-Whitney U test for gender GPA difference:")
print(f"Statistic: {stat:.2f}, P-value: {p_value:.4f}")
print(f"Significant difference: {'Yes' if p_value < 0.05 else 'No'}")

# 2x2 dashboard: share pie, GPA boxplot, subject means, institution breakdown
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 12))
(ax1, ax2), (ax3, ax4) = axes

# Top-left: gender share
ax1.pie(gender_counts.values, labels=gender_counts.index, autopct='%1.1f%%', startangle=90)
ax1.set_title('Student Gender Distribution')

# Top-right: GPA spread per gender
sns.boxplot(x='gender', y='gpa', data=demographic_data, ax=ax2)
ax2.set_title('GPA Distribution by Gender')
ax2.set_ylabel('GPA')

# Bottom-left: mean score per subject, one bar per gender
gender_subjects = demographic_data.groupby('gender')[subjects].mean()
gender_subjects.transpose().plot.bar(ax=ax3)
ax3.set_title('Average Subject Performance by Gender')
ax3.set_ylabel('Average Score')

# Bottom-right: mean GPA per institution type, one bar per gender
gpa_by_institution_gender = demographic_data.groupby(['institution_type', 'gender'])['gpa'].mean()
gender_institution = gpa_by_institution_gender.unstack()
gender_institution.plot.bar(ax=ax4)
ax4.set_title('Average GPA by Institution Type and Gender')
ax4.set_ylabel('Average GPA')

# Both bar charts share the same tick rotation and legend title
for panel in (ax3, ax4):
    panel.tick_params(axis='x', rotation=45)
    panel.legend(title='Gender')

plt.tight_layout()
plt.show()

## 3. Socioeconomic Status Impact Analysis

In [None]:
# Headcount and percentage share of each socioeconomic tier
ses_counts = demographic_data['socioeconomic_status'].value_counts()
print("Socioeconomic Status Distribution:")
print(ses_counts)
print(f"\nSES Distribution (%):")
print(ses_counts.div(len(demographic_data)).mul(100).round(1))

# GPA and attendance summary per tier
ses_metrics = {
    'gpa': ['mean', 'std', 'count'],
    'attendance_rate': ['mean', 'std'],
}
ses_performance = demographic_data.groupby('socioeconomic_status').agg(ses_metrics).round(3)

print("\nPerformance by Socioeconomic Status:")
print(ses_performance)

# One GPA sample per tier, in ordinal order, for the omnibus test below
ses_levels = ['Low', 'Medium', 'High']
low_ses, medium_ses, high_ses = (
    demographic_data.loc[demographic_data['socioeconomic_status'] == level, 'gpa']
    for level in ses_levels
)

# Kruskal-Wallis H test across the three tiers
stat, p_value = kruskal(low_ses, medium_ses, high_ses)
print(f"\nKruskal-Wallis test for SES GPA differences:")
print(f"Statistic: {stat:.2f}, P-value: {p_value:.4f}")
print(f"Significant difference: {'Yes' if p_value < 0.05 else 'No'}")

# 2x2 dashboard: tier counts, GPA boxplot, attendance boxplot, tier mix per division
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 12))
(ax1, ax2), (ax3, ax4) = axes

# Top-left: students per tier
ses_counts.plot.bar(ax=ax1, color='skyblue')
ax1.set_title('Socioeconomic Status Distribution')
ax1.set_ylabel('Number of Students')
ax1.tick_params(axis='x', rotation=45)

# Top-right: GPA spread per tier
sns.boxplot(x='socioeconomic_status', y='gpa', order=ses_levels,
            data=demographic_data, ax=ax2)
ax2.set_title('GPA Distribution by Socioeconomic Status')
ax2.set_ylabel('GPA')
ax2.tick_params(axis='x', rotation=45)

# Bottom-left: attendance spread per tier
sns.boxplot(x='socioeconomic_status', y='attendance_rate', order=ses_levels,
            data=demographic_data, ax=ax3)
ax3.set_title('Attendance Rate by Socioeconomic Status')
ax3.set_ylabel('Attendance Rate')
ax3.tick_params(axis='x', rotation=45)

# Bottom-right: row-normalised tier mix within each division, as percentages
ses_share_by_division = pd.crosstab(
    demographic_data['division'],
    demographic_data['socioeconomic_status'],
    normalize='index',
)
ses_division = ses_share_by_division.mul(100)
ses_division.plot.bar(stacked=True, ax=ax4)
ax4.set_title('Socioeconomic Status Distribution by Division (%)')
ax4.set_ylabel('Percentage')
ax4.tick_params(axis='x', rotation=45)
ax4.legend(title='SES', bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

# Interactive (plotly) version of the GPA-by-tier boxplot
fig_interactive = px.box(
    demographic_data,
    x='socioeconomic_status',
    y='gpa',
    color='socioeconomic_status',
    category_orders={'socioeconomic_status': ses_levels},
    title='Interactive GPA Distribution by Socioeconomic Status',
)
fig_interactive.show()

## 4. Geographic Distribution Analysis

In [None]:
# The same summary (GPA mean/std/count, mean attendance) is built per division
# and per area type
geo_metrics = {
    'gpa': ['mean', 'std', 'count'],
    'attendance_rate': 'mean',
}

division_performance = demographic_data.groupby('division').agg(geo_metrics).round(3)

print("Performance by Division:")
print(division_performance)

area_performance = demographic_data.groupby('area_type').agg(geo_metrics).round(3)

print("\nPerformance by Area Type:")
print(area_performance)

# GPA samples for the urban/rural comparison
urban_gpa, rural_gpa = (
    demographic_data.loc[demographic_data['area_type'] == area, 'gpa']
    for area in ('Urban', 'Rural')
)

# Two-sided Mann-Whitney U test on urban vs rural GPA
stat, p_value = mannwhitneyu(urban_gpa, rural_gpa, alternative='two-sided')
print(f"\nMann-Whitney U test for Urban vs Rural GPA difference:")
print(f"Statistic: {stat:.2f}, P-value: {p_value:.4f}")
print(f"Significant difference: {'Yes' if p_value < 0.05 else 'No'}")

# 2x2 dashboard: division ranking, urban/rural boxplot, headcount heatmap, division x gender
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 12))
(ax1, ax2), (ax3, ax4) = axes

# Top-left: divisions ranked by mean GPA (ascending, so the best is drawn at the top)
mean_gpa_per_division = demographic_data.groupby('division')['gpa'].mean()
division_gpa = mean_gpa_per_division.sort_values()
division_gpa.plot.barh(ax=ax1, color='lightcoral')
ax1.set_title('Average GPA by Division')
ax1.set_xlabel('Average GPA')

# Top-right: GPA spread for urban vs rural students
sns.boxplot(x='area_type', y='gpa', data=demographic_data, ax=ax2)
ax2.set_title('GPA Distribution: Urban vs Rural')
ax2.set_ylabel('GPA')

# Bottom-left: raw student counts for every division / area-type pair
division_area = pd.crosstab(demographic_data['division'], demographic_data['area_type'])
sns.heatmap(division_area, annot=True, cmap='Blues', ax=ax3)
ax3.set_title('Student Distribution: Division vs Area Type')
ax3.set_ylabel('Division')
ax3.set_xlabel('Area Type')

# Bottom-right: mean GPA per division, one bar per gender
gpa_by_division_gender = demographic_data.groupby(['division', 'gender'])['gpa'].mean()
division_gender = gpa_by_division_gender.unstack()
division_gender.plot.bar(ax=ax4)
ax4.set_title('Average GPA by Division and Gender')
ax4.set_ylabel('Average GPA')
ax4.tick_params(axis='x', rotation=45)
ax4.legend(title='Gender')

plt.tight_layout()
plt.show()

# Interactive (plotly) bar chart of mean GPA per division, in groupby order (unsorted)
division_gpa_table = demographic_data.groupby('division')['gpa'].mean().reset_index()
fig_geo = px.bar(
    division_gpa_table,
    x='division',
    y='gpa',
    title='Interactive Average GPA by Division',
    color='gpa',
    color_continuous_scale='Viridis',
)
fig_geo.update_layout(xaxis_tickangle=-45)
fig_geo.show()

## 5. Institution Type Comparison

In [None]:
# Headcount and percentage share of each institution type
institution_counts = demographic_data['institution_type'].value_counts()
print("Institution Type Distribution:")
print(institution_counts)
print(f"\nInstitution Distribution (%):")
print(institution_counts.div(len(demographic_data)).mul(100).round(1))

# GPA and attendance summary per institution type
institution_metrics = {
    'gpa': ['mean', 'std', 'count'],
    'attendance_rate': ['mean', 'std'],
}
institution_performance = (
    demographic_data.groupby('institution_type').agg(institution_metrics).round(3)
)

print("\nPerformance by Institution Type:")
print(institution_performance)

# One GPA sample per institution type for the omnibus test below
govt_gpa, private_gpa, madrasa_gpa = (
    demographic_data.loc[demographic_data['institution_type'] == kind, 'gpa']
    for kind in ('Government', 'Private', 'Madrasa')
)

# Kruskal-Wallis H test across the three institution types
stat, p_value = kruskal(govt_gpa, private_gpa, madrasa_gpa)
print(f"\nKruskal-Wallis test for Institution Type GPA differences:")
print(f"Statistic: {stat:.2f}, P-value: {p_value:.4f}")
print(f"Significant difference: {'Yes' if p_value < 0.05 else 'No'}")

# 2x2 dashboard: share pie, GPA boxplot, type mix per division, subject means
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 12))
(ax1, ax2), (ax3, ax4) = axes

# Top-left: institution type share
ax1.pie(institution_counts.values, labels=institution_counts.index, autopct='%1.1f%%')
ax1.set_title('Institution Type Distribution')

# Top-right: GPA spread per institution type
sns.boxplot(x='institution_type', y='gpa', data=demographic_data, ax=ax2)
ax2.set_title('GPA Distribution by Institution Type')
ax2.set_ylabel('GPA')
ax2.tick_params(axis='x', rotation=45)

# Bottom-left: row-normalised institution mix within each division, as percentages
institution_share_by_division = pd.crosstab(
    demographic_data['division'],
    demographic_data['institution_type'],
    normalize='index',
)
institution_division = institution_share_by_division.mul(100)
institution_division.plot.bar(stacked=True, ax=ax3)
ax3.set_title('Institution Type Distribution by Division (%)')
ax3.set_ylabel('Percentage')
ax3.tick_params(axis='x', rotation=45)
ax3.legend(title='Institution Type', bbox_to_anchor=(1.05, 1), loc='upper left')

# Bottom-right: mean score per subject, one bar per institution type
institution_subjects = demographic_data.groupby('institution_type')[subjects].mean()
institution_subjects.transpose().plot.bar(ax=ax4)
ax4.set_title('Average Subject Performance by Institution Type')
ax4.set_ylabel('Average Score')
ax4.tick_params(axis='x', rotation=45)
ax4.legend(title='Institution Type')

plt.tight_layout()
plt.show()

## 6. Parental Education Impact

In [None]:
# Mean GPA per parental education level, sorted from lowest to highest mean
gpa_by_mother_edu = demographic_data.groupby('mother_education')['gpa']
gpa_by_father_edu = demographic_data.groupby('father_education')['gpa']
mother_edu_performance = gpa_by_mother_edu.mean().sort_values()
father_edu_performance = gpa_by_father_edu.mean().sort_values()

print("Performance by Mother's Education:")
print(mother_edu_performance.round(3))

print("\nPerformance by Father's Education:")
print(father_edu_performance.round(3))

# Ordinal order of education levels, used for the x-axis of the boxplots
edu_order = ['No Education', 'Primary', 'Secondary', 'Higher Secondary', 'University']

# 2x2 dashboard: GPA by mother's / father's education, combined score scatter, income boxplot
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(15, 12))
(ax1, ax2), (ax3, ax4) = axes

# Top row: identical boxplots for each parent's education level
for panel, column, heading in (
    (ax1, 'mother_education', 'Student GPA by Mother\'s Education'),
    (ax2, 'father_education', 'Student GPA by Father\'s Education'),
):
    sns.boxplot(data=demographic_data, x=column, y='gpa', order=edu_order, ax=panel)
    panel.set_title(heading)
    panel.set_ylabel('GPA')
    panel.tick_params(axis='x', rotation=45)

# Combined parental education score: each parent's level is scored 0-4 and the
# two scores are added, giving a value between 0 and 8
edu_score_map = {'No Education': 0, 'Primary': 1, 'Secondary': 2, 'Higher Secondary': 3, 'University': 4}
mother_score = demographic_data['mother_education'].map(edu_score_map)
father_score = demographic_data['father_education'].map(edu_score_map)
demographic_data['combined_parent_edu'] = mother_score + father_score

# Bottom-left: GPA against the combined score, coloured by socioeconomic status
sns.scatterplot(x='combined_parent_edu', y='gpa', hue='socioeconomic_status',
                alpha=0.6, data=demographic_data, ax=ax3)
ax3.set_title('GPA vs Combined Parental Education')
ax3.set_xlabel('Combined Parental Education Score')
ax3.set_ylabel('GPA')

# Bottom-right: GPA spread per family income band, in ordinal order
income_order = ['Very Low', 'Low', 'Medium', 'High', 'Very High']
sns.boxplot(x='family_income', y='gpa', order=income_order,
            data=demographic_data, ax=ax4)
ax4.set_title('Student GPA by Family Income')
ax4.set_ylabel('GPA')
ax4.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# Correlation matrix of GPA and the combined score (pandas default method);
# the off-diagonal cell is the coefficient reported below
correlation_data = demographic_data[['gpa', 'combined_parent_edu']].corr()
print(f"\nCorrelation between GPA and Combined Parental Education: {correlation_data.iloc[0, 1]:.3f}")

## 7. Multi-factor Demographic Analysis

In [None]:
# Summarise GPA (mean and group size) and mean attendance for every
# combination of gender, socioeconomic status and area type
multi_factor_keys = ['gender', 'socioeconomic_status', 'area_type']
multi_factor_metrics = {
    'gpa': ['mean', 'count'],
    'attendance_rate': 'mean',
}
multi_factor_analysis = (
    demographic_data.groupby(multi_factor_keys).agg(multi_factor_metrics).round(3)
)

print("Multi-factor Analysis (Gender, SES, Area Type):")
print(multi_factor_analysis)

# Create demographic risk score
def calculate_risk_score(row):
    """Return the additive demographic risk score for one student record.

    `row` is indexed by column name. Points awarded:
    socioeconomic status Low +3 or Medium +1; Rural area +2;
    Madrasa institution +2; combined parental education score of 2 or less +2.
    The result is an integer between 0 and 9.
    """
    ses = row['socioeconomic_status']
    ses_points = 3 if ses == 'Low' else (1 if ses == 'Medium' else 0)
    area_points = 2 if row['area_type'] == 'Rural' else 0
    institution_points = 2 if row['institution_type'] == 'Madrasa' else 0
    parent_edu_points = 2 if row['combined_parent_edu'] <= 2 else 0

    return ses_points + area_points + institution_points + parent_edu_points

# Score every student: axis=1 passes each row to calculate_risk_score in turn
demographic_data['risk_score'] = demographic_data.apply(calculate_risk_score, axis=1)

# Risk categories
def categorize_risk(score):
    """Map a numeric risk score to its risk category label.

    A score of at least 7 is 'Very High Risk', at least 5 'High Risk',
    at least 3 'Medium Risk', and anything else 'Low Risk'.
    """
    # Bands are checked from the highest floor down; the first match wins
    bands = (
        (7, 'Very High Risk'),
        (5, 'High Risk'),
        (3, 'Medium Risk'),
    )
    for floor, label in bands:
        if score >= floor:
            return label
    return 'Low Risk'

# Label every student with the risk band of their score
demographic_data['risk_category'] = demographic_data['risk_score'].apply(categorize_risk)

# Risk analysis: GPA mean/std, mean attendance and headcount per risk category
# (the count of student_id is the number of students in the category)
risk_analysis = demographic_data.groupby('risk_category').agg({
    'gpa': ['mean', 'std'],
    'attendance_rate': 'mean',
    'student_id': 'count'
}).round(3)

print("\nRisk Category Analysis:")
print(risk_analysis)

# Visualization: share pie, GPA boxplot, score scatter, category mix per division
fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 12))

# Risk category distribution
risk_counts = demographic_data['risk_category'].value_counts()
ax1.pie(risk_counts.values, labels=risk_counts.index, autopct='%1.1f%%')
ax1.set_title('Student Risk Category Distribution')

# GPA by risk category, ordered from lowest to highest risk
risk_order = ['Low Risk', 'Medium Risk', 'High Risk', 'Very High Risk']
sns.boxplot(data=demographic_data, x='risk_category', y='gpa', 
            order=risk_order, ax=ax2)
ax2.set_title('GPA Distribution by Risk Category')
ax2.set_ylabel('GPA')
ax2.tick_params(axis='x', rotation=45)

# Risk vs performance scatter
sns.scatterplot(data=demographic_data, x='risk_score', y='gpa', 
                hue='gender', alpha=0.6, ax=ax3)
ax3.set_title('Risk Score vs GPA by Gender')
ax3.set_xlabel('Demographic Risk Score')
ax3.set_ylabel('GPA')

# Risk distribution by division (row-normalised, as percentages)
risk_division = pd.crosstab(demographic_data['division'], demographic_data['risk_category'], normalize='index') * 100
# reindex instead of risk_division[risk_order]: the crosstab only has columns for
# categories that actually occur, so plain column selection raises KeyError when
# a category has no students. Missing categories are drawn as 0%.
risk_division.reindex(columns=risk_order, fill_value=0).plot(kind='bar', stacked=True, ax=ax4)
ax4.set_title('Risk Category Distribution by Division (%)')
ax4.set_ylabel('Percentage')
ax4.tick_params(axis='x', rotation=45)
ax4.legend(title='Risk Category', bbox_to_anchor=(1.05, 1), loc='upper left')

plt.tight_layout()
plt.show()

## 8. Demographic Insights and Recommendations

In [None]:
def _mean_gpa(column, value):
    """Mean GPA of the students whose `column` equals `value`."""
    return demographic_data.loc[demographic_data[column] == value, 'gpa'].mean()


# Headline numbers for each demographic dimension
print("🎯 DEMOGRAPHIC ANALYSIS INSIGHTS")
print("=" * 50)

# Gender: group means and the absolute gap between them
male_avg = _mean_gpa('gender', 'Male')
female_avg = _mean_gpa('gender', 'Female')
print(f"👥 Gender Analysis:")
print(f"   • Male students average GPA: {male_avg:.2f}")
print(f"   • Female students average GPA: {female_avg:.2f}")
print(f"   • Gender performance gap: {abs(female_avg - male_avg):.3f}")

# Socioeconomic status: lowest vs highest tier (signed gap, High minus Low)
low_ses_avg = _mean_gpa('socioeconomic_status', 'Low')
high_ses_avg = _mean_gpa('socioeconomic_status', 'High')
print(f"💰 Socioeconomic Impact:")
print(f"   • Low SES average GPA: {low_ses_avg:.2f}")
print(f"   • High SES average GPA: {high_ses_avg:.2f}")
print(f"   • SES achievement gap: {high_ses_avg - low_ses_avg:.3f}")

# Geography: urban vs rural (signed gap, Urban minus Rural)
urban_avg = _mean_gpa('area_type', 'Urban')
rural_avg = _mean_gpa('area_type', 'Rural')
print(f"🏙️ Geographic Impact:")
print(f"   • Urban students average GPA: {urban_avg:.2f}")
print(f"   • Rural students average GPA: {rural_avg:.2f}")
print(f"   • Urban-rural gap: {urban_avg - rural_avg:.3f}")

# Institution type: mean GPA for each of the three types
govt_avg = _mean_gpa('institution_type', 'Government')
private_avg = _mean_gpa('institution_type', 'Private')
madrasa_avg = _mean_gpa('institution_type', 'Madrasa')
print(f"🏫 Institution Analysis:")
print(f"   • Government schools average GPA: {govt_avg:.2f}")
print(f"   • Private schools average GPA: {private_avg:.2f}")
print(f"   • Madrasa average GPA: {madrasa_avg:.2f}")

# Risk: number and share of students in the two highest risk bands
in_high_risk_band = demographic_data['risk_category'].isin(['High Risk', 'Very High Risk'])
high_risk_students = int(in_high_risk_band.sum())
high_risk_percentage = high_risk_students / len(demographic_data) * 100
print(f"⚠️ Risk Assessment:")
print(f"   • High/Very High risk students: {high_risk_students} ({high_risk_percentage:.1f}%)")

# Recommendations
# The leading newline is written as an escape sequence: a single-quoted f-string
# cannot span two physical lines, and the raw line break here was a SyntaxError
# that prevented the whole cell from running.
print(f"\n📋 POLICY RECOMMENDATIONS")
print("=" * 50)

# Conditional items are printed only when the corresponding GPA gap, computed
# earlier in this cell, exceeds its threshold
print(f"🎯 Targeted Interventions:")
if high_ses_avg - low_ses_avg > 0.5:
    print(f"   • Urgent: Address socioeconomic achievement gap")
    print(f"   • Implement scholarship programs for low SES students")

if urban_avg - rural_avg > 0.3:
    print(f"   • Priority: Improve rural education infrastructure")
    print(f"   • Deploy digital learning resources to rural areas")

print(f"🏫 Institutional Development:")
if private_avg - govt_avg > 0.2:
    print(f"   • Enhance government school quality and resources")

# The remaining recommendations are unconditional
print(f"   • Strengthen teacher training programs")
print(f"   • Implement performance monitoring systems")

print(f"👨‍👩‍👧‍👦 Family Engagement:")
print(f"   • Develop parental education programs")
print(f"   • Create community learning centers")
print(f"   • Promote family literacy initiatives")

print(f"📊 Monitoring and Evaluation:")
print(f"   • Implement demographic-sensitive tracking systems")
print(f"   • Regular equity audits of educational outcomes")
print(f"   • Develop early warning systems for at-risk students")