# 03 - Comparative Report

Generate a comprehensive comparison report across all municipalities.

## Overview
1. Load data and run comparison engine
2. Tax efficiency analysis
3. Value vs Tax relationship
4. Publication-ready visualizations


In [None]:
# Setup
import sys
# Put '../src' at the front of the import path so the project packages
# imported below can be resolved. The path is relative, so it is interpreted
# against the notebook's current working directory.
sys.path.insert(0, '../src')

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Project-local modules (presumably located under ../src - confirm).
from data_collection.data_loader import DataLoader
from analysis.comparison import ComparisonEngine
from visualization.charts import ChartGenerator

# Apply seaborn's 'whitegrid' theme for the plots in this notebook.
sns.set_theme(style='whitegrid')
print("Setup complete!")


## 1. Load Data & Generate Report


In [None]:
# Obtain a dataset (stored file first, generated sample otherwise) and run
# the comparison engine over it. `report` is consumed by the cells below.
loader = DataLoader()

# Municipalities requested when the stored dataset is not available.
fallback_municipalities = [
    'bronxville',
    'eastchester_unincorp',
    'tuckahoe',
    'scarsdale',
    'larchmont',
    'mamaroneck_village',
]

try:
    # Preferred path: the unified dataset file.
    dataset = loader.load_unified_dataset('unified_sales.parquet')
    print(f"Loaded real data: {len(dataset)} records")
except FileNotFoundError:
    # File missing: ask the loader for generated sample data instead.
    dataset = loader.generate_sample_data(
        municipalities=fallback_municipalities,
        samples_per_muni=25,
    )
    print(f"Generated sample data: {len(dataset)} records")

# Feed the dataset to the engine and build the full report.
engine = ComparisonEngine()
engine.load_from_dataset(dataset)
report = engine.generate_full_report()

print(f"\nReport generated for {len(report.municipalities_compared)} municipalities")


## 2. Full Comparison Report


In [None]:
# Print the full report; print() writes the string form of the `report`
# object produced by engine.generate_full_report() in the previous cell.
print(report)


## 3. Visualization


In [None]:
# Build the chart generator from the report's metrics.
# `charts` is reused by the following plotting cells.
report_metrics = report.metrics
charts = ChartGenerator(report_metrics)

# Value comparison chart, called with show=True and save=False.
charts.plot_value_comparison(save=False, show=True)


In [None]:
# Tax efficiency chart, called with show=True and save=False.
charts.plot_tax_efficiency(save=False, show=True)


In [None]:
# Value-vs-tax scatter plot, called with show=True and save=False.
charts.plot_value_vs_tax(save=False, show=True)


## 4. Data Table & Export


In [None]:
# Convert the report to a DataFrame; `metrics_df` is reused by the export cell.
metrics_df = report.to_dataframe()

# Columns shown in the summary table.
display_cols = [
    'municipality',
    'sample_size',
    'value_per_sqft_median',
    'tax_per_sqft_median',
    'effective_rate_median',
    'tax_efficiency_ratio',
]

# Restrict to the summary columns, then order by median value per square
# foot, highest first. The final bare expression is what the notebook renders.
summary_view = metrics_df[display_cols]
summary_view.sort_values(by='value_per_sqft_median', ascending=False)


In [None]:
# Save the metrics table to CSV and print follow-up instructions.
from pathlib import Path

output_file = '../data/processed/comparison_report.csv'

# DataFrame.to_csv does not create missing parent directories, so make sure
# the target directory exists before writing (no-op when it already does).
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the DataFrame index out of the CSV.
metrics_df.to_csv(output_file, index=False)
print(f"✅ Report saved to {output_file}")

# To save charts:
# charts.save_all('../output/charts/')

print("\n🎉 Analysis complete! See the dashboard for interactive exploration:")
print("   streamlit run src/visualization/dashboard.py")
