# H1 Health Analytics - Health Insurance Cost Analysis

This notebook demonstrates the ETL, analysis, and visualization capabilities of the H1 Health Analytics project.

## 1. Setup and Imports

In [None]:
import sys
import os

# Add the project's src directory to the import path so the etl, analysis and
# visualization modules imported below can be found. The path is built from the
# PARENT of the current working directory, so this assumes the kernel was
# started in a direct subdirectory of the project root.
src_dir = os.path.join(os.path.dirname(os.getcwd()), 'src')
# Re-running this cell must not pile up duplicate entries: drop an existing
# entry first, then put src at the front so it still takes import priority.
if src_dir in sys.path:
    sys.path.remove(src_dir)
sys.path.insert(0, src_dir)

from etl import HealthInsuranceETL
from analysis import HealthInsuranceAnalyzer
from visualization import HealthInsuranceVisualizer

import warnings
# Install a blanket 'ignore' filter: no category or module is given, so every
# warning raised after this point is suppressed (presumably to keep cell
# output uncluttered).
# NOTE(review): this also hides deprecation and runtime warnings from the
# pipeline and plotting code — consider a narrower filter.
warnings.filterwarnings('ignore')

## 2. ETL Pipeline

In [None]:
# Initialize ETL
# No arguments are passed, so the class's own defaults apply (input location
# etc. are not visible here — check HealthInsuranceETL).
etl = HealthInsuranceETL()

# Run complete pipeline and keep its return value; the analyzer and visualizer
# cells below are both constructed from processed_data.
processed_data = etl.run_pipeline()

# Display first few rows
# (.head() is the cell's last expression, so the notebook renders its result;
# presumably processed_data is a pandas DataFrame — confirm.)
processed_data.head()

## 3. Data Analysis

In [None]:
# Initialize analyzer with the data returned by the ETL pipeline
analyzer = HealthInsuranceAnalyzer(processed_data)

# Basic statistics
# The heading is printed first; the call below is the cell's last expression,
# so any value it returns is rendered as the cell output.
print("Basic Statistics:")
analyzer.get_basic_statistics()

In [None]:
# Smoker impact
print("\nSmoker Impact Analysis:")
# The result is iterated with .items() below, so it is expected to be a
# dict-like mapping.
smoker_impact = analyzer.analyze_smoker_impact()
# Print one indented "key: value" line per entry.
for key, value in smoker_impact.items():
    print(f"  {key}: {value}")

In [None]:
# Regional analysis
# The call is the cell's last expression, so any value it returns is rendered
# as the cell output below the printed heading.
print("\nRegional Analysis:")
analyzer.analyze_regional_differences()

In [None]:
# Correlation analysis
# The call is the cell's last expression, so any value it returns is rendered
# as the cell output below the printed heading.
print("\nCorrelation Matrix:")
analyzer.correlation_analysis()

## 4. Data Visualization

In [None]:
# Initialize visualizer on the same processed data used by the analyzer.
# output_dir is a relative path, so it resolves against the kernel's current
# working directory (presumably where figures go when save=True — confirm).
visualizer = HealthInsuranceVisualizer(processed_data, output_dir='../outputs')

In [None]:
# Charges distribution
# save=False is passed explicitly; presumably this shows the figure inline
# without writing a file to output_dir — confirm in HealthInsuranceVisualizer.
visualizer.plot_charges_distribution(save=False)

In [None]:
# Smoker impact
# save=False is passed explicitly; presumably this shows the figure inline
# without writing a file to output_dir — confirm in HealthInsuranceVisualizer.
visualizer.plot_smoker_impact(save=False)

In [None]:
# Age vs charges
# save=False is passed explicitly; presumably this shows the figure inline
# without writing a file to output_dir — confirm in HealthInsuranceVisualizer.
visualizer.plot_age_vs_charges(save=False)

In [None]:
# BMI vs charges
# save=False is passed explicitly; presumably this shows the figure inline
# without writing a file to output_dir — confirm in HealthInsuranceVisualizer.
visualizer.plot_bmi_vs_charges(save=False)

In [None]:
# Regional analysis
# save=False is passed explicitly; presumably this shows the figure inline
# without writing a file to output_dir — confirm in HealthInsuranceVisualizer.
visualizer.plot_regional_analysis(save=False)

In [None]:
# Correlation heatmap
# save=False is passed explicitly; presumably this shows the figure inline
# without writing a file to output_dir — confirm in HealthInsuranceVisualizer.
visualizer.plot_correlation_heatmap(save=False)

## 5. Key Insights Summary

In [None]:
# Print comprehensive report
# Uses the analyzer created in the Data Analysis section; judging by the
# method name the report is written to stdout rather than returned — confirm.
analyzer.print_comprehensive_report()