# Data Visualization: Wealth Inequality Trends

This notebook focuses on creating publication-quality visualizations for the LRDWI paper.

## Contents
1. Setup and Data Loading
2. Time Series Visualizations
3. Comparative Visualizations
4. Distribution Visualizations
5. Summary

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Plot styling: start from the seaborn "paper" matplotlib style sheet, then let
# seaborn apply the 'paper' context (fonts scaled by 1.5) and the 'colorblind'
# palette.
plt.style.use('seaborn-v0_8-paper')
sns.set_context(context='paper', font_scale=1.5)
sns.set_palette(palette='colorblind')

# Matplotlib overrides, applied after the style sheet and seaborn settings:
# on-screen figure DPI, DPI used by savefig, and a serif font family.
plt.rcParams.update({
    'figure.dpi': 150,
    'savefig.dpi': 300,
    'font.family': 'serif',
})

print("Libraries imported successfully")

## 1. Setup and Data Loading

In [None]:
# Read the merged dataset into `df`. When the CSV is absent, `df` is left
# undefined and a hint naming the scripts to run is printed instead.
data_dir = '../data/processed'
merged_csv_path = os.path.join(data_dir, 'merged_data.csv')

if not os.path.exists(merged_csv_path):
    print("Data not found. Please run data_scraper.py and data_processing.py first.")
else:
    df = pd.read_csv(merged_csv_path)
    print(f"Data loaded: {df.shape}")
    display(df.head())

## 2. Time Series Visualizations

Create detailed time series plots showing trends in wealth inequality.

In [None]:
# Long-run trends in wealth concentration: one line per country showing the
# top 1% wealth share over time. The cell is a no-op when `df` was not loaded
# or any required column is missing.
required_cols = {'year', 'country', 'top1_wealth_share'}
if 'df' in locals() and required_cols.issubset(df.columns):
    # Create the figure only after every column check has passed, so a missing
    # column does not leave an empty figure open (and rendered) in the notebook.
    fig, ax = plt.subplots(figsize=(12, 6))

    for country in df['country'].unique():
        # Sort by year so the line is drawn chronologically.
        country_data = df[df['country'] == country].sort_values('year')
        ax.plot(country_data['year'], country_data['top1_wealth_share'],
                marker='o', linewidth=2, markersize=6, label=country)

    ax.set_xlabel('Year', fontsize=14, fontweight='bold')
    ax.set_ylabel('Top 1% Wealth Share (%)', fontsize=14, fontweight='bold')
    ax.set_title('Long-Run Dynamics of Top 1% Wealth Share',
                 fontsize=16, fontweight='bold', pad=20)
    ax.legend(loc='best', frameon=True, shadow=True)
    ax.grid(True, alpha=0.3, linestyle='--')

    # Source attribution, anchored to the top-left corner in axes coordinates.
    ax.text(0.02, 0.98, 'Source: Multiple databases',
            transform=ax.transAxes, fontsize=10,
            verticalalignment='top', style='italic')

    # savefig does not create missing directories; make sure the target exists.
    figures_dir = '../output/figures'
    os.makedirs(figures_dir, exist_ok=True)

    fig.tight_layout()
    fig.savefig(os.path.join(figures_dir, 'long_run_top1_trend.png'),
                dpi=300, bbox_inches='tight')
    plt.show()

    print("Figure saved: long_run_top1_trend.png")

In [None]:
# Multi-panel figure for the paper (2x2 grid):
#   Panels A-C: per-country time series of the top 1% share, the top 10% share
#               and the Gini index.
#   Panel D:    compound annual growth rate of the top 1% share per country,
#               computed from each country's first and last non-missing value.
# A panel whose input column is missing (or that has nothing to draw) is hidden
# rather than left as an empty set of axes. The whole cell is skipped when `df`
# was not loaded, when 'year'/'country' are missing, or when none of the metric
# columns is present.
metric_columns = ('top1_wealth_share', 'top10_wealth_share', 'gini_index')
if ('df' in locals() and 'year' in df.columns and 'country' in df.columns
        and any(col in df.columns for col in metric_columns)):
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Wealth Inequality Trends Across Countries',
                 fontsize=18, fontweight='bold', y=1.00)

    countries = df['country'].unique()

    # (axes, data column, y-axis label, panel title) for the time-series panels.
    series_panels = [
        (axes[0, 0], 'top1_wealth_share', 'Top 1% Share (%)',
         'Panel A: Top 1% Wealth Share'),
        (axes[0, 1], 'top10_wealth_share', 'Top 10% Share (%)',
         'Panel B: Top 10% Wealth Share'),
        (axes[1, 0], 'gini_index', 'Gini Index',
         'Panel C: Gini Coefficient'),
    ]
    for panel_ax, column, ylabel, title in series_panels:
        if column not in df.columns:
            panel_ax.set_visible(False)
            continue
        for country in countries:
            country_data = df[df['country'] == country].sort_values('year')
            panel_ax.plot(country_data['year'], country_data[column],
                          marker='o', linewidth=2, label=country)
        panel_ax.set_xlabel('Year')
        panel_ax.set_ylabel(ylabel)
        panel_ax.set_title(title, fontweight='bold')
        panel_ax.legend(loc='best')
        panel_ax.grid(True, alpha=0.3)

    # Panel D: growth rates comparison
    growth_ax = axes[1, 1]
    growth_rates = []
    countries_list = []
    if 'top1_wealth_share' in df.columns:
        for country in countries:
            observed = (df[df['country'] == country]
                        .sort_values('year')
                        .dropna(subset=['top1_wealth_share']))
            # A growth rate needs at least two observations.
            if len(observed) < 2:
                continue
            first_value = observed['top1_wealth_share'].iloc[0]
            last_value = observed['top1_wealth_share'].iloc[-1]
            years_diff = observed['year'].iloc[-1] - observed['year'].iloc[0]

            # Guard against a zero-length span and a non-positive base value,
            # both of which would make the compound rate undefined.
            if years_diff > 0 and first_value > 0:
                annual_growth = ((last_value / first_value) ** (1 / years_diff) - 1) * 100
                growth_rates.append(annual_growth)
                countries_list.append(country)

    if growth_rates:
        growth_ax.bar(countries_list, growth_rates)
        growth_ax.set_xlabel('Country')
        growth_ax.set_ylabel('Annual Growth Rate (%)')
        growth_ax.set_title('Panel D: Top 1% Share Growth Rates', fontweight='bold')
        # Zero line separates rising from falling concentration.
        growth_ax.axhline(y=0, color='red', linestyle='--', linewidth=1)
        growth_ax.grid(True, alpha=0.3, axis='y')
    else:
        growth_ax.set_visible(False)

    # savefig does not create missing directories; make sure the target exists.
    figures_dir = '../output/figures'
    os.makedirs(figures_dir, exist_ok=True)

    fig.tight_layout()
    fig.savefig(os.path.join(figures_dir, 'multi_panel_analysis.png'),
                dpi=300, bbox_inches='tight')
    plt.show()

    print("Multi-panel figure saved: multi_panel_analysis.png")

## 3. Comparative Visualizations

Compare wealth inequality across countries and time periods.

In [None]:
# Heatmap of the top 1% wealth share with countries as rows and years as
# columns. Skipped when `df` was not loaded, when a required column is missing,
# or when the pivot contains no data.
heatmap_cols = {'year', 'country', 'top1_wealth_share'}
if 'df' in locals() and heatmap_cols.issubset(df.columns):
    # Pivot to a country x year grid; pivot_table's default aggregation (mean)
    # collapses any duplicate country-year rows into a single cell.
    pivot_data = df.pivot_table(values='top1_wealth_share',
                                index='country',
                                columns='year')

    if pivot_data.empty:
        # Nothing to draw (e.g. every value is missing); an empty grid cannot
        # be rendered as a heatmap.
        print("No top 1% wealth share data available; heatmap skipped.")
    else:
        fig, ax = plt.subplots(figsize=(14, 6))
        sns.heatmap(pivot_data, annot=True, fmt='.1f', cmap='YlOrRd',
                    cbar_kws={'label': 'Top 1% Wealth Share (%)'}, ax=ax)
        ax.set_title('Top 1% Wealth Share: Country-Year Heatmap',
                     fontsize=16, fontweight='bold', pad=20)
        ax.set_xlabel('Year', fontsize=12, fontweight='bold')
        ax.set_ylabel('Country', fontsize=12, fontweight='bold')

        # savefig does not create missing directories; make sure the target exists.
        figures_dir = '../output/figures'
        os.makedirs(figures_dir, exist_ok=True)

        fig.tight_layout()
        fig.savefig(os.path.join(figures_dir, 'wealth_share_heatmap.png'),
                    dpi=300, bbox_inches='tight')
        plt.show()

        print("Heatmap saved: wealth_share_heatmap.png")

## 4. Distribution Visualizations

Visualize the distribution of wealth inequality metrics.

In [None]:
# Box plots for cross-country comparison: Gini index (left) and top 10% wealth
# share (right), grouped by country. The top 10% share is read from
# 'top10_wealth_share', falling back to 'wealth_share_top10'. A panel whose
# column is missing is hidden; the cell is skipped when `df` was not loaded,
# 'country' is missing, or neither metric is available.
if 'df' in locals() and 'country' in df.columns:
    has_gini = 'gini_index' in df.columns
    wealth_col = next(
        (col for col in ('top10_wealth_share', 'wealth_share_top10') if col in df.columns),
        None,
    )

    if has_gini or wealth_col is not None:
        fig, axes = plt.subplots(1, 2, figsize=(14, 6))
        gini_ax, wealth_ax = axes

        if has_gini:
            df.boxplot(column='gini_index', by='country', ax=gini_ax)
            gini_ax.set_title('Gini Index Distribution by Country', fontweight='bold')
            gini_ax.set_xlabel('Country')
            gini_ax.set_ylabel('Gini Index')
        else:
            gini_ax.set_visible(False)

        if wealth_col is not None:
            df.boxplot(column=wealth_col, by='country', ax=wealth_ax)
            wealth_ax.set_title('Top 10% Wealth Share Distribution', fontweight='bold')
            wealth_ax.set_xlabel('Country')
            wealth_ax.set_ylabel('Top 10% Share (%)')
        else:
            wealth_ax.set_visible(False)

        # Remove the default figure-level title added by the grouped boxplots.
        fig.suptitle('')

        # savefig does not create missing directories; make sure the target exists.
        figures_dir = '../output/figures'
        os.makedirs(figures_dir, exist_ok=True)

        fig.tight_layout()
        fig.savefig(os.path.join(figures_dir, 'distribution_boxplots.png'),
                    dpi=300, bbox_inches='tight')
        plt.show()

        print("Box plots saved: distribution_boxplots.png")

## 5. Summary

All visualizations have been saved to the output/figures directory and are ready for inclusion in the paper.

In [None]:
# List all PNG figures currently present in the figures directory.
figures_dir = '../output/figures'
# isdir (not exists): os.listdir would raise if the path were a regular file.
if os.path.isdir(figures_dir):
    # Sorted so the listing is stable across runs; os.listdir order is arbitrary.
    figures = sorted(name for name in os.listdir(figures_dir) if name.endswith('.png'))
    print("Generated figures:")
    print("="*50)
    # Loop variable deliberately not named `fig`, so the notebook-level `fig`
    # Figure handle set by the plotting cells is not overwritten.
    for figure_name in figures:
        print(f"  - {figure_name}")
else:
    print("Figures directory not found.")