In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr
from rpy2.robjects.conversion import localconverter

# Bind the PNADcIBGE R package through rpy2 so its functions are callable
# from Python. importr raises PackageNotInstalledError when the package is
# missing from the R library.
pnadc_lib = importr('PNADcIBGE')

# Survey period: every quarter of the chosen year.
year = 2024
quarters = list(range(1, 5))

# Columns to request, wrapped as an R character vector. Later code in this
# cell uses UF as the state label, VD4020 as income and V1028 as the weight.
requested_columns = ['Ano', 'Trimestre', 'UF', 'VD4020', 'V1028']
selected_vars = robjects.StrVector(requested_columns)

# Helpers are defined inside this cell so it can be run on its own.
def lorenz_curve(income, weights):
    """Build a weighted Lorenz curve.

    Parameters
    ----------
    income : array-like
        Incomes, one per record (expected to be positive).
    weights : array-like
        Sampling weights aligned with ``income``.

    Returns
    -------
    (pop_share, inc_share) : tuple of numpy.ndarray
        Cumulative population and income shares, sorted by income. Both
        arrays start at 0.0 (the origin of the curve) and end at 1.0.
    """
    income = np.asarray(income, dtype=float)
    weights = np.asarray(weights, dtype=float)
    order = np.argsort(income, kind='stable')
    income, weights = income[order], weights[order]

    cum_w = np.cumsum(weights)
    cum_inc = np.cumsum(income * weights)

    # Prepend the origin: without it the first segment of the curve is
    # missing from both the plot and the area under the curve.
    pop_share = np.concatenate(([0.0], cum_w / cum_w[-1]))
    inc_share = np.concatenate(([0.0], cum_inc / cum_inc[-1]))
    return pop_share, inc_share


def gini_coefficient(pop_share, inc_share):
    """Return the Gini index, 1 - 2 * (area under the Lorenz curve).

    The area uses the trapezoidal rule written as an explicit sum, which
    avoids ``np.trapz`` (deprecated since NumPy 2.0 in favor of
    ``np.trapezoid``).
    """
    return 1 - np.sum(np.diff(pop_share) * (inc_share[1:] + inc_share[:-1]))


# (state label, line color, x position of the stats box in data coordinates).
# The boxes sit at different x positions so they do not overlap.
state_styles = [
    ('Mato Grosso', 'blue', 0.05),
    ('São Paulo', 'green', 0.55),
]

# Process each quarter: one figure per quarter.
for q in quarters:
    # 1. Download/fetch the quarter's microdata (an R object)
    pnad_r_data = pnadc_lib.get_pnadc(
        year=year,
        quarter=q,
        vars=selected_vars,
        design=False,
        labels=True
    )

    # 2. Convert the R data frame to pandas
    with localconverter(robjects.default_converter + pandas2ri.converter):
        df_all = robjects.conversion.rpy2py(pnad_r_data)

    # 3. Set up the plot with the perfect-equality diagonal
    fig, ax = plt.subplots(figsize=(10, 8))
    ax.plot([0, 1], [0, 1], linestyle='--', color='gray', label='Perfect Equality')

    # 4. Process each state (Mato Grosso and São Paulo)
    for state, color, x_pos in state_styles:
        # Keep only this state's records with strictly positive income
        df_state = df_all[(df_all['UF'] == state) & (df_all['VD4020'] > 0)]

        # Guard: an empty selection would otherwise fail on cum_w[-1]
        if df_state.empty:
            print(f"No positive-income records for {state} in {year} Q{q}; skipping.")
            continue

        pop_share, inc_share = lorenz_curve(
            df_state['VD4020'].values, df_state['V1028'].values
        )

        # Metrics: Gini and Pietra (max gap between diagonal and curve)
        gini = gini_coefficient(pop_share, inc_share)
        pietra = np.max(pop_share - inc_share)

        # Income shares read off the curve by linear interpolation, so the
        # value is taken exactly at the 50% / 90% / 99% population marks
        # rather than at the first observation past each mark.
        share_50 = np.interp(0.5, pop_share, inc_share) * 100
        share_10 = (1 - np.interp(0.9, pop_share, inc_share)) * 100
        share_1 = (1 - np.interp(0.99, pop_share, inc_share)) * 100

        # Plot the curve
        ax.plot(pop_share, inc_share, color=color, label=f"{state} (Gini: {gini:.3f})")

        # Annotate stats on the plot
        stats_label = (
            f"--- {state} ---\n"
            f"Pietra: {pietra:.3f}\n"
            f"Bottom 50%: {share_50:.1f}%\n"
            f"Top 10%: {share_10:.1f}%\n"
            f"Top 1%: {share_1:.1f}%"
        )
        ax.text(x_pos, 0.2, stats_label, fontsize=9, bbox=dict(facecolor='white', alpha=0.7))

    # Finalize the plot; the title identifies which quarter the figure shows
    ax.set_title(f"Lorenz Curves - {year} Q{q}")
    ax.set_xlabel("Cumulative Share of Population")
    ax.set_ylabel("Cumulative Share of Income (VD4020)")
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)
    plt.show()
    # Release the figure so figures do not accumulate across quarters
    plt.close(fig)

print(f"All quarters for {year} processed successfully.")

PackageNotInstalledError: The R package "PNADcIBGE" is not installed.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr
from rpy2.robjects.conversion import localconverter

# Bind the PNADcIBGE R package so its functions are callable from Python.
pnadc_lib = importr('PNADcIBGE')

# Survey period analysed in this cell: 2024 Q4.
year, quarter = 2024, 4

# Columns to request, as an R character vector. Later code in this cell uses
# UF as the state label, VD4020 as income and V1028 as the weight.
selected_vars = robjects.StrVector(['UF', 'VD4020', 'V1028'])

# 1. Fetch the quarter's data; the call returns an R object.
pnad_r_data = pnadc_lib.get_pnadc(
    year=year, quarter=quarter, vars=selected_vars, design=False, labels=True
)

# 2. Convert the R object to pandas. The pandas conversion rules are only
#    active inside the localconverter context.
r_to_pandas = robjects.default_converter + pandas2ri.converter
with localconverter(r_to_pandas):
    df_all = robjects.conversion.rpy2py(pnad_r_data)

# Helpers are defined inside this cell so it can be run on its own.
def lorenz_curve(income, weights):
    """Build a weighted Lorenz curve.

    Parameters
    ----------
    income : array-like
        Incomes, one per record (expected to be positive).
    weights : array-like
        Sampling weights aligned with ``income``.

    Returns
    -------
    (pop_share, inc_share) : tuple of numpy.ndarray
        Cumulative population and income shares, sorted by income. Both
        arrays start at 0.0 (the origin of the curve) and end at 1.0.
    """
    income = np.asarray(income, dtype=float)
    weights = np.asarray(weights, dtype=float)
    order = np.argsort(income, kind='stable')
    income, weights = income[order], weights[order]

    cum_w = np.cumsum(weights)
    cum_inc = np.cumsum(income * weights)

    # Prepend the origin: without it the first segment of the curve is
    # missing from both the plot and the area under the curve.
    pop_share = np.concatenate(([0.0], cum_w / cum_w[-1]))
    inc_share = np.concatenate(([0.0], cum_inc / cum_inc[-1]))
    return pop_share, inc_share


def gini_coefficient(pop_share, inc_share):
    """Return the Gini index, 1 - 2 * (area under the Lorenz curve).

    The area uses the trapezoidal rule written as an explicit sum, which
    avoids ``np.trapz`` (deprecated since NumPy 2.0 in favor of
    ``np.trapezoid``).
    """
    return 1 - np.sum(np.diff(pop_share) * (inc_share[1:] + inc_share[:-1]))


# 3. Set up the visualization
fig, ax = plt.subplots(figsize=(14, 10))
# Reference line for perfect equality
ax.plot([0, 1], [0, 1], linestyle='--', color='lightgray', label='Perfect Equality', linewidth=1.5)

# Rows of the summary table, one dict per state plus one for Brazil
results_list = []

# Apply the positive-income filter once instead of once per state
positive_income = df_all[df_all['VD4020'] > 0]

# Unique states and one color per state.
# plt.get_cmap replaces plt.cm.get_cmap, which was removed in Matplotlib 3.9.
states = sorted(df_all['UF'].unique())
colors = plt.get_cmap('tab20', len(states))

# 4. Loop through each state to calculate metrics and plot
for i, state in enumerate(states):
    df_state = positive_income[positive_income['UF'] == state]

    # Skip states with no positive-income records
    if df_state.empty:
        continue

    pop_share, inc_share = lorenz_curve(
        df_state['VD4020'].values, df_state['V1028'].values
    )

    # Metrics: Gini and Pietra (max gap between diagonal and curve)
    gini = gini_coefficient(pop_share, inc_share)
    pietra = np.max(pop_share - inc_share)

    # Plot the state curve; Mato Grosso is drawn thicker to highlight it
    line_w = 3 if state == 'Mato Grosso' else 0.8
    ax.plot(pop_share, inc_share, color=colors(i), label=f"{state} (Gini: {gini:.3f})", linewidth=line_w, alpha=0.7)

    # Table data
    results_list.append({'State': state, 'Gini': gini, 'Pietra': pietra})

# 5. Brazil as a whole: computed from the pooled records of all states
#    (not an average of the state-level values)
pop_share_br, inc_share_br = lorenz_curve(
    positive_income['VD4020'].values, positive_income['V1028'].values
)
gini_br = gini_coefficient(pop_share_br, inc_share_br)
pietra_br = np.max(pop_share_br - inc_share_br)

# Plot the Brazil curve in thick black
ax.plot(pop_share_br, inc_share_br, color='black', label=f"BRASIL (Gini: {gini_br:.3f})", linewidth=4)

# Add Brazil to the table list
results_list.append({'State': 'BRASIL', 'Gini': gini_br, 'Pietra': pietra_br})

# 6. Finalize the plot
ax.set_title(f"Lorenz Curves by State - {year} Q{quarter}", fontsize=14)
ax.set_xlabel("Cumulative share of population", fontsize=12)
ax.set_ylabel("Cumulative share of income", fontsize=12)
# Legend goes outside the axes because it has one entry per state
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8, ncol=1)
ax.grid(True, alpha=0.3)
fig.tight_layout()
plt.show()

# 7. Display the summary table, most unequal first
df_summary = pd.DataFrame(results_list).sort_values(by='Gini', ascending=False)
print("\n--- Inequality Summary Table (States & Brazil) ---")
print(df_summary.to_string(index=False))

In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import rpy2.robjects as robjects
from rpy2.robjects import pandas2ri
from rpy2.robjects.packages import importr
from rpy2.robjects.conversion import localconverter

# 1. Bind the PNADcIBGE R package so its functions are callable from Python
pnadc_lib = importr('PNADcIBGE')

# 2. Configuration: survey period and requested columns
year, quarter = 2024, 4
requested_columns = ['UF', 'VD4020', 'V1028']
selected_vars = robjects.StrVector(requested_columns)

# 3. Download/fetch data from IBGE; the call returns an R object
pnad_r_data = pnadc_lib.get_pnadc(
    year=year, quarter=quarter, vars=selected_vars, design=False, labels=True
)

# 4. Convert the R object to pandas. The pandas conversion rules are only
#    active inside the localconverter context.
r_to_pandas = robjects.default_converter + pandas2ri.converter
with localconverter(r_to_pandas):
    df_all = robjects.conversion.rpy2py(pnad_r_data)

# Helpers are defined inside this cell so it can be run on its own.
def lorenz_curve(income, weights):
    """Build a weighted Lorenz curve.

    Parameters
    ----------
    income : array-like
        Incomes, one per record (expected to be positive).
    weights : array-like
        Sampling weights aligned with ``income``.

    Returns
    -------
    (pop_share, inc_share) : tuple of numpy.ndarray
        Cumulative population and income shares, sorted by income. Both
        arrays start at 0.0 (the origin of the curve) and end at 1.0.
    """
    income = np.asarray(income, dtype=float)
    weights = np.asarray(weights, dtype=float)
    order = np.argsort(income, kind='stable')
    income, weights = income[order], weights[order]

    cum_w = np.cumsum(weights)
    cum_inc = np.cumsum(income * weights)

    # Prepend the origin: without it the first segment of the curve is
    # missing from both the plot and the area under the curve.
    pop_share = np.concatenate(([0.0], cum_w / cum_w[-1]))
    inc_share = np.concatenate(([0.0], cum_inc / cum_inc[-1]))
    return pop_share, inc_share


def gini_coefficient(pop_share, inc_share):
    """Return the Gini index, 1 - 2 * (area under the Lorenz curve).

    The area uses the trapezoidal rule written as an explicit sum, which
    avoids ``np.trapz`` (deprecated since NumPy 2.0 in favor of
    ``np.trapezoid``).
    """
    return 1 - np.sum(np.diff(pop_share) * (inc_share[1:] + inc_share[:-1]))


# 5. Initialize the Plotly figure
fig = go.Figure()

# Reference line (perfect equality); excluded from hover
fig.add_trace(go.Scatter(
    x=[0, 1], y=[0, 1],
    mode='lines',
    name='Perfect Equality',
    line=dict(color='black', dash='dash'),
    hoverinfo='skip'
))

# 6. Loop through all states to calculate metrics and add them to the graph
# Apply the positive-income filter once instead of once per state
positive_income = df_all[df_all['VD4020'] > 0]
states = sorted(df_all['UF'].unique())

for state in states:
    df_state = positive_income[positive_income['UF'] == state]

    # Skip states with no positive-income records
    if df_state.empty:
        continue

    pop_share, inc_share = lorenz_curve(
        df_state['VD4020'].values, df_state['V1028'].values
    )

    # Gini calculation
    gini = gini_coefficient(pop_share, inc_share)

    # Pietra index: largest vertical gap between the diagonal and the curve,
    # plus the point on the curve where that gap occurs
    distances = pop_share - inc_share
    max_dist_idx = np.argmax(distances)
    pietra_val = distances[max_dist_idx]
    p_x = pop_share[max_dist_idx]
    p_y = inc_share[max_dist_idx]

    # Cumulative income share at the 90% and 99% population marks, linearly
    # interpolated so the value is taken exactly at the mark rather than at
    # the first observation past it
    s_90 = np.interp(0.9, pop_share, inc_share)
    s_99 = np.interp(0.99, pop_share, inc_share)

    # Only Mato Grosso is shown initially; other states start hidden and
    # can be enabled from the legend
    is_mt = (state == 'Mato Grosso')
    trace_visibility = True if is_mt else "legendonly"

    # Add the Lorenz curve
    fig.add_trace(go.Scatter(
        x=pop_share,
        y=inc_share,
        mode='lines',
        name=f"{state} (Gini: {gini:.3f})",
        visible=trace_visibility,
        hovertemplate=(
            f"<b>{state}</b><br>" +
            "Pop. Share: %{x:.2%}<br>" +
            "Inc. Share: %{y:.2%}<br>" +
            f"Top 10% Share: {(1-s_90):.2%}<br>" +
            f"Top 1% Share: {(1-s_99):.2%}<br>" +
            "<extra></extra>"
        )
    ))

    # Add the Pietra segment: vertical line from the curve (p_x, p_y) up to
    # the diagonal (p_x, p_x)
    fig.add_trace(go.Scatter(
        x=[p_x, p_x], y=[p_y, p_x],
        mode='lines+markers',
        name=f"Pietra ({state}): {pietra_val:.3f}",
        line=dict(width=2, color='red'),
        visible=trace_visibility,
        hoverinfo='text',
        text=f"{state} Pietra Index: {pietra_val:.3f} (Max Gap)"
    ))

# 7. Add vertical markers for the requested segments
fig.add_vline(x=0.5, line_width=1, line_dash="dot", line_color="green", annotation_text="Bottom 50%")
fig.add_vline(x=0.9, line_width=1, line_dash="dot", line_color="orange", annotation_text="Top 10%")
fig.add_vline(x=0.99, line_width=1, line_dash="dot", line_color="purple", annotation_text="Top 1%")

# 8. Final layout adjustments; the title follows the configured period
fig.update_layout(
    title=f"Interactive Comparative Lorenz Curves - {year} Q{quarter}",
    xaxis_title="Cumulative Share of Population",
    yaxis_title="Cumulative Share of Income",
    template="plotly_white",
    legend_title="Click to toggle states",
    hovermode="x unified",
    width=1000,
    height=750
)

fig.show()