In [2]:
# !pip install tqdm

import requests
import pandas as pd
from tqdm import tqdm
from datetime import datetime
import time

# === SETTINGS ===
# Root of the IMF DataMapper REST API; one indicator code is appended per request.
BASE_URL = "https://www.imf.org/external/datamapper/api/v1"

# Years retained from every series: 2005 through 2023, both ends included.
YEAR_RANGE = [*range(2005, 2024)]

# API indicator code -> human-readable label. The label is stored in the
# "indicator" column of the long table and becomes a column name in the wide one.
INDICATORS = dict(
    rev="Government revenue (% of GDP)",
    exp="Government expenditure (% of GDP)",
    prim_exp="Primary expenditure (% of GDP)",
    ie="Interest on public debt (% of GDP)",
    pb="Primary balance (% of GDP)",
    d="Gross public debt (% of GDP)",
    rgc="Real GDP growth rate (%)",
    rltir="Real long-term bond yield (%)",
)

# Three-letter country code -> display name for the G20 member countries covered.
G20_COUNTRIES = dict(
    ARG="Argentina",
    AUS="Australia",
    BRA="Brazil",
    CAN="Canada",
    CHN="China",
    FRA="France",
    DEU="Germany",
    IND="India",
    IDN="Indonesia",
    ITA="Italy",
    JPN="Japan",
    KOR="South Korea",
    MEX="Mexico",
    RUS="Russia",
    SAU="Saudi Arabia",
    ZAF="South Africa",
    TUR="Turkey",
    GBR="United Kingdom",
    USA="United States",
)

# Countries actually kept when filtering API responses; an independent copy so
# the peer group can be edited without touching G20_COUNTRIES.
PEER_COUNTRIES = dict(G20_COUNTRIES)

# === MAIN FUNCTION ===
def fetch_and_process_data():
    """Fetch IMF DataMapper series and save them in long and wide CSV formats.

    For every code in ``INDICATORS`` the function requests
    ``{BASE_URL}/{code}``, keeps the observations whose country is in
    ``PEER_COUNTRIES`` and whose year is in ``YEAR_RANGE``, and builds:

    * a long table with the columns ``year``, ``country_code``, ``country``,
      ``indicator`` and ``value``;
    * a wide table with one row per (country, year) and one column per
      indicator label.

    Both tables are written to the current working directory as
    ``<YYYYMMDD>_tfda_publicfinance_long_g20.csv`` and
    ``<YYYYMMDD>_tfda_publicfinance_wide_g20.csv`` (date taken from
    ``datetime.now()``), and a short summary is printed.

    An indicator whose request or parsing fails is reported and skipped, so a
    single bad series does not abort the whole run.

    Raises:
        RuntimeError: if no observation was collected for any indicator.
    """
    print("Starting data processing...")

    # YEAR_RANGE is a list; a set gives constant-time membership tests inside
    # the innermost loop.
    wanted_years = set(YEAR_RANGE)

    # 1. Fetch data from IMF API
    all_data = []
    for code, label in tqdm(INDICATORS.items(), desc="Fetching indicators"):
        # Deliberately best-effort per indicator: report the failure and move on.
        try:
            response = requests.get(f"{BASE_URL}/{code}", timeout=10)
            # Surface HTTP errors (4xx/5xx) explicitly instead of trying to
            # parse an error body as if it were the expected JSON payload.
            response.raise_for_status()
            data = response.json().get("values", {}).get(code, {})

            for country, values in data.items():
                if country not in PEER_COUNTRIES:
                    continue
                for year, value in values.items():
                    try:
                        year_int = int(year)
                        if year_int not in wanted_years:
                            continue
                        # Only None / "" mean "no observation". A plain
                        # truthiness test would also discard a genuine 0.
                        is_missing = value is None or value == ""
                        all_data.append({
                            "year": year_int,
                            "country_code": country,
                            "country": PEER_COUNTRIES[country],
                            "indicator": label,
                            "value": None if is_missing else float(value)
                        })
                    except (ValueError, TypeError):
                        # Non-numeric year key or value: drop this observation only.
                        continue
        except Exception as e:
            print(f"Skipping {code}: {str(e)}")
            continue

    if not all_data:
        raise RuntimeError("No data fetched - check connection or API status")

    # 2. Create long format dataframe
    df_long = pd.DataFrame(all_data)
    df_long = df_long.sort_values(["country", "year", "indicator"])

    # 3. Create wide format dataframe (one column per indicator label)
    df_wide = df_long.pivot_table(
        index=["country", "year"],
        columns="indicator",
        values="value"
    ).reset_index()

    # 4. Save both formats, stamped with today's date
    today = datetime.now().strftime("%Y%m%d")
    long_file = f"{today}_tfda_publicfinance_long_g20.csv"
    wide_file = f"{today}_tfda_publicfinance_wide_g20.csv"

    df_long.to_csv(long_file, index=False)
    df_wide.to_csv(wide_file, index=False)

    print("\n✅ Successfully saved:")
    print(f"- Long format: {long_file} ({len(df_long):,} rows)")
    print(f"- Wide format: {wide_file} ({len(df_wide):,} rows)")
    print("\nSample wide data:")
    print(df_wide.head())

# === EXECUTION ===
if __name__ == "__main__":
    # Run the download/export step; any failure is reported as a one-line
    # message instead of a traceback.
    try:
        fetch_and_process_data()
    except Exception as err:
        print(f"\n❌ Error: {err}")


Starting data processing...


Fetching indicators: 100%|██████████| 8/8 [00:23<00:00,  2.97s/it]


✅ Successfully saved:
- Long format: 20250705_tfda_publicfinance_long_g20.csv (2,862 rows)
- Wide format: 20250705_tfda_publicfinance_wide_g20.csv (361 rows)

Sample wide data:
indicator    country  year  Government expenditure (% of GDP)  \
0          Argentina  2005                          24.425560   
1          Argentina  2006                          26.616662   
2          Argentina  2007                          29.550588   
3          Argentina  2008                          30.762348   
4          Argentina  2009                          34.524436   

indicator  Government revenue (% of GDP)  Gross public debt (% of GDP)  \
0                              27.766183                     80.281799   
1                              28.266732                     70.792690   
2                              30.307563                     62.132711   
3                              31.114321                     53.813852   
4                              32.694392                     




In [20]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from datetime import datetime
import os

def create_correlation_pdf():
    """Generate a PDF of cross-country correlation heatmaps, one page per indicator.

    Reads the wide-format CSV stamped with today's date
    (``<YYYYMMDD>_tfda_publicfinance_wide_g20.csv``) from the current working
    directory. For each listed indicator that is present as a column, the data
    are pivoted to a year-by-country table, the pairwise correlations between
    the country columns are computed and rounded to two decimals, and the
    matrix is drawn as an annotated heatmap without colorbar, axis labels or
    tick marks. Pages are written to
    ``<YYYYMMDD>_tfda_correlation_heatmaps_g20.pdf``.

    Because the input name uses today's date, the CSV must carry the same date
    stamp as the day this function runs.

    Indicators missing from the file, or whose heatmap cannot be produced, are
    reported and skipped.

    Raises:
        FileNotFoundError: if today's wide-format CSV does not exist.
    """
    today = datetime.now().strftime("%Y%m%d")
    input_file = f"{today}_tfda_publicfinance_wide_g20.csv"
    pdf_path = f"{today}_tfda_correlation_heatmaps_g20.pdf"

    # Set Seaborn style
    sns.set_style("white", {
        'axes.grid': False,
        'font.family': 'sans-serif'
    })

    try:
        df = pd.read_csv(input_file)
    except FileNotFoundError as exc:
        # Name the missing file so the user can see which date stamp was expected.
        raise FileNotFoundError(
            f"Input file not found: {input_file}. Please run data collection script first."
        ) from exc
    print(f"✓ Successfully loaded data from {input_file}")

    indicators = [
        "Government revenue (% of GDP)",
        "Government expenditure (% of GDP)",
        "Primary expenditure (% of GDP)",
        "Interest on public debt (% of GDP)",
        "Primary balance (% of GDP)",
        "Gross public debt (% of GDP)",
        "Real GDP growth rate (%)",
        "Real long-term bond yield (%)"
    ]

    with PdfPages(pdf_path) as pdf:
        for indicator in indicators:
            if indicator not in df.columns:
                print(f"✗ Skipping missing indicator: {indicator}")
                continue

            # Label without the unit suffix, e.g. "Government revenue".
            short_name = indicator.split('(')[0].strip()

            fig = None
            try:
                # Prepare correlation matrix (rows: years, columns: countries)
                corr_matrix = df.pivot_table(
                    index="year",
                    columns="country",
                    values=indicator
                ).corr().round(2)

                # Explicit figure/axes so the figure can be closed reliably below.
                fig, ax = plt.subplots(figsize=(12, 10))

                # Create clean heatmap
                sns.heatmap(
                    corr_matrix,
                    annot=True,
                    fmt=".2f",
                    cmap="viridis",
                    vmin=-1,
                    vmax=1,
                    center=0,
                    linewidths=0.5,
                    cbar=False,  # No colorbar
                    annot_kws={"size": 9},
                    ax=ax
                )

                # Remove axis labels
                ax.set(xlabel=None, ylabel=None)  # No x/y axis labels
                ax.tick_params(left=False, bottom=False)  # No tick marks

                # Formatting
                ax.set_title(
                    f"Correlation of {short_name}\n(2005-2023)",
                    pad=20,
                    fontsize=14,
                    fontweight='bold'
                )
                fig.tight_layout()

                # Add to PDF
                pdf.savefig(fig)
                print(f"✓ Created clean heatmap for {short_name}")

            except Exception as e:
                print(f"✗ Failed to create heatmap for {indicator}: {str(e)}")
            finally:
                # Close on success and on failure so figures do not accumulate.
                if fig is not None:
                    plt.close(fig)

    print(f"\n✅ Clean correlation heatmaps saved to {os.path.abspath(pdf_path)}")

if __name__ == "__main__":
    # Banner: a 60-character rule above and below a centred title.
    banner_width = 60
    rule = "=" * banner_width
    print("\n" + rule)
    print("Generating Clean Correlation Heatmaps".center(banner_width))
    print(rule + "\n")

    # Build the PDF; any failure is reported as a one-line message.
    try:
        create_correlation_pdf()
    except Exception as err:
        print(f"\n❌ Error: {err}")


           Generating Clean Correlation Heatmaps            

✓ Successfully loaded data from 20250705_tfda_publicfinance_wide_g20.csv
✓ Created clean heatmap for Government revenue
✓ Created clean heatmap for Government expenditure
✓ Created clean heatmap for Primary expenditure
✓ Created clean heatmap for Interest on public debt
✓ Created clean heatmap for Primary balance
✓ Created clean heatmap for Gross public debt
✓ Created clean heatmap for Real GDP growth rate
✓ Created clean heatmap for Real long-term bond yield

✅ Clean correlation heatmaps saved to /Users/arifpras/Library/CloudStorage/OneDrive-Kemenkeu/01_Kemenkeu/DJPPR_DataAnalytics/tfda-djppr/scripts/20250705_tfda_correlation_heatmaps_g20.pdf
