In [51]:
import wbdata
import pandas as pd
import pandas_datareader as pdr
from datetime import datetime

# --------------------------
# 1. GDP Growth (World Bank)
# --------------------------
# Fetch GDP growth for the United States (annual data)
gdp_indicator = {"NY.GDP.MKTP.KD.ZG": "GDP_Growth"}
countries = ["USA"]
start_year, end_year = "1999", "2022"

gdp = wbdata.get_dataframe(gdp_indicator, country=countries, date=(start_year, end_year))
gdp = gdp.reset_index()
gdp.rename(columns={"GDP_Growth": "GDP_USA"}, inplace=True)
gdp["date"] = pd.to_datetime(gdp["date"].astype(str) + "-01-01")  # Convert to DatetimeIndex with Jan 1st
gdp.set_index("date", inplace=True)

# --------------------------
# 2. Inflation (FRED)
# --------------------------
# Fetch U.S. CPI data
inflation = pdr.DataReader(["CPIAUCSL", "CPILFESL"], "fred", start="1999-01-01")
inflation.columns = ["Inflation_US_All", "Inflation_US_Core"]

# --------------------------
# 3. USD Exchange Rate (FRED)
# --------------------------
# Fetch USD to EUR exchange rate (daily data)
usd_eur = pdr.DataReader("DEXUSEU", "fred", start="1999-01-01")
usd_eur.columns = ["USD_EUR"]

# Resample USD_EUR to monthly frequency (using the last value of each month)
usd_eur_monthly = usd_eur.resample("ME").last()  # Use "ME" for month-end frequency

# Align USD_EUR to the start of the month
usd_eur_monthly.index = usd_eur_monthly.index.to_period("M").to_timestamp("M") + pd.offsets.MonthBegin(1)

# --------------------------
# Align GDP to Monthly Frequency
# --------------------------
# Reindex GDP to match the monthly frequency of inflation and exchange rate data
gdp = gdp.reindex(inflation.index, method="ffill")  # Forward-fill annual GDP values

# --------------------------
# Merge DataFrames
# --------------------------
# Merge all datasets on their index (date)
merged_df = gdp.join(inflation, how="outer").join(usd_eur_monthly, how="outer")
merged_df = merged_df.sort_index()

# Save to CSV
merged_df.to_csv("../src/data/us_economic_data.csv")

# Display sample data
print(merged_df.head())

             GDP_USA  Inflation_US_All  Inflation_US_Core  USD_EUR
DATE                                                              
1999-01-01  4.788425             164.7              175.6      NaN
1999-02-01  4.077586             164.7              175.6   1.1371
1999-03-01  4.077586             164.8              175.7   1.0995
1999-04-01  4.077586             165.9              176.3   1.0808
1999-05-01  4.077586             166.0              176.5   1.0564
