In [5]:
from data_util import BEA
import time
import requests
import pandas as pd
from tqdm import tqdm
import os

In [2]:
# Instantiate the project-local BEA helper imported from data_util; it is the
# object later used to list tables and fetch their data.
bea = BEA()

In [None]:
# Retrieve the table catalogue. The rendered output below shows a frame with
# TableName and Description columns; TableName is what gets passed to the fetcher.
tables = bea.get_tables()

In [4]:
tables

Unnamed: 0,TableName,Description
0,T10101,Table 1.1.1. Percent Change From Preceding Per...
1,T10102,Table 1.1.2. Contributions to Percent Change i...
2,T10103,"Table 1.1.3. Real Gross Domestic Product, Quan..."
3,T10104,Table 1.1.4. Price Indexes for Gross Domestic ...
4,T10105,Table 1.1.5. Gross Domestic Product (A) (Q)
...,...,...
308,T80106,"Table 8.1.6. Real Gross Domestic Product, Chai..."
309,T80111,Table 8.1.11. Real Gross Domestic Product: Per...
310,T80200,Table 8.2. Gross Domestic Income by Type of In...
311,T80300,Table 8.3. Federal Government Current Receipts...


In [7]:
def get_unique_metrics(df):
    """
    Lists the distinct labels on the second column level of a DataFrame.

    :param df: DataFrame whose columns may or may not be a MultiIndex
    :return: List of unique second-level column labels, or None when the
             columns are not a MultiIndex
    """
    columns = df.columns
    if not isinstance(columns, pd.MultiIndex):
        # Flat columns have no second level to enumerate.
        return None

    metric_labels = columns.get_level_values(1)
    return metric_labels.unique().tolist()

def _save_table(response, save_path, table_name):
    """
    Writes one fetched table to CSV, one file per metric when columns are a MultiIndex.

    :param response: Non-empty DataFrame returned by the fetch call
    :param save_path: Directory to write into
    :param table_name: Table identifier used as the file name stem
    """
    response.index = pd.to_datetime(response.index)
    unique_metrics = get_unique_metrics(response)

    if unique_metrics is None:
        response.to_csv(os.path.join(save_path, f"{table_name}.csv"), index=True)
        return

    for metric in unique_metrics:
        # The cross-section keeps the parent's (already converted) datetime index.
        table_metric = response.xs(metric, axis=1, level=1, drop_level=True)
        table_metric.to_csv(
            os.path.join(save_path, f"{table_name}_{metric}.csv"), index=True
        )


def process_tables(bea, tables, save_path, sleep_time=5, max_retries=3):
    """
    Processes tables from BEA API, saves data to CSV, handles API rate limits.

    A table that yields no data, raises an error, or is still rate-limited after
    ``max_retries`` retries is recorded and reported at the end.

    :param bea: Instance of BEA API class (must provide ``fetch_data(table_name)``)
    :param tables: List or Series of table names
    :param save_path: Path to save CSV files
    :param sleep_time: Time to sleep between requests to avoid rate limits
    :param max_retries: How many times a table is re-requested after an HTTP 429
                        before it is counted as failed
    """
    failed_tables = []
    # Always make at least one attempt, even if a negative value is passed.
    attempts = max(0, max_retries) + 1

    for table_name in tqdm(tables, desc="Processing tables"):
        for attempt in range(attempts):
            try:
                response = bea.fetch_data(table_name)

                if response is None or response.empty:
                    failed_tables.append(table_name)
                else:
                    _save_table(response, save_path, table_name)
                    time.sleep(sleep_time)  # Avoid API rate limits
                break

            except requests.exceptions.HTTPError as e:
                # HTTPError.response may be None, so guard before reading the status.
                status = e.response.status_code if e.response is not None else None
                if status == 429 and attempt < attempts - 1:
                    retry_after = e.response.headers.get("Retry-After")
                    wait_time = int(retry_after) if retry_after and retry_after.isdigit() else 30
                    print(f"Rate limit reached. Waiting {wait_time} seconds.")
                    time.sleep(wait_time)
                    continue  # Retry the same table instead of dropping it

                print(f"HTTP Error for table {table_name}: {e}")
                failed_tables.append(table_name)
                break

            except Exception as e:
                print(f"Error processing table {table_name}: {e}")
                failed_tables.append(table_name)
                break

    if failed_tables:
        print("Failed tables:", failed_tables)

# Output directory for the per-table CSV files; created if it does not exist yet.
# NOTE(review): the leading "." makes this a hidden directory named ".BEA_all_data" —
# confirm that "./BEA_all_data/" was not the intended location.
save_path = ".BEA_all_data/"
os.makedirs(save_path, exist_ok=True)

# Only the last three catalogue entries are processed in this run.
process_tables(bea, tables["TableName"].tail(3), save_path)

Processing tables: 100%|██████████| 3/3 [00:18<00:00,  6.06s/it]


In [8]:
%ls



BEA.ipynb                             significant_features_BEA_cleaned.csv
