<a href="https://colab.research.google.com/github/dmst0ut/importforecasting_group2/blob/main/Get_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
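# Retrieve BEA data (NIPA dataset) using the BEA API
  # GDP (2017 chained dollars)
  # Government Revenue from Customs Duties (2017 chained dollars)
  # Foreign transactions: total imports (2017 chained dollars) -- this table is currently commented out in `tables` below
  # NOTE(review): confirm the units listed above; the GDP values returned for T10101 (7.9, 9.3, 0.3, ...) look like percent changes rather than chained-dollar levels.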

import requests
import pandas as pd
from google.colab import userdata

# Read the BEA API key from the Colab secret store so it is never written into the notebook
BEA_API_key = userdata.get('BEA_API_key')

# ====== USER INPUT ======
user_id = BEA_API_key  # sent as "UserID" on every BEA request; comes from the Colab secret above
# BEA NIPA table code -> display name (used as a column suffix) and the LineNumber values to keep
tables = {
    # NOTE(review): the original comment called this "Table 1.1.3 - GDP"; code T10101 presumably
    # maps to Table 1.1.1 -- confirm. Line 1 comes back as "Gross domestic product".
    "T10101": {
        "name": "GDP & Economic Indicators",
        "lines": ["1"],
    },
    # Table 3.2; line 6 comes back as "Customs duties" (the original comment said "Total Receipts")
    "T30200": {
        "name": "Government Receipts & Outlays",
        "lines": ["6"],
    },
#    "T40206B": {"name": "Imports", "lines": ["94"]}  # Table 4.2.6B - Foreign Transactions: Imports
}
start_year = 1959
end_year = 2024
frequency = "Q"  # A = Annual, Q = Quarterly, M = Monthly
# ========================

# The API takes the requested years as one comma-separated string (end_year inclusive)
years = ",".join(map(str, range(start_year, end_year + 1)))

# Endpoint shared by all BEA requests
base_url = "https://apps.bea.gov/api/data"

def fetch_bea_data(table_name, lines, timeout=30):
    """Fetch one BEA NIPA table and return the requested lines as a wide DataFrame.

    The request is built from the notebook-level settings ``user_id``,
    ``frequency``, ``years`` and ``base_url``. The column suffix is looked up
    in ``tables`` and falls back to ``table_name`` when the table is not listed.

    Parameters
    ----------
    table_name : str
        BEA NIPA table code, e.g. ``"T10101"``.
    lines : list of str
        Values matched (via ``isin``) against the payload's ``LineNumber`` field.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 30).

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by ``Date`` with one column per ``LineDescription``,
        each named ``"<description> (<table display name>)"``. ``None`` is
        returned (after printing a message) when the request fails, the
        response is not the expected JSON, or none of ``lines`` is present.
    """
    params = {
        "UserID": user_id,
        "method": "GetData",
        "DataSetName": "NIPA",
        "TableName": table_name,
        "Frequency": frequency,
        "Year": years,
        "ResultFormat": "JSON"
    }

    # Without a timeout a stalled connection would hang the notebook indefinitely.
    try:
        response = requests.get(base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Request for Table {table_name} failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    # A 200 response is not guaranteed to carry JSON (e.g. an HTML error page).
    try:
        data = response.json()
    except ValueError:
        print(f"Response for Table {table_name} is not valid JSON: {response.text[:200]}")
        return None

    try:
        df = pd.DataFrame(data["BEAAPI"]["Results"]["Data"])

        # Filter first so only the requested lines are parsed; copy so the
        # column assignments below do not write into a slice of the raw frame.
        df = df[df["LineNumber"].isin(lines)].copy()
        if df.empty:
            print(f"No rows found for lines {lines} in Table {table_name}")
            return None

        # Convert TimePeriod to the timestamp at the start of each quarter.
        # NOTE(review): parsing is fixed to quarterly periods regardless of the
        # notebook-level `frequency`; revisit if annual/monthly data is requested.
        df["Date"] = pd.PeriodIndex(df["TimePeriod"], freq='Q').to_timestamp()

        # Values arrive as text with thousands separators; unparseable entries become NaN
        df["DataValue"] = pd.to_numeric(
            df["DataValue"].astype(str).str.replace(',', '', regex=False),
            errors='coerce',
        )

        # Debugging aid: number of distinct periods retrieved for the selected lines
        print(f"\nFetched {table_name}: {df['TimePeriod'].nunique()} unique periods")

        # One row per date, one column per line description
        df = df.pivot_table(index="Date", columns="LineDescription", values="DataValue", aggfunc="first")

        # Tag columns with the table's display name so merged frames stay distinguishable.
        # `.get` keeps a missing `tables` entry from being misreported as a JSON problem.
        label = tables.get(table_name, {}).get("name", table_name)
        df.columns = [f"{col} ({label})" for col in df.columns]

        return df
    except (KeyError, TypeError):
        # Expected keys are missing, or a level of the payload is not a mapping
        print(f"Unexpected JSON structure for Table {table_name}: {data}")
        return None

# Download each configured table; a fetch that failed returns None and is left out
dataframes = {}
for table_code, table_info in tables.items():
    table_frame = fetch_bea_data(table_code, table_info["lines"])
    if table_frame is not None:
        dataframes[table_code] = table_frame

# Standardize the date index before merging
# for key, df in dataframes.items():
#    df.index = df.index.to_period("Q")  # Convert to quarterly periods to align

# Combine the per-table frames on their shared Date index
if not dataframes:
    print("No data retrieved from BEA API.")
else:
    # Seed with the first frame, then outer-merge the rest so no date is dropped
    remaining_frames = iter(dataframes.values())
    merged_df = next(remaining_frames)
    for next_frame in remaining_frames:
        merged_df = merged_df.merge(next_frame, left_index=True, right_index=True, how="outer")  # Keep all data

    # Debugging aid: count of NaN cells per row; any non-zero count hints at misaligned dates
    missing_values = merged_df.isna().sum(axis=1)
    if missing_values.any():
        print("\nRows with missing values (check for time misalignment):")
        print(merged_df[missing_values > 0])

    # Preview the combined frame and its column dtypes
    print("\nMerged DataFrame:")
    print(merged_df.head())
    print(merged_df.dtypes)


Fetched T10101: 264 unique periods

Fetched T30200: 264 unique periods

Merged DataFrame:
            Gross domestic product (GDP & Economic Indicators)  \
Date                                                             
1959-01-01                                                7.9    
1959-04-01                                                9.3    
1959-07-01                                                0.3    
1959-10-01                                                1.1    
1960-01-01                                                9.3    

            Customs duties (Government Receipts & Outlays)  
Date                                                        
1959-01-01                                             952  
1959-04-01                                            1076  
1959-07-01                                            1092  
1959-10-01                                            1076  
1960-01-01                                            1176  
Gross domestic prod

In [None]:
import requests
import pandas as pd
from google.colab import userdata

# Observations endpoint used for every FRED series request
fred_base_url = "https://api.stlouisfed.org/fred/series/observations"

# Read the FRED API key from the Colab secret store
FRED_API_key = userdata.get('FRED_API_key')

# ====== USER INPUT ======
# Column label -> FRED series id
series_ids = {
    "Federal Funds Rate (FRED)": "FEDFUNDS",
    "Real Imports (FRED)": "IMPGSC1",
}
start_date, end_date = "1959-01-01", "2024-12-31"
# Quarterly. NOTE: this rebinds the notebook-wide name `frequency`, which
# fetch_bea_data also reads at call time (it was "Q" in the BEA cell).
frequency = "q"
# ========================

# Function to fetch a FRED series into a DataFrame
def fetch_fred_series(series_id, series_name, timeout=30):
    """Fetch one FRED series and return it as a date-indexed DataFrame.

    The request uses the notebook-level settings ``fred_base_url``,
    ``FRED_API_key``, ``start_date``, ``end_date`` and ``frequency``, and asks
    FRED to aggregate with the ``avg`` method.

    Parameters
    ----------
    series_id : str
        FRED series identifier, e.g. ``"FEDFUNDS"``.
    series_name : str
        Column name given to the returned values.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``date`` whose ``value`` column is renamed to
        ``series_name`` and converted to numbers (unparseable entries become
        NaN). An empty DataFrame is returned, after printing a message, when
        the request fails or the response holds no usable observations.
    """
    params = {
        "series_id": series_id,
        "api_key": FRED_API_key,
        "file_type": "json",
        "observation_start": start_date,
        "observation_end": end_date,
        "frequency": frequency,
        "aggregation_method": "avg"
    }

    # Without a timeout a stalled connection would hang the notebook indefinitely.
    try:
        response = requests.get(fred_base_url, params=params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error fetching {series_id}: {exc}")
        return pd.DataFrame()

    if response.status_code != 200:
        print(f"Error fetching {series_id}: {response.status_code}")
        return pd.DataFrame()

    try:
        observations = response.json()["observations"]
    except (ValueError, KeyError, TypeError):
        # Body is not JSON, or it lacks the "observations" list
        print(f"Error fetching {series_id}: unexpected response format")
        return pd.DataFrame()

    # An empty list would build a frame with no columns and break the lookups below
    if not observations:
        print(f"Error fetching {series_id}: no observations returned")
        return pd.DataFrame()

    raw = pd.DataFrame(observations)
    try:
        return (
            raw.assign(
                date=pd.to_datetime(raw["date"]),
                value=pd.to_numeric(raw["value"], errors="coerce"),
            )
            .set_index("date")
            # errors="ignore": tolerate payloads that omit the realtime columns
            .drop(columns=["realtime_start", "realtime_end"], errors="ignore")
            .rename(columns={"value": series_name})
        )
    except KeyError as exc:
        print(f"Error fetching {series_id}: missing field {exc}")
        return pd.DataFrame()

# Fetch every series declared in `series_ids` (column label -> FRED id) instead of
# repeating the ids and labels as literals, so editing the config actually takes effect.
fred_frames = {
    series_name: fetch_fred_series(series_id, series_name)
    for series_name, series_id in series_ids.items()
}

# Keep the original per-series names bound for any later cell that refers to them
fedfunds_df = fred_frames.get("Federal Funds Rate (FRED)", pd.DataFrame())
imports_df = fred_frames.get("Real Imports (FRED)", pd.DataFrame())

# Merge with existing BEA dataset: left joins on the date index keep exactly the BEA rows.
# Start from a copy so later in-place edits of final_merged_df never touch merged_df.
final_merged_df = merged_df.copy()
for fred_frame in fred_frames.values():
    final_merged_df = final_merged_df.join(fred_frame, how="left")

# Display result
print(final_merged_df.head())
print(final_merged_df.dtypes)


            Gross domestic product (GDP & Economic Indicators)  \
Date                                                             
1959-01-01                                                7.9    
1959-04-01                                                9.3    
1959-07-01                                                0.3    
1959-10-01                                                1.1    
1960-01-01                                                9.3    

            Customs duties (Government Receipts & Outlays)  \
Date                                                         
1959-01-01                                             952   
1959-04-01                                            1076   
1959-07-01                                            1092   
1959-10-01                                            1076   
1960-01-01                                            1176   

            Federal Funds Rate (FRED)  Real Imports (FRED)  
Date                                     

In [3]:
%cd /content/importforecasting_group2

import sqlite3

# Read the DDL script first, so a missing or unreadable DDL.txt fails
# before any database connection has been opened.
with open('DDL.txt', 'r') as ddl_file:
    ddl_script = ddl_file.read()

# Connect to the SQLite database (or create it if it doesn't exist)
conn = sqlite3.connect('economic_data.db')
try:
    cursor = conn.cursor()

    # Execute the DDL script
    cursor.executescript(ddl_script)

    # Commit the changes
    conn.commit()
finally:
    # Always release the connection, even when the script raises
    conn.close()


/content/importforecasting_group2


In [None]:
# Generate a new column: weighted average tariff = customs duties / imports of goods and services
# NOTE(review): the numerator is a BEA series and the denominator is FRED's "Real Imports"
# series -- confirm both are in comparable units (scale, nominal vs. real) before
# interpreting the ratio as a rate.
duties_col = 'Customs duties (Government Receipts & Outlays)'
imports_col = 'Real Imports (FRED)'

# Check up front so the message can say exactly which input is absent
missing_cols = [col for col in (duties_col, imports_col) if col not in final_merged_df.columns]

if missing_cols:
    print(f"Error: One or more columns are not found in the dataframe. Please verify your column names. Missing: {missing_cols}")
else:
    final_merged_df['Weighted Average Tariff Rate'] = final_merged_df[duties_col] / final_merged_df[imports_col]

print(final_merged_df.head())

            Gross domestic product (GDP & Economic Indicators)  \
Date                                                             
1959-01-01                                                7.9    
1959-04-01                                                9.3    
1959-07-01                                                0.3    
1959-10-01                                                1.1    
1960-01-01                                                9.3    

            Customs duties (Government Receipts & Outlays)  \
Date                                                         
1959-01-01                                             952   
1959-04-01                                            1076   
1959-07-01                                            1092   
1959-10-01                                            1076   
1960-01-01                                            1176   

            Federal Funds Rate (FRED)  Real Imports (FRED)  \
Date                                    

In [None]:
# With no argument, %cd switches to the home directory (/root in this Colab session)
%cd

/root


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files under /content/drive/MyDrive become accessible
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Store df as a Pickle
import pickle

# Destination inside the mounted Drive folder; the file is overwritten if it already exists
pickle_path = '/content/drive/MyDrive/DSAPEC/importforecasting_group2/econ_data.pkl'

# Serialize the merged DataFrame in binary mode
with open(pickle_path, 'wb') as pickle_file:
    pickle.dump(final_merged_df, pickle_file)