# Rebuilding the FRED Macro Dataset and Cleaning It

## Focus Areas

The previously merged dataset from four different macroeconomic indexes at FRED was imbalanced and contained many missing values.

To address this issue, the data retrieval process was started from scratch, with a focus on selecting new metrics as outlined below:

- **Brave-Butters-Kelley Real Gross Domestic Product (BBKMGDP)**: Replacing the previously used Real Gross Domestic Product (GDPC1).
- **Federal Funds Effective Rate**: This metric remains unchanged.
- **Unemployment Rate (UNRATE)**: This metric also remains unchanged.
- **Median Consumer Price Index (MEDCPIM158SFRBCLE)**: Replacing the previously used Inflation, consumer prices for the United States (FPCPITOTLZGUSA).


Fetch Data from Each API URL

In [12]:
import requests
import pandas as pd

# Function to fetch data from the FRED API
def fetch_fred_data(series_id):
    url = "https://api.stlouisfed.org/fred/series/observations"
    params = {
        "series_id": series_id,
        "realtime_start": "2000-01-01",
        "realtime_end": "2024-07-01",  # Updated end date to include 2024
        "api_key": "7bb504adcabc6f374463db2650ad84e5",
        "file_type": "json"
    }

    # Make the API request
    response = requests.get(url, params=params)

    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
        return data.get('observations', [])
    else:
        print(f"Error fetching {series_id}: {response.status_code} - {response.text}")
        return None

# Define the series IDs
series_ids = ["BBKMGDP", "FEDFUNDS", "UNRATE", "MEDCPIM158SFRBCLE"]

# Fetch data for each series and save to a CSV file
for series_id in series_ids:
    observations = fetch_fred_data(series_id)
    if observations:
        # Convert observations to a DataFrame
        df = pd.DataFrame(observations)
        df = df[['date', 'value']]  # Keep only date and value columns

        # Convert 'date' to datetime to inspect and ensure it's correct
        df['date'] = pd.to_datetime(df['date'])

        # Rename columns
        df.rename(columns={'date': 'Date', 'value': 'Value'}, inplace=True)

        # Save to CSV without any resampling or extra processing
        csv_filename = f"{series_id.lower()}_data.csv"
        df.to_csv(csv_filename, index=False)
        print(f"Data for {series_id} has been saved to {csv_filename}.")

Data for BBKMGDP has been saved to bbkmgdp_data.csv.
Data for FEDFUNDS has been saved to fedfunds_data.csv.
Data for UNRATE has been saved to unrate_data.csv.
Data for MEDCPIM158SFRBCLE has been saved to medcpim158sfrbcle_data.csv.


In [24]:
import requests
import pandas as pd

# Fetch data from the API
url = "https://api.stlouisfed.org/fred/series/observations"
params = {
    "series_id": "BBKMGDP",
    "realtime_start": "2001-01-01",
    "realtime_end": "2024-07-01",
    "api_key": "7bb504adcabc6f374463db2650ad84e5"
}

response = requests.get(url, params=params)

# Check if the request was successful
if response.status_code == 200:
    try:
        data = response.json()

        # Extract observations and create a DataFrame
        observations = data['observations']
        df = pd.DataFrame(observations)

        # Rename columns to Date and Value
        df.rename(columns={"date": "Date", "value": "Value"}, inplace=True)

        # Save to CSV
        csv_file_path = 'BBKMGDP_observations.csv'
        df.to_csv(csv_file_path, index=False)

        print(f"Data saved to {csv_file_path}")

    except ValueError as e:
        print("Error decoding JSON:", e)
else:
    print(f"Request failed with status code: {response.status_code}")
    print("Response content:", response.text)  # Print the response content for debugging


Error decoding JSON: Expecting value: line 1 column 1 (char 0)


Merge the Fetched Data

In [22]:
import pandas as pd

# Define the list of CSV filenames
csv_files = ["bbkmGdp_data.csv", "fedfunds_data.csv", "unrate_data.csv", "medcpim158sfrbcle_data.csv"]

# Create a date range covering the required period, e.g., 2000 to 2024
date_range = pd.date_range(start='2000-01-01', end='2024-12-31', freq='D')
merged_df = pd.DataFrame(date_range, columns=['Date'])

# Convert 'Date' to string to match the format in the CSV files
merged_df['Date'] = merged_df['Date'].dt.strftime('%Y-%m-%d')

# Read and merge each CSV file
for csv_file in csv_files:
    # Read the CSV file into a DataFrame
    df = pd.read_csv(csv_file)

    # Rename the 'Value' column to indicate the source (e.g., 'BBKMGDP_Value')
    series_id = csv_file.split('_')[0].upper()  # Extract series_id from the filename
    df.rename(columns={'Value': f'{series_id}_Value'}, inplace=True)

    # Merge with the main DataFrame on the 'Date' column
    merged_df = pd.merge(merged_df, df, on='Date', how='left')

# Save the merged DataFrame to a new CSV file
merged_df.to_csv('merged_data.csv', index=False)

print("Merged data has been saved to 'merged_data.csv'.")


JSONDecodeError: Expecting value: line 1 column 1 (char 0)