In [5]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

import pickle


In [6]:
with open('ei_intermediate_file_paths.json') as file_path_file:
    file_paths = json.load(file_path_file)


SI_PREDS_INPUT_PATH = file_paths.get("smoke_impact_model_predictions.csv")
NOAA_BLS_MERGED_INPUT_PATH = file_paths.get("stage2_fs_merged_output.csv")
EI_MODEL_INPUT_PATH = file_paths.get("ei_model_output")

EI_PREDS_OUTPUT_PATH = file_paths.get("economic_impact_model_predictions.csv")

In [7]:
# Load dataset
si_preds_df = pd.read_csv(SI_PREDS_INPUT_PATH)

noaa_bls_historical_df = pd.read_csv(NOAA_BLS_MERGED_INPUT_PATH)

In [8]:

# Calculate 5-year rolling averages for each variable
noaa_bls_historical_df['5yr_avg_tavg'] = noaa_bls_historical_df['noaa_tavg'].rolling(window=5, min_periods=1).mean()
noaa_bls_historical_df['5yr_avg_prcp'] = noaa_bls_historical_df['noaa_prcp'].rolling(window=5, min_periods=1).mean()
noaa_bls_historical_df['5yr_avg_unemployment_rate'] = noaa_bls_historical_df['bls_pct_diff_laus_unemployment_rate'].rolling(window=5, min_periods=1).mean()

# Initialize a DataFrame to hold predictions for each year from 2021 to 2050
future_years = range(2021, 2051)
future_predictions = pd.DataFrame({
    'year': future_years,
    'predicted_tavg': np.nan,
    'predicted_prcp': np.nan,
    'predicted_unemployment_rate': np.nan
})

# Copy historical data into a temporary DataFrame to simulate adding future data
temp_data = noaa_bls_historical_df.copy()

# Set the 5-year recalculation cycle
cycle_length = 5

# Loop through each year in the forecast period, applying 5-year recalculations
for i, year in enumerate(future_years):
    # Assign predictions for this year based on the latest 5-year averages
    future_predictions.loc[i, 'predicted_tavg'] = temp_data['5yr_avg_tavg'].iloc[-1]
    future_predictions.loc[i, 'predicted_prcp'] = temp_data['5yr_avg_prcp'].iloc[-1]
    future_predictions.loc[i, 'predicted_unemployment_rate'] = temp_data['5yr_avg_unemployment_rate'].iloc[-1]

    # Every 5 years, update rolling averages using the latest values
    if (i + 1) % cycle_length == 0:
        # Create a new row for the latest predictions as "historical" data
        new_row = pd.DataFrame({
            'year': [year],
            'noaa_tavg': [future_predictions.loc[i, 'predicted_tavg']],
            'noaa_prcp': [future_predictions.loc[i, 'predicted_prcp']],
            'bls_pct_diff_laus_unemployment_rate': [future_predictions.loc[i, 'predicted_unemployment_rate']]
        })
        
        # Concatenate the new row to `temp_data`
        temp_data = pd.concat([temp_data, new_row], ignore_index=True)

        # Recalculate the 5-year rolling averages on the updated data
        temp_data['5yr_avg_tavg'] = temp_data['noaa_tavg'].rolling(window=5, min_periods=1).mean()
        temp_data['5yr_avg_prcp'] = temp_data['noaa_prcp'].rolling(window=5, min_periods=1).mean()
        temp_data['5yr_avg_unemployment_rate'] = temp_data['bls_pct_diff_laus_unemployment_rate'].rolling(window=5, min_periods=1).mean()

# Display the updated predictions with 5-year interval adjustments
print(future_predictions)


    year  predicted_tavg  predicted_prcp  predicted_unemployment_rate
0   2021       78.960000        5.326000                    -2.526542
1   2022       78.960000        5.326000                    -2.526542
2   2023       78.960000        5.326000                    -2.526542
3   2024       78.960000        5.326000                    -2.526542
4   2025       78.960000        5.326000                    -2.526542
5   2026       79.832000        5.067200                    -0.962885
6   2027       79.832000        5.067200                    -0.962885
7   2028       79.832000        5.067200                    -0.962885
8   2029       79.832000        5.067200                    -0.962885
9   2030       79.832000        5.067200                    -0.962885
10  2031       79.678400        5.538640                    -0.584033
11  2032       79.678400        5.538640                    -0.584033
12  2033       79.678400        5.538640                    -0.584033
13  2034       79.67

In [9]:
# Load the trained model
with open(EI_MODEL_INPUT_PATH, 'rb') as model_file:
    economic_impact_model = pickle.load(model_file)

# %%
# Prepare the data for prediction
# Select the required features for the model
prediction_features = ['predicted_tavg', 'predicted_prcp', 'predicted_unemployment_rate']

# Ensure future_predictions includes the required columns
X_future = future_predictions[prediction_features]

# Make predictions using the loaded model
future_predictions['predicted_economic_impact'] = economic_impact_model.predict(X_future)

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- predicted_prcp
- predicted_tavg
- predicted_unemployment_rate
Feature names seen at fit time, yet now missing:
- bls_pct_diff_laus_labor_force
- bls_pct_diff_sae_hrs
- noaa_prcp
- scaled_avg_daily_smoke_impact


In [None]:
# Save the predictions to the output CSV file
future_predictions.to_csv(EI_PREDS_OUTPUT_PATH, index=False)
print(f"Predictions saved to {EI_PREDS_OUTPUT_PATH}")

# %%
# Display the predictions
print(future_predictions)