# Find Beta Min/Max

We want to find the min and max beta values over the time period:
 [March 2022 - June 25, 2023]. 

We will implement an alternate version of the Particle Filter that estimates
 beta over this time period. 
 
We want to apply the new filter to all states to find a 'global' max and min.

## Results
After running the particle filter on all locations over the specified dates,
 we find:
- `Beta Max: 0.3491`
- `Beta Min: 0.0165`

In [1]:
import os

import pandas as pd

from filter_forecast.algo_init import initialize_algo
from filter_forecast.state import State
from filter_forecast.helpers import get_beta_min_max_data
from forecast_all_states import generate_location_mappings

from multiprocessing import Pool

import logging

In [2]:
logger = logging.getLogger(__name__)


def run_particle_filter_on_location(state_code: str) -> None:
    """Run the particle filter for a single location.

    Builds a ``State`` from ``state_code``, passes its ``hosp_data`` through
    ``get_beta_min_max_data``, and runs the algorithm returned by
    ``initialize_algo`` over the result, using one time step per row.

    Args:
        state_code: Location code handed to ``State`` and ``initialize_algo``.
    """
    location_state = State(state_code)
    observations = get_beta_min_max_data(location_state.hosp_data)

    # The filter is run for as many steps as there are filtered observations.
    step_count = len(observations)

    particle_filter = initialize_algo(location_state.population, state_code)
    particle_filter.run(observations, step_count)

    logger.info("Finished running particle filter on location {}".format(state_code))

In [3]:
def run_pf_on_all_locations():
    """Run the particle filter for every unique location, in parallel.

    Passes ``./datasets/locations.csv`` to ``generate_location_mappings`` and
    maps ``run_particle_filter_on_location`` over each unique value of the
    returned table's ``location`` column, using a process pool sized by
    ``os.cpu_count()``.
    """
    # Only the second returned value is used here; the first is discarded.
    _, locations = generate_location_mappings("./datasets/locations.csv")

    # Build the work list under its own name, before the pool is opened, so
    # the ``locations`` table is not shadowed and no worker processes are
    # alive while the list is being prepared.
    location_codes = list(locations["location"].unique())

    with Pool(os.cpu_count()) as pool:
        # ``map`` blocks until every location has been processed.
        pool.map(run_particle_filter_on_location, location_codes)

In [4]:
%%capture pf_output
# Run the filter for all locations. The %%capture cell magic (which must stay
# on the first line of the cell) stores this cell's output in `pf_output`
# instead of displaying it.
run_pf_on_all_locations()

In [None]:
import numpy as np

# Folder containing the particle filter result files.
directory = "./datasets/pf_results/"

# Flat list that accumulates the values read from every matching file.
all_betas = []

# Only files whose name contains 'average' are considered.
average_files = [name for name in os.listdir(directory) if "average" in name]

for name in average_files:
    # header=0: the first row of each CSV is used as the column names,
    # so it is not part of the data.
    frame = pd.read_csv(os.path.join(directory, name), header=0)
    # Collect the second column (positional index 1) of the file.
    second_column = frame.iloc[:, 1].values
    all_betas.extend(second_column)

# Single array holding the collected values from all files.
column_data_array = np.array(all_betas)

column_data_array

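Verify that the combined array holds the expected amount of data: the number of values per location (total divided by 52 locations) should equal the number of rows in a single location's results file.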

In [12]:
# Number of collected data points per location, assuming 52 locations.
column_data_array.shape[0] / 52

510.0

In [17]:
# Row count of one location's results file, for comparison with the
# per-location count computed above.
df = pd.read_csv("./datasets/pf_results/01_average_beta.csv")
df.shape[0]

510

In [14]:
# Largest value across all collected data points.
beta_max = column_data_array.max()
print("Beta Max: {}".format(beta_max))

Beta Max: 0.349141849603896


In [16]:
# Smallest value across all collected data points.
beta_min = column_data_array.min()
print("Beta Min: {}".format(beta_min))

Beta Min: 0.0165792956903883
