## Covariate Data
We want to run the Trend Forecasting algorithm on more locations. 

So, we need to acquire covariate data for each location. 

In [1]:
from src.trend_forecast.covariates import (
    get_covariate_data,
    CovariateSelection,
    output_covariates_to_csv,
)
import pandas as pd

In [2]:
target_dates = pd.read_csv("../datasets/target_dates.csv")

selected_covariates = CovariateSelection(
    mean_temp=True,
    max_rel_humidity=True,
    sun_duration=True,
    wind_speed=True,
    radiation=True,
    google_search=True,
    movement=False,
)

In [3]:
def one_date_covariate(target_date, loc_code):
    print(f"{loc_code}: Getting date", target_date)
    df = get_covariate_data(
        covariates=selected_covariates,
        loc_code=loc_code,
        target_date=target_date,
        series_length=50,
    )
    df.insert(0, "time_0", range(len(df)))
    output_covariates_to_csv(df, loc_code, target_date)

In [9]:
# Test on California
for date in target_dates['date']:
    one_date_covariate(date, '06')

06: Getting date 2023-10-14
06: Getting date 2023-10-21
06: Getting date 2023-10-28
06: Getting date 2023-11-04
06: Getting date 2023-11-11
06: Getting date 2023-11-18
06: Getting date 2023-11-25
06: Getting date 2023-12-02
06: Getting date 2023-12-09
06: Getting date 2023-12-16
06: Getting date 2023-12-23
06: Getting date 2023-12-30
06: Getting date 2024-01-06
06: Getting date 2024-01-13
06: Getting date 2024-01-20
06: Getting date 2024-01-27
06: Getting date 2024-02-03
06: Getting date 2024-02-10
06: Getting date 2024-02-17
06: Getting date 2024-02-24
06: Getting date 2024-03-02
06: Getting date 2024-03-09
06: Getting date 2024-03-16
06: Getting date 2024-03-23
06: Getting date 2024-03-30
06: Getting date 2024-04-06
06: Getting date 2024-04-13


In [10]:
one_date_covariate('2024-04-20', '06')
one_date_covariate('2024-04-27', '06')

06: Getting date 2024-04-20
06: Getting date 2024-04-27


In [16]:
# Forgot Google Search covariate for California.
# Adding it on.

import os
from src.trend_forecast.covariate_getters import get_google_search
import glob
import pandas as pd

for file in glob.glob("../output/covariates/06/*.csv"):
    df = pd.read_csv(file)
    filename = os.path.basename(file)
    date = filename.replace(".csv", "")
    print(date)
    df["google_search"] = get_google_search("04", 'flu symptoms', date, 50).to_numpy()
    df.to_csv(file, index=False)

2023-10-14
2023-10-21
2023-10-28
2023-11-04
2023-11-11
2023-11-18
2023-11-25
2023-12-02
2023-12-09
2023-12-16
2023-12-23
2023-12-30
2024-01-06
2024-01-13
2024-01-20
2024-01-27
2024-02-03
2024-02-10
2024-02-17
2024-02-24
2024-03-02
2024-03-09
2024-03-16
2024-03-23
2024-03-30
2024-04-06
2024-04-13
2024-04-20
2024-04-27


In [4]:
# Acquire covariate data for the following locations:
# Colorado, New York, Tennessee, Wyoming
for loc_code in ['08', '36', '47', '56']:
    for date in target_dates['date']:
        one_date_covariate(date, loc_code)

08: Getting date 2023-10-14
(50,)
08: Getting date 2023-10-21
(50,)
08: Getting date 2023-10-28
(50,)
08: Getting date 2023-11-04
(50,)
08: Getting date 2023-11-11
(50,)
08: Getting date 2023-11-18
(50,)
08: Getting date 2023-11-25
(50,)
08: Getting date 2023-12-02
(50,)
08: Getting date 2023-12-09
(50,)
08: Getting date 2023-12-16
(50,)
08: Getting date 2023-12-23
(50,)
08: Getting date 2023-12-30
(50,)
08: Getting date 2024-01-06
(50,)
08: Getting date 2024-01-13
(50,)
08: Getting date 2024-01-20
(50,)
08: Getting date 2024-01-27
(50,)
08: Getting date 2024-02-03
(50,)
08: Getting date 2024-02-10
(50,)
08: Getting date 2024-02-17
(50,)
08: Getting date 2024-02-24
(50,)
08: Getting date 2024-03-02
(50,)
08: Getting date 2024-03-09
(50,)
08: Getting date 2024-03-16
(50,)
08: Getting date 2024-03-23
(50,)
08: Getting date 2024-03-30
(50,)
08: Getting date 2024-04-06
(50,)
08: Getting date 2024-04-13
(50,)
08: Getting date 2024-04-20
(50,)
08: Getting date 2024-04-27
(50,)
36: Getting da

ValueError: Google Trends data failed for 56 from 2023-08-23 to 2023-10-11.

Google Trends did not work for Wyoming --- might be a rate limit issue. Going to move on for now with the other locations.