In [1]:
# Import packages
import pandas as pd
import numpy as np
import datetime as dt
from sklearn import datasets, linear_model
from numpy import inf
np.seterr(divide = 'ignore') 

# Counties modelled for each region, in the order they are processed
NORCAL_COUNTIES = [
    'Alameda',
    'Contra Costa',
    'Fresno',
    'San Francisco',
    'Merced',
    'San Mateo',
    'Santa Clara',
    'Santa Cruz',
    'Tulare',
]
SOCAL_COUNTIES = [
    'Imperial',
    'Kern',
    'Los Angeles',
    'Orange',
    'Riverside',
    'San Bernardino',
    'San Diego',
    'Santa Barbara',
    'San Luis Obispo',
    'Ventura',
]

# Single LinearRegression estimator; the regression loop below re-fits it for each county
lin_regr = linear_model.LinearRegression()

In [2]:
# Load the parsed case/death CSV and seed one regression column per model
df = pd.read_csv('../parsed-csvs/covid-19-cases-deaths.csv')

# Each regression column starts out as a copy of the observed death counts
df = df.assign(**{
    regr_column: df['Deaths']
    for regr_column in ('Lin Regr Deaths', 'Log Regr Deaths', 'Exp Regr Deaths')
})

# Regions to generate regressions for, each given as its list of county names
counties = [NORCAL_COUNTIES, SOCAL_COUNTIES]

In [3]:
# Model linear, logarithmic, and exponential regressions
#
# For every county, fit three models to its 10 most recent weekly death counts
# and project each model over the 10 weeks that follow the latest observed week.
# The forecast rows are collected per county and appended to df once, after the
# loop, so df is never modified while it is still being filtered.
prediction_frames = []
for region in counties:
    for county in region:
        # Get the 10 most recent weeks and prep data. Work on a copy so the
        # clean-up below cannot write through to df.
        county_df = df[df['County Name'].values == county].tail(10).copy()
        # Clamp negative death counts to 0. Only the 'Deaths' column is touched:
        # zeroing the whole row would also wipe Week, County Name and the date,
        # which feed the regressions and the forecast dates below.
        county_df.loc[county_df['Deaths'] < 0, 'Deaths'] = 0
        deaths = np.array(county_df['Deaths'].values).astype('int64')
        weeks = np.array(county_df['Week'].values).astype('int64')
        latest_date = county_df['Start of Week Date'].iat[-1]
        latest_week = county_df['Week'].iat[-1]
        # The 10 week numbers that follow the latest observed week
        weeks_to_pred = np.arange(10) + 1 + latest_week

        # Model linear regression equation and predict future weeks
        # (scikit-learn expects 2-D column vectors, hence the reshape)
        lin_regr.fit(weeks.reshape(len(weeks), 1), deaths.reshape(len(deaths), 1))
        lin_regr_pred = lin_regr.predict(weeks_to_pred.reshape(len(weeks_to_pred), 1))

        # Model logarithmic regression equation: deaths = log_B + log_A * ln(week)
        log_regr_eq = np.polyfit(np.log(weeks), deaths, 1)
        log_A = log_regr_eq[0]  # Coefficient A for log eq
        log_B = log_regr_eq[1]  # Coefficient B for log eq

        # Model exponential regression equation: deaths = exp(exp_B) * exp(exp_A * week),
        # fitted as a straight line through ln(deaths)
        log_deaths = np.log(deaths)
        # ln(0) is -inf; replace it with 0 so polyfit only sees finite values
        log_deaths[log_deaths == -inf] = 0
        exp_regr_eq = np.polyfit(weeks, log_deaths, 1)
        exp_A = exp_regr_eq[0]  # Coefficient A for exp eq
        exp_B = exp_regr_eq[1]  # Coefficient B for exp eq

        # Predict future weeks and add to predictions df
        predictions = []
        for index in range(0, 10):
            week_to_pred = weeks_to_pred[index]
            log_regr_pred = log_B + log_A * np.log(week_to_pred)
            exp_regr_pred = np.exp(exp_B) * np.exp(exp_A * week_to_pred)

            # Append to predictions. Match the entries to the columns of df.
            # The stored week is the week the models were evaluated at, so it
            # lines up with new_date (index + 1 weeks after the latest week).
            # Cases and Deaths are set to -1 on forecast rows.
            new_date = pd.to_datetime(latest_date) + dt.timedelta(days=((index + 1) * 7))
            predictions.append([new_date, week_to_pred, county,
                                -1, -1, lin_regr_pred[index][0], log_regr_pred, exp_regr_pred])

        prediction_frames.append(pd.DataFrame(predictions, columns=['Start of Week Date', 'Week', 'County Name',
            'Cases', 'Deaths', 'Lin Regr Deaths', 'Log Regr Deaths', 'Exp Regr Deaths']))

# Append predictions to df. DataFrame.append was removed in pandas 2.0, so
# concatenate once; each county's forecast rows keep their own 0..9 index.
df = pd.concat([df] + prediction_frames, sort=True)

In [4]:
# Export the regression-augmented frame as one CSV per region

# Keep only the reporting columns, in their output order
df = df[['Start of Week Date', 'Week', 'County Name', 'Cases', 'Deaths',
         'Lin Regr Deaths', 'Log Regr Deaths', 'Exp Regr Deaths']]

# Observed and predicted death counts are written as whole numbers
for count_column in ['Deaths', 'Lin Regr Deaths', 'Log Regr Deaths', 'Exp Regr Deaths']:
    df[count_column] = df[count_column].round().astype(int)

# Render the week start date as "month/day" without zero padding
week_start = pd.to_datetime(df['Start of Week Date'])
df['Start of Week Date'] = week_start.dt.month.astype(str) + '/' + week_start.dt.day.astype(str)

# Order rows by county, then chronologically by week number
df = df.sort_values(by=['County Name', 'Week'], ascending=True)

# Split by region and write each subset to its own file
norcal_df = df[df['County Name'].isin(NORCAL_COUNTIES)]
socal_df = df[df['County Name'].isin(SOCAL_COUNTIES)]

norcal_df.to_csv('../parsed-csvs/covid-19-pred-norcal-deaths.csv')
socal_df.to_csv('../parsed-csvs/covid-19-pred-socal-deaths.csv')