In [7]:
import pandas as pd
import numpy as np
from collections import OrderedDict
import datetime

In [8]:
def trend_break(df, stop_date):
    counties = df['Unnamed: 3']
    date_lst = [i for i in df.iloc[:,5:] if 'Unnamed' not in i]
    tstpos_lst = [i for i in df.iloc[:,5:] if df[i][0][0:6] == 'tstpos']
    mort_lst = [i for i in df.iloc[:,5:] if df[i][0][0:4] == 'mort']
    dic = {}
    for i in range(len(tstpos_lst)):
        if i == 0:
            continue
        date = date_lst[i]  

        if date == stop_date:
            break
        curr_c = df[tstpos_lst[i]]
        prev_c = df[tstpos_lst[i-1]]
        curr_c = curr_c[1:].fillna(0).replace(',','', regex=True).apply(int)
        prev_c = prev_c[1:].fillna(0).replace(',','', regex=True).apply(int)
        diff_c = curr_c - prev_c
        neg_idx_c = np.where(diff_c < 0)[0]
        # neg_idx_c = diff_c.index[diff_c < 0]
        tb_counties_c = counties[neg_idx_c]
        
        
        curr_d = df[mort_lst[i]]
        prev_d = df[mort_lst[i-1]]
        curr_d = curr_d[1:].fillna(0).replace(',','', regex=True).apply(int)
        prev_d = prev_d[1:].fillna(0).replace(',','', regex=True).apply(int)
        diff_d = curr_d - prev_d
        neg_idx_d = np.where(diff_d < 0)[0]
        # neg_idx_d = diff_d.index[diff_d < 0]
        tb_counties_d = counties[neg_idx_d]

        state_cases = tb_counties_c[tb_counties_c.str.contains(',')].str.split(',').apply(lambda x: x[-1])
        state_deaths = tb_counties_d[tb_counties_d.str.contains(',')].str.split(',').apply(lambda x: x[-1])

        # replace with tb_counties_c, tb_counties_d to get the specific counties
        exceptions = ['Georgia','Indiana','Michigan','Connecticut','North Dakota','Virginia','Massachusetts','Maine','Texas','Vermont',
            'Tennessee','Pennsylvania','California','Puerto Rico','Minnesota','Washington','Wisconsin','Illinois','Ohio']
        out_pre = pd.concat([state_cases, state_deaths]) 
        out = []
        for i in out_pre:
            if i.strip() not in exceptions:
                out.append(i.strip())
        if len(out) > 0:
            dic[date] = set(out)
     
    return dic

In [9]:
# insert all sheet urls here
url_lst = [
'https://docs.google.com/spreadsheets/d/1R8K20hVSqcLkn8vgvq4cVflG1UGxn0d_tEb_R15-klM/edit#gid=1642263273',
'https://docs.google.com/spreadsheets/d/1PuphWZHl6TM6dTEVeNnmDGF6c2_QXqwVl1wMZtBrPHE/edit#gid=311031188',
'https://docs.google.com/spreadsheets/d/18IldxlzQeP-BdvEJZ6T5OTFdbdFoWd-sOKMiNnojsrI/edit#gid=1642263273'
]

regions = {'midwest': ['Nebraska', 'Iowa', 'Kentucky', 'Kansas', 'South Dakota'],
                    'mountains': ['Arizona', 'Idaho', 'Montana', 'New Mexico', 'Wyoming'],
                    'prairie_mrv': ['Oklahoma', 'Louisiana', 'Arkansas', 'Mississippi', 'Missouri'],
                    'northeast': ['Delaware', 'Maryland', 'New Hampshire', 'New York', 'Rhode Island', 'Washington', 'New Jersey'],
                    'southeast': ['Alabama', 'Florida', 'North Carolina', 'South Carolina', 'West Virginia'],
                    'w_pacific' : ['Nevada', 'Utah', 'Oregon', 'Alaska', 'Hawaii']
                    }

all_regions = []
for i in url_lst:
    sheet_url = i
    url_1 = sheet_url.replace('/edit#gid=', '/export?format=csv&gid=')
    df = pd.read_csv(url_1)

    # insert date after the day you want to stop checking for trend breaks
    all_regions.append(trend_break(df, '10/16/2022'))

In [10]:
midwest = {}
mountains = {}
prairie_mrv = {}
northeast = {}
southeast = {}
w_pacific = {}

for sheet in all_regions:
    for date in sheet:
        for state in sheet[date]:
            for region in regions:
                if state in regions[region]:
                    curr = eval(region)
                    if date not in curr.keys():
                        curr[date] = [state]
                    else:
                        curr[date] += [state]

In [11]:
midwest = dict(OrderedDict(sorted(midwest.items(), key=lambda t: datetime.datetime.strptime(t[0], '%m/%d/%Y').date())))
midwest

{'11/23/2022': ['South Dakota', 'Kansas'],
 '11/28/2022': ['Nebraska'],
 '11/30/2022': ['South Dakota', 'Kentucky']}

In [12]:
mountains = dict(OrderedDict(sorted(mountains.items(), key=lambda t: datetime.datetime.strptime(t[0], '%m/%d/%Y').date())))
mountains

{'11/22/2022': ['Wyoming'],
 '11/23/2022': ['New Mexico'],
 '11/25/2022': ['Montana'],
 '11/30/2022': ['New Mexico', 'Arizona']}

In [13]:
prairie_mrv = dict(OrderedDict(sorted(prairie_mrv.items(), key=lambda t: datetime.datetime.strptime(t[0], '%m/%d/%Y').date())))
prairie_mrv

{'11/24/2022': ['Arkansas', 'Oklahoma'],
 '11/25/2022': ['Arkansas'],
 '11/27/2022': ['Arkansas'],
 '11/28/2022': ['Missouri'],
 '11/29/2022': ['Oklahoma'],
 '11/30/2022': ['Arkansas', 'Louisiana'],
 '12/3/2022': ['Arkansas', 'Missouri']}

In [14]:
northeast = dict(OrderedDict(sorted(northeast.items(), key=lambda t: datetime.datetime.strptime(t[0], '%m/%d/%Y').date())))
northeast

{'11/21/2022': ['New Hampshire', 'New Jersey'],
 '11/23/2022': ['New Jersey', 'Maryland'],
 '11/29/2022': ['New Jersey'],
 '11/30/2022': ['New Hampshire', 'Maryland'],
 '12/1/2022': ['New Jersey'],
 '12/2/2022': ['New Hampshire', 'New Jersey', 'Maryland']}

In [15]:
southeast = dict(OrderedDict(sorted(southeast.items(), key=lambda t: datetime.datetime.strptime(t[0], '%m/%d/%Y').date())))
southeast

{'11/22/2022': ['West Virginia'],
 '11/23/2022': ['West Virginia'],
 '11/27/2022': ['West Virginia'],
 '11/30/2022': ['North Carolina']}

In [16]:
w_pacific = dict(OrderedDict(sorted(w_pacific.items(), key=lambda t: datetime.datetime.strptime(t[0], '%m/%d/%Y').date())))
w_pacific

{'11/21/2022': ['Utah'],
 '11/22/2022': ['Alaska'],
 '11/29/2022': ['Alaska'],
 '12/1/2022': ['Utah']}