In [11]:
import pandas as pd
import numpy as np
import os
import datetime

In [12]:
# lookup table: month number (1-12) -> number of days in that month
# (February is listed with 29 days, i.e. a leap year such as 2020)
maxMonth = dict(enumerate((31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31), start=1))

# gets next day, needed because the current day could be the last of the month
def next_day(current_day):
    """Return midnight of the calendar day that follows ``current_day``.

    Calendar arithmetic is delegated to ``datetime.timedelta`` so that month
    ends, leap years and the December 31 -> January 1 rollover are all handled
    correctly, and the year of the input is preserved rather than forced to
    2020.

    Parameters
    ----------
    current_day : datetime-like
        Any object exposing ``year``, ``month`` and ``day`` attributes
        (e.g. ``datetime.datetime``, ``datetime.date``, ``pandas.Timestamp``).

    Returns
    -------
    datetime.datetime
        The following day at 00:00; any time-of-day on the input is dropped.
    """
    # Rebuild from the date parts only so the result is always at midnight.
    midnight = datetime.datetime(current_day.year, current_day.month, current_day.day)
    return midnight + datetime.timedelta(days=1)

# zero-pads months and days below 10 so that dates render like 2020-04-01
def formatter(numb):
    """Return ``numb`` as a string, prefixed with "0" when it is below 10."""
    padding = "0" if numb < 10 else ""
    return padding + str(numb)

In [13]:
def format_submission(dates, death_errors, start, fips):
    """Build labelled submission rows, one per day from ``start`` onwards.

    Parameters
    ----------
    dates : object with ``.tolist()``
        Consecutive daily dates (e.g. a ``pandas.DatetimeIndex``); each element
        must expose ``year``, ``month`` and ``day`` and support adding a
        ``datetime.timedelta``.
    death_errors : 2-D array-like
        Values laid out as (series, day). It is transposed here so that each
        resulting row holds all values for a single day. Assumed to start on
        the same day as ``dates``.
    start : datetime-like
        First day to include; matched on year, month and day.
    fips : int or str
        Code appended to every row label.

    Returns
    -------
    list of list
        One row per day: ``["YYYY-MM-DD-<fips>", value_0, value_1, ...]``.
        Dates that have no corresponding value row are dropped.

    Raises
    ------
    ValueError
        If ``start`` is not among the (possibly extended) dates.
    """
    dates = dates.tolist()

    # swap columns and rows so that each row corresponds to one day
    rows = np.array(death_errors).T.tolist()

    # extend dates one day at a time until there is a date for every row;
    # timedelta arithmetic handles month ends and the year boundary
    one_day = datetime.timedelta(days=1)
    while len(dates) < len(rows):
        dates.append(dates[-1] + one_day)

    # locate start; the year is compared as well so that a series spanning
    # more than one year cannot match the same month/day of another year
    start_key = (start.year, start.month, start.day)
    start_index = next(
        (i for i, day in enumerate(dates)
         if (day.year, day.month, day.day) == start_key),
        -1,
    )
    if start_index == -1:  # start doesn't exist in dates
        raise ValueError("start date not present")

    # label each remaining row with its zero-padded date and the fips code;
    # zip stops at the shorter sequence, so surplus dates are ignored
    submission = []
    for day, row in zip(dates[start_index:], rows[start_index:]):
        label = f"{day.year}-{day.month:02d}-{day.day:02d}-{fips}"
        submission.append([label] + row)

    return submission

In [14]:
# Make the repo-local `loader` module importable: find the enclosing git
# repository (searching parent directories from the current directory) and add
# its models/data_processing folder to the import path before importing.
import git
import sys
repo = git.Repo("./", search_parent_directories=True)
homedir = repo.working_dir
sys.path.insert(1, f"{homedir}" + '/models/data_processing')
import loader

# Demo run of format_submission for a single county.
# NOTE(review): loader.load_data / loader.query are defined outside this
# notebook; presumably load_data resolves the path relative to the repo root
# and query keeps the rows whose "fips" column equals `fips` -- confirm.
us = loader.load_data("/models/epidemiological/us_training_data.csv")
fips = 36061
county_data = loader.query(us, "fips", fips)
start = datetime.datetime(2020, 4, 1)
dates = pd.to_datetime(county_data["date"].values)
#death_errors = np.array([[1,2,3,4,5,6,7,8,9] for i in range(100)])
# Placeholder values with shape (10, 100): row i holds 100 copies of i, so
# after the transpose inside format_submission every day's row is 0..9.
death_errors = np.array([[i] * 100 for i in range(10)])

print(format_submission(dates, death_errors, start, fips))

31
[['2020-04-01-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-02-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-03-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-04-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-05-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-06-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-07-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-08-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-09-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-10-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-11-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-12-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-13-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-14-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-15-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-16-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-17-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-18-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-04-19-36061', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], ['2020-0