In [None]:
# Download the detections CSV from
# https://www.aphis.usda.gov/livestock-poultry-disease/avian/avian-influenza/hpai-detections/commercial-backyard-flocks
# then rename it to outbreaks.csv and move it to ./src/assets/ (the path read below).
# NOTE - the download is sometimes tab-delimited instead of comma-delimited. If so, open it in Excel and re-save it as a comma-separated CSV.

import csv
import json

def load_csv(csv_file):
    """Read a comma-delimited CSV file into a list of rows.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record, in file order. Each entry is
        the list of string fields parsed with the ``csv.excel`` dialect.
    """
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires. Without it the file object rewrites line
    # breaks embedded in quoted fields before the parser sees them.
    with open(csv_file, 'r', newline='') as file:
        # Rows are collected silently; echoing every row floods the
        # notebook output for a file of any real size.
        return list(csv.reader(file, dialect=csv.excel))

In [None]:
# The export spreads its header over the first two rows: the top row only
# repeats "Control Area Released", while the second row holds the location
# and date labels. The records themselves start on the third row.
data_list = load_csv("./src/assets/outbreaks.csv")
data_labels, data_list = data_list[1], data_list[2:]

In [None]:
# convert US county location to lat/long
import json

# Read the county coordinate records from the bundled JSON file.
with open('./src/assets/counties.json', 'r') as file:
    geoCountyData = json.load(file)

# Build a two-level lookup: locationDict[state][COUNTY] -> [lat, lon].
# County names are upper-cased; state keys are stored exactly as they
# appear in the JSON file.
locationDict = {}

for line in geoCountyData:
    # Fetch this state's county table, creating it on first sight.
    counties_in_state = locationDict.setdefault(line['state'], {})
    counties_in_state[line['county'].upper()] = [line['lat'], line['lon']]


In [None]:
import datetime

months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
def stringToDate(dateText):
    """Convert a 'D-Mon-YY' date such as '16-Feb-22' to ISO 'YYYY-MM-DD'.

    Args:
        dateText: Text to convert. The month is a three-letter English
            abbreviation in any letter case; the year may have two digits
            (taken to be in the 2000s) or four digits. Whitespace around
            each part is ignored.

    Returns:
        The date formatted as 'YYYY-MM-DD'. Text that does not split into
        exactly three '-'-separated parts (for example an empty cell) is
        returned unchanged.

    Raises:
        ValueError: If the text has three parts but the month is not a
            known abbreviation, or the day/year do not form a valid date.
    """
    parts = dateText.split('-')
    if len(parts) != 3:
        # Not a date in the expected layout; pass it through untouched.
        return dateText
    day_text, month_text, year_text = (part.strip() for part in parts)
    if len(year_text) >= 4:
        # Already a full year; prefixing '20' would give e.g. 202022.
        year = int(year_text)
    else:
        year = int('20' + year_text)
    # capitalize() maps 'JAN' and 'jan' onto the 'Jan' spelling used in months.
    month = 1 + months.index(month_text.capitalize())
    dt = datetime.date(year, month, int(day_text))
    return dt.strftime("%Y-%m-%d")
    

In [None]:

def process_line(data_line):
    """Turn one CSV data row into an output record (a dict).

    Fields are keyed by the matching entry of the module-level
    ``data_labels``. Columns from index 5 onward are treated as columns
    whose header is a date; they are collapsed into 'EndDate' and
    'NumInfected', taken from the last such column that is not empty.

    Args:
        data_line: List of string fields for one row, aligned with
            ``data_labels``.

    Returns:
        A dict holding the remaining labelled columns followed by
        'EndDate', 'NumInfected' and 'GeoLoc'. 'EndDate' and 'NumInfected'
        are None when every date column of the row is empty, so every
        record has the same set of keys.

    Raises:
        KeyError: If an expected column is missing, or ``locationDict`` has
            no entry for the row's state/county.
    """
    # initialize output line as dict with data from the csv file
    output_line = {}
    for i, value in enumerate(data_line):
        output_line[data_labels[i]] = value
    # improve formatting for some types
    output_line['Confirmed'] = stringToDate(output_line['Confirmed'])
    del output_line['Special Id']
    # Always create both summary fields so that a row without any populated
    # date column still yields a record of the same shape as the others.
    output_line['EndDate'] = None
    output_line['NumInfected'] = None
    for date_label in data_labels[5:]:
        if output_line[date_label] != '':
            # Later columns overwrite earlier ones: the last non-empty wins.
            output_line['EndDate'] = stringToDate(date_label)
            output_line['NumInfected'] = output_line[date_label]
        del output_line[date_label]
    # convert location to lat long
    state = output_line['State'].upper()
    county = output_line['County Name'].upper()
    try:
        output_line['GeoLoc'] = locationDict[state][county]
    except KeyError:
        # Same exception type as a bare lookup, but it names the location.
        raise KeyError(
            'no lat/long known for county %r in state %r' % (county, state)
        ) from None
    return output_line

In [None]:
# convert csv data to json data: one output record per CSV data row
output_list = [process_line(row) for row in data_list]

In [None]:
# output json data
# The array is written by hand, one field per line, so that each value
# (including list values) stays on a single line of the output file.
with open("./src/assets/outbreaks.json", "w") as outfile:
    outfile.write('[\n')
    last_index = len(output_list) - 1
    # An empty output_list skips the loop and produces an empty JSON array.
    for i, line in enumerate(output_list):
        outfile.write('  {\n')
        # Each record is written with its own keys, so a record whose fields
        # differ from the first one is neither truncated nor a KeyError.
        # json.dumps on the key escapes quotes and backslashes; plain string
        # concatenation would emit invalid JSON for such keys.
        fields = ['    ' + json.dumps(key) + ': ' + json.dumps(value)
                  for key, value in line.items()]
        if fields:
            # Comma after every field except the last, as JSON requires.
            outfile.write(',\n'.join(fields) + '\n')
        # NOTE: the closing brace is indented with a tab while the opening
        # brace uses two spaces; kept as is so the generated file is unchanged.
        if i == last_index:
            outfile.write('\t}\n')
        else:
            outfile.write('\t},\n')

    outfile.write(']')