# DSE241 Exercise 8 Data Processing Notebook
The purpose of this notebook is to merge the West Nile Virus case reports in `West_Nile_Virus_by_County.json` into `california-counties.geojson` and write the result to `WNV_County.geojson`.

`california-counties.geojson` was downloaded from https://github.com/codeforamerica/click_that_hood/blob/master/public/data/california-counties.geojson

In [1]:
import json
import copy
import pandas as pd

## Check csv to see range of years/weeks

In [2]:
# Load the West Nile Virus per-county CSV so its year/week coverage can be inspected
wnf_csv_path = '../data/West_Nile_Virus_by_County.csv'
wnf_df = pd.read_csv(wnf_csv_path)

In [3]:
# Smallest and largest reporting week present in the CSV
week_reported = wnf_df['Week_Reported']
print(week_reported.min(), week_reported.max())

9 53


In [4]:
# Smallest and largest year present in the CSV
report_year = wnf_df['Year']
print(report_year.min(), report_year.max())

2006 2015


In [5]:
# Total positive cases per county, summed over every row of the CSV
cases_by_county = wnf_df.groupby('County')['Positive_Cases']
cases_by_county.sum()

County
Alameda               6
Amador                1
Butte               170
Calaveras             1
Colusa               15
Contra Costa         42
El Dorado             6
Fresno              157
Glenn                68
Imperial              7
Kern                315
Kings                23
Lake                  6
Los Angeles        1112
Madera               25
Marin                 4
Mendocino             4
Merced               30
Modoc                 2
Mono                  1
Monterey              2
Napa                  3
Nevada                3
Orange              500
Placer               47
Riverside           286
Sacramento          123
San Bernardino      170
San Diego           108
San Francisco         3
San Joaquin          83
San Luis Obispo       1
Santa Barbara         3
Santa Clara          31
Santa Cruz            1
Shasta               21
Siskiyou              2
Solano               17
Sonoma                1
Stanislaus          174
Sutter               43
Tehama   

## Read json files

In [7]:
# Case records exported as JSON; the loaded object is iterated record-by-record
# when building the per-county dictionary.
# The encoding is given explicitly so the read does not depend on the platform
# default and matches the utf-8 used when the merged GeoJSON is written.
with open('../data/West_Nile_Virus_by_County.json', encoding='utf-8') as wnv_json:
    wnv_list = json.load(wnv_json)

# County GeoJSON; each entry of its 'features' list later receives a 'reports'
# property.
with open('../data/california-counties.geojson', encoding='utf-8') as counties_json:
    counties_dict = json.load(counties_json)

### Convert wnv_list to dictionary

In [8]:
# Empty report skeleton: zero overall total, and for each year 2006-2015 a zero
# yearly total plus 53 zeroed weekly slots. Used to fill in counties/years that
# have no records.
template_reports_dict = {
    'Total': 0,
    'By_Year': {
        report_year: {'Yearly_Total': 0, 'By_Week': [0 for _ in range(53)]}
        for report_year in range(2006, 2016)
    },
}

In [9]:
# Re-shape the flat list of records into a nested lookup:
#   county -> {'Total': int,
#              'By_Year': {year: {'Yearly_Total': int, 'By_Week': [int, ...]}}}
wnv_dict = {}
for elem in wnv_list:
    temp_county = elem['County']
    temp_wk_rpt = elem['Week_Reported']
    temp_year = elem['Year']
    temp_cases = elem['Positive_Cases']

    if temp_county not in wnv_dict:
        # First record for this county: start from a private deep copy of the
        # template so every year/week slot exists and no lists are shared
        # between counties.
        wnv_dict[temp_county] = {
            'Total': 0,
            'By_Year': copy.deepcopy(template_reports_dict['By_Year']),
        }

    county_report = wnv_dict[temp_county]
    year_report = county_report['By_Year'][temp_year]
    weekly_counts = year_report['By_Week']

    # Week numbers are 1-based while the list is 0-based. Reject values outside
    # the list explicitly: a week below 1 would otherwise wrap around to the end
    # of the list through negative indexing and be recorded in the wrong slot.
    if not 1 <= temp_wk_rpt <= len(weekly_counts):
        raise ValueError(
            'Week_Reported {!r} out of range 1-{} for {} {}'.format(
                temp_wk_rpt, len(weekly_counts), temp_county, temp_year))

    # Accumulate (rather than overwrite) the weekly slot so that, if the same
    # county/year/week appears more than once, By_Week stays consistent with
    # Yearly_Total and Total, which are both running sums.
    weekly_counts[temp_wk_rpt - 1] += temp_cases
    year_report['Yearly_Total'] += temp_cases
    county_report['Total'] += temp_cases

In [10]:
# Spot-check one county's accumulated total (comparable with the per-county CSV sums)
alameda_report = wnv_dict['Alameda']
alameda_report['Total']

6

### Add WNV reports for matching counties to 'properties' in counties_dict

In [11]:
# Attach a 'reports' property to every county feature in the GeoJSON.
for elem in counties_dict['features']:
    temp_county = elem['properties']['name']
    if temp_county in wnv_dict:
        # County has WNV records: use its accumulated report.
        elem['properties']['reports'] = wnv_dict[temp_county]
    else:
        # No records for this county: give it its own zero-filled report.
        # A deep copy is used so unmatched counties do not all share (and
        # cannot accidentally mutate) the single template object.
        elem['properties']['reports'] = copy.deepcopy(template_reports_dict)

In [13]:
# Write the county GeoJSON, now carrying the per-county reports, to disk
output_path = '../data/WNV_County.geojson'
with open(output_path, mode='w', encoding='utf-8') as geojson_out:
    json.dump(counties_dict, geojson_out)