# Charting the effects of Covid-19:
***
## Key Questions:
1. Correlation between Covid & S&P.
2. Correlation between covid & unemployment.
3. States that were affected the most.
4. Industries that were affected the most.

## Where do you get data:
***
### What can be visualized through geocoding tools?
### and how do you represent the data 
We found data through CSV and API at https://covidtracking.com, and got to work finding ways to plot it. I tried four different tools before I was able to get a "working" map up and running to share with my team.

My first attempt was with Cesiumpy and CesiumJS. I had used them before and felt it might be easier to go with what I was familiar with and could get going in the fastest time... Cesiumpy looked really promising because it was built in Python and seemed to fit right into Jupyter Notebook or JupyterLab (see https://cesiumpy.readthedocs.io/en/latest/basics.html#display-cesium-widget for details), but after multiple days and countless attempts I only ever got a basic HTML map to render with no data, and I had to look for a "working" solution. Next I tried CesiumJS; this was still Cesium, just better, right? ... More days wasted and still no map. I started to go down the road of a local install, but realized my team wouldn't be able to easily use it, so I thought "Docker images" might work, and wasted another day installing a prototype Windows 10 build, just to find that the Docker images available were also too much of a time investment because they didn't work either...

Okay, back to the drawing board.... I needed something easy, reliable, and integrated with our IDE (Jupyter)... I considered Geopandas and ipyleaflet, but just as I started to look into these, I found Folium... and it was like magic! Within a few hours I was able to not only get a map up but also have some content plotting in a heat-map... YES!!! It wasn't all easy, and I still took several development roads that led to broken code... but Folium is where it's at! They even have notebook examples you can download and try out...
* ## Folium [https://pypi.org/project/folium/]
* Examples: https://nbviewer.jupyter.org/github/python-visualization/folium/tree/master/examples/       
 

## Load the libraries and read in data

In [1]:
import folium
import folium.plugins as plugins
from folium.plugins import HeatMap
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
#import geopandas
import random

In [2]:
# Load the per-state Covid history CSV, then list its column headers so we
# can see which fields are available to plot
covid_dataframe = pd.read_csv('spread_hist_by_state.csv')
print(list(covid_dataframe.columns))

['date', 'state', 'positive', 'negative', 'pending', 'hospitalizedCurrently', 'hospitalizedCumulative', 'inIcuCurrently', 'inIcuCumulative', 'onVentilatorCurrently', 'onVentilatorCumulative', 'recovered', 'dataQualityGrade', 'lastUpdateEt', 'hash', 'dateChecked', 'death', 'hospitalized', 'total', 'totalTestResults', 'posNeg', 'fips', 'deathIncrease', 'hospitalizedIncrease', 'negativeIncrease', 'positiveIncrease', 'totalTestResultsIncrease']


In [3]:
# Keep only the columns the map needs, skipping every row whose data quality
# was graded 'F'
covid_data = covid_dataframe[['date', 'state', 'death', 'positive', 'negative','recovered', 'dataQualityGrade', 'lastUpdateEt']].loc[covid_dataframe['dataQualityGrade'] != 'F']
covid_data.head(3)

# Attach a latitude/longitude to each row by state code. The left join keeps
# states that have no entry in the coordinates file; their coordinates are NaN.
state_geo_df = pd.read_csv('state_latlong.csv')
covid_df = pd.merge(covid_data, state_geo_df, how='left', left_on="state", right_on='state')

# Sort chronologically and default only the *counts* to 0. Coordinates are
# deliberately left as NaN: filling them with 0 would pin unmatched states to
# (0, 0), drag the map centre (a mean of the coordinates) towards it, and stop
# a later dropna(subset=['latitude', 'longitude', ...]) from removing them.
plot_df = covid_df[['latitude','longitude','death','date','positive', 'negative', 'state', 'recovered']].sort_values(by='date', axis=0, ascending=True).fillna({'death': 0, 'positive': 0, 'negative': 0, 'recovered': 0})
# print any outliers with blank coordinates
#print(plot_df.loc[plot_df['latitude'].isna()]['state'].unique())
plot_df.head(2)

Unnamed: 0,latitude,longitude,death,date,positive,negative,state,recovered
3913,47.751074,-120.740139,0.0,20200122,1.0,0.0,WA,0.0
3912,47.751074,-120.740139,0.0,20200123,1.0,0.0,WA,0.0


## Set up the map layers

In [4]:
# One dict per map layer, collected in a list. For every layer:
#   title     - label given to the heat-map layer
#   status    - name of the data column that feeds the layer
#   show      - whether the layer is meant to start out visible
#   plot_data - filled in later with one list of heat points per date
#   gr_val    - gradient stop positions, paired by position with gr_clr
#   gr_clr    - gradient stop colours
#   rad       - heat point radius
#   fps       - used as the minimum playback speed of the animation
# NOTE: the gradient is built as a dict keyed by gr_val, so repeated stop
# values within one layer collapse into a single stop (the last colour wins).
layer_types = [
    dict(
        title='Covid : Tested Positive',
        status='positive',
        show=True,
        plot_data=[],
        gr_val=[0.2, 0.9, 0.3],
        gr_clr=['red', 'orange', 'red'],
        rad=1.5,
        fps=5,
    ),
    dict(
        title='Covid : Tested Negative',
        status='negative',
        show=False,
        plot_data=[],
        gr_val=[0.9, 0.3, 0.3],
        gr_clr=['green', 'yellow', 'lime'],
        rad=1,
        fps=5,
    ),
    dict(
        title='Covid : Recovered',
        status='recovered',
        show=False,
        plot_data=[],
        gr_val=[0.5, 0.5, 0.5],
        gr_clr=['blue', 'blue', 'blue'],
        rad=1,
        fps=5,
    ),
    dict(
        title='Covid : Deaths',
        status='death',
        show=False,
        plot_data=[],
        gr_val=[1, 1, 0],
        gr_clr=['rgb(0,0,0)', 'rgb(0,0,0)', 'rgb(255,0,0)'],
        rad=0.5,
        fps=5,
    ),
]

## Process dataframes into lists the hard way (the stupid way)

In [5]:
date_indx = []

# Start every layer from an empty frame list. date_indx is rebuilt from
# scratch each time this cell runs, so without this reset a second run would
# append a duplicate set of frames and leave the data longer than its index.
for layer_type in layer_types:
    layer_type['plot_data'] = []

for date_stp in plot_df['date'].unique():

    # build the time index: split the YYYYMMDD date into its parts and use
    # datetime to reassemble it as a "YYYY-MM-DD" label
    dt = str(date_stp)
    y, m, d = int(dt[:4]), int(dt[4:6]), int(dt[6:])
    date_indx.append(datetime(y,m,d).strftime("%Y-%m-%d"))

    # use loc to pull all data for this day (so the index lines up across the
    # four layers), skipping rows that are missing any plotted value
    heat_df = plot_df.loc[plot_df['date'] == date_stp].dropna(axis=0, subset=['latitude', 'longitude', 'death', 'positive', 'negative', 'recovered', 'date'])

    # temp lists holding this day's points, added to layer_types afterward
    heat_data_pos_date, heat_data_neg_date, heat_data_rec_date, heat_data_dth_date = [], [], [], []

    # loop over the heat_df dataframe using tuples to access the rows
    #  (there is a cleaner way to load a list right into Folium, but errors
    #   keep getting in the way, so brute force is the fallback)
    for row in heat_df.itertuples():
        pos_magna = int(row.positive)
        neg_magna = int(row.negative)
        rec_magna = int(row.recovered)
        dth_magna = int(row.death)

        # A state contributes one point (weight 1) to a layer on any day its
        # count for that layer is above zero.
        if pos_magna > 0:
            # the positive layer's coordinates are scaled by a random factor
            # between 1.001 and 1.01 -- presumably so its points do not sit
            # exactly on top of the other layers' points (TODO confirm intent)
            heat_point = [row.latitude * random.uniform(1.001,1.01), row.longitude * random.uniform(1.001,1.01), 1]
            heat_data_pos_date.append(heat_point)
        if neg_magna > 0:
            heat_point = [row.latitude, row.longitude, 1]
            heat_data_neg_date.append(heat_point)
        if rec_magna > 0:
            heat_point = [row.latitude, row.longitude, 1]
            heat_data_rec_date.append(heat_point)
        if dth_magna > 0:
            heat_point = [row.latitude, row.longitude, 1]
            heat_data_dth_date.append(heat_point)

    #print(f'heat_data_pos_date [{heat_data_pos_date}]')

    # push this day's lists into the layer_types dicts (one frame per date)
    layer_types[0]['plot_data'].append(heat_data_pos_date)
    layer_types[1]['plot_data'].append(heat_data_neg_date)
    layer_types[2]['plot_data'].append(heat_data_rec_date)
    layer_types[3]['plot_data'].append(heat_data_dth_date)

# Print below to ensure the index length matches the data list length
#print(f'layer_types[0]["plot_data"] [{len(layer_types[0]["plot_data"])}] date_indx [{len(date_indx)}]')
#print(f'layer_types[0]["plot_data"] \n{"--"* 40}\n[{layer_types[0]["plot_data"]}]')

## Build the map

In [6]:
# Base map, centred on the mean of the plotted coordinates
main_map = folium.Map(
    [plot_df['latitude'].mean(),plot_df['longitude'].mean()],
    tiles='stamentoner',
    zoom_start=4,
    max_opacity=0.9,
    blur=4
)

layer_control = folium.LayerControl(collapsed=True)

# loop over the layer_types list to build one animated heat layer per entry
for layer_type in layer_types:

    # The layer's initial visibility is passed straight to the heat layer.
    # Previously the 'show' flag was only given to an empty FeatureGroup that
    # was attached to the LayerControl (not to the map), so it never affected
    # the heat layers and every layer started out visible.
    Heat_mapw_time = plugins.HeatMapWithTime(
        data = layer_type['plot_data'],
        index = date_indx,
        auto_play = True,
        use_local_extrema = True,
        radius = layer_type['rad'],
        min_opacity = 0.5,
        speed_step = 2,
        min_speed = layer_type['fps'],
        scale_radius = True,
        name = layer_type['title'],
        show = layer_type['show'],
        # pair each gradient stop with its colour; repeated stop values
        # collapse to a single entry, with the last colour winning
        gradient = dict(zip(layer_type['gr_val'], layer_type['gr_clr']))
    )

    main_map.add_child(Heat_mapw_time)

# add the layer control last so it can list every layer already on the map
layer_control.add_to(main_map)

<folium.map.LayerControl at 0x1d2f0176448>

## Done!!!

In [7]:
# and.... display the map!!!
# (main_map is left as the cell's last expression so the notebook shows it)
main_map

## Display help for key Folium features

In [8]:
# Print the built-in documentation for the two Folium classes used to build
# the map: the animated heat-map layer and the layer control
help(plugins.HeatMapWithTime)
help(folium.LayerControl)

Help on class HeatMapWithTime in module folium.plugins.heat_map_withtime:

class HeatMapWithTime(folium.map.Layer)
 |  HeatMapWithTime(data, index=None, name=None, radius=15, min_opacity=0, max_opacity=0.6, scale_radius=False, gradient=None, use_local_extrema=False, auto_play=False, display_index=True, index_steps=1, min_speed=0.1, max_speed=10, speed_step=0.1, position='bottomleft', overlay=True, control=True, show=True)
 |  
 |  Create a HeatMapWithTime layer
 |  
 |  Parameters
 |  ----------
 |  data: list of list of points of the form [lat, lng] or [lat, lng, weight]
 |      The points you want to plot. The outer list corresponds to the various time
 |      steps in sequential order. (weight is in (0, 1] range and defaults to 1 if
 |      not specified for a point)
 |  index: Index giving the label (or timestamp) of the elements of data. Should have
 |      the same length as data, or is replaced by a simple count if not specified.
 |  name : string, default None
 |      The name 

In [None]:
# Read the merged industries CSV into a dataframe, then evaluate it as the
# cell's last expression so it is shown
industriesDF = pd.read_csv('merged_industries.csv')
industriesDF

## Here below is a broken version
### It's a cleaner data entry method, but for some reason it doesn't display the layer... it also lacks code to split data and display multiple layers, but with more time to debug this is probably the way to go with Folium..

In [None]:

# Single-layer (deaths only) map built straight from the dataframe.
#
# Why the earlier attempt drew nothing: it handed HeatMapWithTime one flat
# list of [latitude, longitude, death] rows. The plugin expects a list of time
# steps, each holding its own list of [lat, lng, weight] points, with weight
# in the (0, 1] range. It also read heat_df, which only held whatever an
# earlier cell left behind, so the data is now taken from plot_df directly.

main_map = folium.Map(
    [plot_df['latitude'].mean(),plot_df['longitude'].mean()],
    tiles='stamentoner',
    zoom_start=4,
    max_opacity=0.9,
    blur=4
)

layer_control = folium.LayerControl(collapsed=True)

try:
    # rows without coordinates, a date or a death count cannot be plotted
    death_df = plot_df.dropna(axis=0, subset=['latitude', 'longitude', 'death', 'date'])

    # one total per date and location, keeping only locations with deaths
    daily_deaths = death_df.groupby(['date', 'latitude', 'longitude'], as_index=False)['death'].sum()
    daily_deaths = daily_deaths.loc[daily_deaths['death'] > 0]

    # scale by the largest total so every weight lands in (0, 1]
    max_death = daily_deaths['death'].max()

    # one frame (list of points) and one "YYYY-MM-DD" label per date; dates
    # on which no location has a death yet are simply absent from the timeline
    death_frames, death_index = [], []
    for date_stp, day_df in daily_deaths.groupby('date'):
        day_weights = day_df['death'] / max_death
        death_frames.append([
            [lat, lon, weight]
            for lat, lon, weight in zip(day_df['latitude'], day_df['longitude'], day_weights)
        ])
        dt = str(int(date_stp))
        death_index.append(datetime(int(dt[:4]), int(dt[4:6]), int(dt[6:])).strftime("%Y-%m-%d"))

    Heat_mapw_time = plugins.HeatMapWithTime(
        data = death_frames,
        index = death_index,
        auto_play = True,
        use_local_extrema = True,
        radius = 3,
        min_opacity = 0.5,
        scale_radius = True,
        name = 'Covid : Deaths'
    )
    main_map.add_child(Heat_mapw_time)

except Exception as e:
    # best-effort cell: report the problem and still show the base map
    print(f'An ERROR occured [{e}]')

# add the layer control
layer_control.add_to(main_map)

main_map

In [None]:
# NOTE: everything in this cell sits inside one triple-quoted string, so none
# of it executes. It is earlier scratch code for a plain (non-animated)
# HeatMap of a single date. It references a 'total' column that plot_df, as
# built in this notebook, does not contain.
'''


#g1 = folium.plugins.FeatureGroupSubGroup(mcg, 'g1') # First group, in mcg
#g2 = folium.plugins.FeatureGroupSubGroup(mcg, 'g2') # Second group, in mcg
#m.add_child(mcg)
#m.add_child(g1)
#m.add_child(g2)
#g1.add_child(folium.Marker([0,0]))
#g2.add_child(folium.Marker([0,1]))
#folium.LayerControl().add_to(m)



#  zoom_start=6
params = {
    'location':[plot_df['latitude'].mean(),plot_df['longitude'].mean()],
    'tiles':'stamentoner',
    'min_lat':plot_df['latitude'].min(),
    'max_lat':plot_df['latitude'].max(),
    'min_lon':plot_df['longitude'].min(),
    'max_lon':plot_df['longitude'].max()
}
#        min_lat=plot_df['latitude'].min(),
#        max_lat=plot_df['latitude'].max(),
#        min_lon=plot_df['longitude'].min(),
#        max_lon=plot_df['longitude'].max(),
covid_map = folium.Map(
        [plot_df['latitude'].mean(),plot_df['longitude'].mean()],
        tiles='stamentoner',
        zoom_start=5
    )


#heat_df = plot_df.dropna(axis=0, subset=['latitude','longitude'])
#heat_data = [[row['latitude'],row['longitude']] for index, row in heat_df.iterrows()]

heat_df = plot_df.loc[plot_df['date'] == 20200315].dropna(axis=0, subset=['latitude','longitude', 'total'])
print(f'heat_df {len(heat_df)}')
#heat_df['weight'] = heat_df['total'].astype(float)
#heat_data = [[[row['latitude'],row['longitude']] for index, row in heat_df[heat_df['weight'] == i].iterrows()] for i in range(0,plot_df['total'].max())]

#heat_time = list(heat_df.dropna(axis=0, subset=['date']))
heat_time =  [[row['date']] for index, row in heat_df.iterrows()]
#heat_time =  [[index] for index, row in heat_df.iterrows()]

#heat_time = plot_df[])#.toList()
heat_data = []
for row in heat_df.itertuples():
    #print(row.total)
    heat_data.append([row.latitude,row.longitude, row.total])# for i in range(0, row.total)]
#    [heat_data.append([row.latitude,row.longitude, row.total]) for i in range(0, row.total)]
#heat_data = [[row['latitude'],row['longitude'],row['total']] for index, row in heat_df.iterrows()]
len(heat_data)

                
#     for index, row in heat_df.iterrows()]
print(heat_data[:3])

HeatMap(heat_data).add_to(covid_map)
covid_map'''