In [2]:
import pandas as pd 
import geopandas as gpd

from datetime import datetime as dt

import sys
# Make the directory two levels above the working directory importable so that
# `create_plots` can be found there (presumably a project-local module rather
# than an installed package — confirm). The path is relative, so this only
# works when the kernel's working directory is this notebook's folder.
sys.path.append("../../")
import create_plots

In [3]:
# read in data
# Load the inputs: the county-level shapefile used as the base map, and the
# table of weighted averages that gets joined onto it.
ca = gpd.read_file("../county_level_shp/county_level.shp")
data = pd.read_csv("../weighted_averages.csv")

# Parse the timestamp strings in a single vectorized call instead of a
# per-row strptime loop, then keep only the calendar date. The explicit format
# keeps parsing strict (malformed strings still raise), while missing values
# become NaT rather than crashing the whole cell.
data['date_range_start'] = pd.to_datetime(
    data['date_range_start'], format="%Y-%m-%dT%H:%M:%SZ"
).dt.date

In [3]:
# Preview the first five rows of the loaded table.
data.head(n=5)

Unnamed: 0,origin_census_block_group,county_fips,date_range_start,weighted_average
0,60014075002,6001,2019-12-21,8261.455224
1,60190057022,6019,2019-12-21,6267.438356
2,60210101002,6021,2019-12-21,19282.191489
3,60290060071,6029,2019-12-21,18713.533333
4,60310016011,6031,2019-12-21,17818.441558


### Group Data

I added a grouping function. You can use it to group by day, but that doesn't do much; it is only really useful for grouping by week.

In [4]:
# Bucket the rows by week; the result is iterated over by the export
# functions further down.
grouped = create_plots.group_data(
    data=data,
    cols="weighted_average",      # value column to carry through
    date_col="date_range_start",  # column holding the dates
    group_col="county_fips",      # county identifier column
    by="week",
)

The point is that `grouped` is a grouped DataFrame that we can iterate over.

# Heat Map

### Export Function

I broke down the map making process into two parts:

1. Calculate and export the needed data for plotting.
2. Pass in that data into one of the plotting functions.

I did this so that if you want to make multiple plots from the same data, you don't need to recompute the values; you can also easily export the data to a file.

In [5]:
# Compute the heat-map values once so that several plots can reuse them.
heat_export, col_order = create_plots.export_heat_vals(
    grouped=grouped,
    cols="weighted_average",      # column name(s) to compute
    date_col="date_range_start",  # the dates are used for the plot titles
    group_col="county_fips",      # column of the data to join on
    map=ca,
    map_group_col="fips",         # column of the map to join on
)

100%|██████████| 48/48 [00:50<00:00,  1.06s/it]


The first column is a list of quadrants, one per FIPS code, for the stated time period. The last column is the global Moran's I estimate and its p-value.

In [6]:
# Preview the first five rows of the exported heat-map values.
heat_export.head(n=5)

Unnamed: 0,weighted_average,date,weighted_average_g
0,"[0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 0, 0, 0, ...","[2019-12-21, 2019-12-22]","(0.13468559156922646, 0.031499877738438875)"
1,"[0, 0, 0, 0, 0, 3, 0, 3, 3, 0, 0, 0, 0, 0, 0, ...","[2019-12-23, 2019-12-29]","(0.20524491736058084, 0.0032548170939247223)"
2,"[0, 0, 0, 0, 3, 3, 0, 3, 3, 0, 0, 0, 0, 0, 0, ...","[2019-12-30, 2020-01-05]","(0.22906529411641913, 0.001298253955306805)"
3,"[0, 0, 0, 0, 3, 3, 0, 3, 0, 0, 0, 0, 0, 0, 0, ...","[2020-01-06, 2020-01-12]","(0.29756183290336663, 5.944184606965219e-05)"
4,"[0, 0, 0, 1, 3, 3, 0, 3, 0, 0, 0, 0, 0, 0, 0, ...","[2020-01-13, 2020-01-19]","(0.2818071016210822, 0.00012808935137975652)"


## Stacked Heat Map

### Change by Color

Each iteration, the colors get darker.

In [None]:
# Stacked heat map where successive iterations are distinguished by color.
create_plots.create_heat_map_from_export(
    export=heat_export,
    cols="weighted_average",
    date_col="date",            # the export names its date column "date"
    map=ca,
    folder="stacked_heat_map",  # output folder
    color_change="color",
    stack=True,
)

### Change by Opacity

Each iteration, the color stays the same, but the opacity increases, so the fill gets darker.

In [None]:
# Stacked heat map where successive iterations are distinguished by opacity
# rather than by color.
create_plots.create_heat_map_from_export(
    export=heat_export,
    cols="weighted_average",
    date_col="date",
    map=ca,
    folder="stacked_heat_map2",  # output folder
    color_change="opacity",      # the only change from the color variant
    stack=True,
    incl_dd=True,                # also draw the hot-cold and cold-hot areas
)

We can do this on all plots, but notice that on the last one, we set `incl_dd` to `True` to include the hot-cold and cold-hot areas as well.

## Not Stacked Heat Map

In [None]:
# Heat map with stacking turned off.
create_plots.create_heat_map_from_export(
    export=heat_export,
    cols="weighted_average",
    date_col="date",
    map=ca,
    folder="heat_map",  # output folder
    stack=False,
    incl_dd=True,       # also draw the hot-cold and cold-hot areas
)

# Quantile Map

### Export

In [7]:
# Compute the quantile-map values once so the plotting step can reuse them.
quantile_export, col_order = create_plots.export_quantile_vals(
    grouped=grouped,
    cols="weighted_average",
    date_col="date_range_start",
    group_col="county_fips",  # column of the data to join on
    map=ca,
    map_group_col="fips",     # column of the map to join on
)

100%|██████████| 48/48 [00:00<00:00, 182.09it/s]


This returns a DataFrame containing the quantile assignments, the date range, and the upper bounds of the bins.

In [8]:
# Preview the first five rows of the exported quantile values.
quantile_export.head(n=5)

Unnamed: 0,weighted_average,date,weighted_average_bin
0,"[2, 1, 6, 8, 1, 0, 2, 2, 2, 3, 5, 5, 8, 8, 7, ...","[2019-12-21, 2019-12-22]","[10014.667548587548, 10668.506300414958, 11231..."
1,"[8, 1, 6, 7, 2, 1, 3, 1, 3, 4, 8, 7, 9, 8, 5, ...","[2019-12-23, 2019-12-29]","[10955.019153078892, 11544.123853821304, 12268..."
2,"[9, 1, 5, 8, 1, 0, 2, 2, 2, 5, 2, 7, 9, 8, 6, ...","[2019-12-30, 2020-01-05]","[10924.745072884538, 11712.356882070419, 12179..."
3,"[9, 1, 1, 9, 2, 0, 2, 3, 2, 5, 0, 4, 9, 9, 6, ...","[2020-01-06, 2020-01-12]","[10278.686108395563, 10856.43617489041, 11147...."
4,"[8, 2, 1, 9, 2, 0, 3, 1, 2, 6, 0, 6, 9, 8, 5, ...","[2020-01-13, 2020-01-19]","[10726.237431739717, 11123.429976775093, 11637..."


In [None]:
# Draw the quantile maps from the exported values.
create_plots.create_quantile_map_from_export(
    export=quantile_export,
    cols="weighted_average",
    date_col="date",    # the export names its date column "date"
    map=ca,
    folder="quantile",  # output folder
)

For all of these plots, I've also added a `limit` argument if you only want to output a few plots.