In [16]:
#import the zip code boundaries using the geojson from https://data.cityofchicago.org/
import gmaps
from config import gkey
gmaps.configure(api_key=gkey)

In [17]:
# Dependencies
import pandas as pd

In [18]:
# Store filepath in a variable
census_data = "chicago_census_data.csv"
vaccine_data = "COVID-19_Vaccinations_by_ZIP_Code.csv"

In [19]:
census_df = pd.read_csv(census_data, encoding="ISO-8859-1")
vaccine_df = pd.read_csv(vaccine_data,encoding="ISO-8859-1")

In [20]:
#cleaning census so Zip Code column matches Vaccinee CSV Zip Code
census_df = census_df.rename(columns={"Zipcode":"Zip Code"})
census_df.dtypes

Zip Code               int64
Population             int64
Median Age           float64
Household Income       int64
Per Capita Income      int64
Poverty Count          int64
Poverty Rate         float64
dtype: object

In [21]:
#getting rid of 'Unknown' values in vaccine df
vaccine_df = vaccine_df[vaccine_df["Zip Code"] != "Unknown"]
#changing Zip Code from Object to int
vaccine_df["Zip Code"]=vaccine_df["Zip Code"].astype(int)

In [22]:
#merging Data
merge_df = pd.merge(vaccine_df, census_df, on="Zip Code")
merge_df

Unnamed: 0,Zip Code,Date,Total Doses - Daily,Total Doses - Cumulative,1st Dose - Daily,1st Dose - Cumulative,1st Dose - Percent Population,Vaccine Series Completed - Daily,Vaccine Series Completed - Cumulative,Vaccine Series Completed - Percent Population,Population_x,ZIP Code Location,Row_ID,Population_y,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate
0,60603,12/15/2020,0,0,0,0,0.000,0,0,0.000,1052,POINT (-87.625473 41.880112),60603-20201215,1052,31.2,146250,134620,142,13.498099
1,60603,12/16/2020,0,0,0,0,0.000,0,0,0.000,1052,POINT (-87.625473 41.880112),60603-20201216,1052,31.2,146250,134620,142,13.498099
2,60603,12/17/2020,8,8,8,8,0.008,0,0,0.000,1052,POINT (-87.625473 41.880112),60603-20201217,1052,31.2,146250,134620,142,13.498099
3,60603,12/18/2020,6,14,6,14,0.013,0,0,0.000,1052,POINT (-87.625473 41.880112),60603-20201218,1052,31.2,146250,134620,142,13.498099
4,60603,12/19/2020,3,17,3,17,0.016,0,0,0.000,1052,POINT (-87.625473 41.880112),60603-20201219,1052,31.2,146250,134620,142,13.498099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5853,60656,02/09/2021,163,2789,130,2230,0.079,33,559,0.020,28218,POINT (-87.817934 41.974566),60656-20210209,28218,40.1,64440,36778,2129,7.544830
5854,60656,02/14/2021,42,3463,37,2768,0.098,5,695,0.025,28218,POINT (-87.817934 41.974566),60656-20210214,28218,40.1,64440,36778,2129,7.544830
5855,60656,12/27/2020,0,197,0,197,0.007,0,0,0.000,28218,POINT (-87.817934 41.974566),60656-20201227,28218,40.1,64440,36778,2129,7.544830
5856,60656,12/26/2020,3,197,3,197,0.007,0,0,0.000,28218,POINT (-87.817934 41.974566),60656-20201226,28218,40.1,64440,36778,2129,7.544830


In [23]:
test_df = merge_df.loc[merge_df["Population_x"] != merge_df["Population_y"]]
test_df

Unnamed: 0,Zip Code,Date,Total Doses - Daily,Total Doses - Cumulative,1st Dose - Daily,1st Dose - Cumulative,1st Dose - Percent Population,Vaccine Series Completed - Daily,Vaccine Series Completed - Cumulative,Vaccine Series Completed - Percent Population,Population_x,ZIP Code Location,Row_ID,Population_y,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate


In [24]:
final_df = merge_df.drop(columns=['Row_ID', 'Poverty Count', 'Poverty Rate', 'Household Income', 'Population_x', 'Vaccine Series Completed - Daily','Vaccine Series Completed - Cumulative','Vaccine Series Completed  - Percent Population'])
final_df

Unnamed: 0,Zip Code,Date,Total Doses - Daily,Total Doses - Cumulative,1st Dose - Daily,1st Dose - Cumulative,1st Dose - Percent Population,ZIP Code Location,Population_y,Median Age,Per Capita Income
0,60603,12/15/2020,0,0,0,0,0.000,POINT (-87.625473 41.880112),1052,31.2,134620
1,60603,12/16/2020,0,0,0,0,0.000,POINT (-87.625473 41.880112),1052,31.2,134620
2,60603,12/17/2020,8,8,8,8,0.008,POINT (-87.625473 41.880112),1052,31.2,134620
3,60603,12/18/2020,6,14,6,14,0.013,POINT (-87.625473 41.880112),1052,31.2,134620
4,60603,12/19/2020,3,17,3,17,0.016,POINT (-87.625473 41.880112),1052,31.2,134620
...,...,...,...,...,...,...,...,...,...,...,...
5853,60656,02/09/2021,163,2789,130,2230,0.079,POINT (-87.817934 41.974566),28218,40.1,36778
5854,60656,02/14/2021,42,3463,37,2768,0.098,POINT (-87.817934 41.974566),28218,40.1,36778
5855,60656,12/27/2020,0,197,0,197,0.007,POINT (-87.817934 41.974566),28218,40.1,36778
5856,60656,12/26/2020,3,197,3,197,0.007,POINT (-87.817934 41.974566),28218,40.1,36778


In [25]:
#create line graphs that compare % complete on last day for each zip code
#to % complete for Chicago using final df (create a new df with final day)
#save as a png lowest average and highest average for presentation
#(See Matplot Lib Day 2 Activity 7)- Eugene
# if time, line graph 

In [26]:
#create heat map of % complete - Sarah 
#save heatmap for presentation

# Calculate the final vaccine count of each zip code
#get a df with the selected columns
vaccine_heatmap_df = final_df[['Zip Code','Date','1st Dose - Daily','1st Dose - Cumulative','1st Dose - Percent Population','Population_y','ZIP Code Location']]

# Start by getting the last (greatest) timepoint for each zip code
groupby_df = pd.DataFrame({"Vaccine Max":vaccine_heatmap_df.groupby("Zip Code")["1st Dose - Cumulative"].max()})

# Merge this group df with the original dataframe to get the # of vaccines at the last timepoint
merged_df = pd.merge(vaccine_heatmap_df, groupby_df, how='outer', on='Zip Code')
greatest_vaccine_df = merged_df.loc[(merged_df["1st Dose - Cumulative"]==merged_df["Vaccine Max"])]
greatest_vaccine_df = greatest_vaccine_df[['Zip Code','Date','1st Dose - Cumulative','Population_y','1st Dose - Percent Population',
                                           'ZIP Code Location']]

#get lat/lng coordinates in a format that gmaps can use (float)
lng_lat_list = greatest_vaccine_df["ZIP Code Location"].tolist()
cleaned = []
for coord in lng_lat_list:
    x = coord.strip("POINT (").strip(")").replace(" ",",")
    cleaned.append(x)
lat = []
lng = []
for coord in cleaned:
    a,b = coord.split(',')
    lat.append(float(a))
    lng.append(float(b))
cleaned_df = pd.DataFrame({"Lat":lat,
                        "Lng":lng,
                       "ZIP Code Location":lng_lat_list})

#add cleaned lat/lng to greatest_vaccine_df
gmaps_merged_df = pd.merge(greatest_vaccine_df, cleaned_df, how='outer', on='ZIP Code Location')
gmaps_merged_df.head()

Unnamed: 0,Zip Code,Date,1st Dose - Cumulative,Population_y,1st Dose - Percent Population,ZIP Code Location,Lat,Lng
0,60603,03/25/2021,623,1052,0.592,POINT (-87.625473 41.880112),-87.625473,41.880112
1,60604,03/25/2021,432,823,0.525,POINT (-87.629029 41.878153),-87.629029,41.878153
2,60608,03/25/2021,27423,80059,0.343,POINT (-87.670366 41.849879),-87.670366,41.849879
3,60609,03/25/2021,13052,60939,0.214,POINT (-87.653382 41.812017),-87.653382,41.812017
4,60610,03/25/2021,12251,40548,0.302,POINT (-87.63581 41.90455),-87.63581,41.90455


In [27]:
with open('chicago_boundaries.geojson') as f:
    geometry = json.load(f)

In [28]:
#load vaccine completion percentage data
zipcodes = gmaps_merged_df["Zip Code"].to_list()
vaccine_percent = gmaps_merged_df["1st Dose - Percent Population"].round(4).to_list()
rows = list(zip(zipcodes,vaccine_percent))
zipcode2vaccine = dict(rows)

In [29]:
#map colors to vaccine completion percentage
from matplotlib.cm import viridis
from matplotlib.colors import to_hex
def calculate_color(vaccine_percentage):
    inverse_vaccine = 1.0 - vaccine_percentage
    matpltlib_color = viridis(inverse_vaccine)
    gmaps_color = to_hex(matpltlib_color,keep_alpha=False)
    return gmaps_color

colors = []
for feature in geometry['features']:
    zipcode = feature['properties']['zip']
    zipcode = int(zipcode)
    try:
        vaccine_percentage = zipcode2vaccine[zipcode]
        color = calculate_color(vaccine_percentage)
    except KeyError:
        # no zipcode: return default color
        color = (0, 0, 0, 0.3)
    colors.append(color)

fig = gmaps.figure()
vaccine_layer = gmaps.geojson_layer(
    geometry,
    fill_color=colors,
    stroke_color=colors,
    fill_opacity=0.8)
fig.add_layer(vaccine_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [14]:
#median age comparison across zip codes - bar graph - John
#median age comparison to % complete/zip code- if time
#save png for presentation

In [15]:
#create a scatterplot comparing average income per capita and % vaccination- Amy
#calculate the r-value