In [1]:
import pandas as pd
import scipy.stats as sts
import matplotlib.pyplot as plt
import numpy as np
import gmaps
import requests
import json

#import API key for gmaps
from config import gkey
gmaps.configure(api_key=gkey)

In [2]:
college_data = pd.read_csv('Data/college_data.csv')
college_data.head()

Unnamed: 0,School_name,Longitude,Latitude,Total_Enrollment
0,Academy of Art University,-122.400578,37.787943,9812.0
1,Academy of Chinese Culture and Health Sciences,-122.269839,37.805972,133.0
2,Avalon School of Cosmetology-Alameda,-122.243566,37.764203,47.0
3,College of Alameda,-122.279303,37.781017,5667.0
4,Allan Hancock College,-120.421144,34.943716,11894.0


In [3]:
clean_case = pd.read_csv('Data/clean_case.csv')
clean_case.head()

Unnamed: 0,date,area,population,cases,deaths,total_tests,positive_tests
0,2021-03-31,Alameda,1685886.0,98.0,1.0,10553.0,123.0
1,2021-03-31,Alpine,1117.0,0.0,0.0,3.0,0.0
2,2021-03-31,Amador,38531.0,3.0,0.0,317.0,8.0
3,2021-03-31,Butte,217769.0,11.0,0.0,632.0,11.0
4,2021-03-31,Calaveras,44289.0,4.0,0.0,80.0,4.0


In [4]:
vaccine_df = pd.read_csv('Data/vaccine_df.csv')
vaccine_df.head()

Unnamed: 0,county,administered_date,total_doses,pfizer_doses,moderna_doses,jj_doses,partially_vaccinated,total_partially_vaccinated,fully_vaccinated,cumulative_fully_vaccinated,at_least_one_dose
0,Alameda,12/15/2020,36,36,0,0,36,36,0,0,36
1,Alameda,12/16/2020,202,202,0,0,202,238,0,0,202
2,Alameda,12/17/2020,396,396,0,0,396,634,0,0,396
3,Alameda,12/18/2020,1766,1765,1,0,1766,2400,0,0,1766
4,Alameda,12/19/2020,1357,1357,0,0,1357,3757,0,0,1357


In [26]:
# Splitting and grouping Case data by year and county

clean_case_2020 = (clean_case.loc[clean_case['date']<'2021-01-01']).groupby('area') \
['cases', 'deaths', 'positive_tests'].sum()
clean_case_2020.head()

  clean_case_2020 = (clean_case.loc[clean_case['date']<'2021-01-01']).groupby('area') \


Unnamed: 0_level_0,cases,deaths,positive_tests
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alameda,56435.0,695.0,64527.0
Alpine,70.0,0.0,30.0
Amador,2715.0,25.0,3051.0
Butte,8027.0,127.0,8133.0
Calaveras,1251.0,28.0,1380.0


In [6]:
clean_case_2021 = (clean_case.loc[clean_case['date']>='2021-01-01']).groupby('area') \
['cases', 'deaths', 'positive_tests'].sum()
clean_case_2021.tail()

  clean_case_2021 = (clean_case.loc[clean_case['date']>='2021-01-01']).groupby('area') \


Unnamed: 0_level_0,cases,deaths,positive_tests
area,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Tulare,12682.0,315.0,15157.0
Tuolumne,965.0,24.0,1149.0
Ventura,34098.0,607.0,43862.0
Yolo,4216.0,66.0,4094.0
Yuba,1651.0,24.0,1958.0


In [7]:
# Grouping Vaccine data by county and finding dose and fully vaccinated totals

vaccine_grouped = vaccine_df.groupby('county').agg({'total_doses': 'sum',
                                                  'cumulative_fully_vaccinated': 'max'})
vaccine_grouped.tail()

Unnamed: 0_level_0,total_doses,cumulative_fully_vaccinated
county,Unnamed: 1_level_1,Unnamed: 2_level_1
Tulare,247750,107059
Tuolumne,36523,15828
Ventura,627733,262419
Yolo,165608,66441
Yuba,30927,13560


In [8]:
# Create a list of Counties included in data sets to use for geocoding API
# Create columns to append in for loop

print(len(clean_case['area'].unique()))
print(len(vaccine_df['county'].unique()))
counties = clean_case['area'].unique()
counties

58
58


array(['Alameda', 'Alpine', 'Amador', 'Butte', 'Calaveras', 'Colusa',
       'Contra Costa', 'Del Norte', 'El Dorado', 'Fresno', 'Glenn',
       'Humboldt', 'Imperial', 'Inyo', 'Kern', 'Kings', 'Lake', 'Lassen',
       'Los Angeles', 'Madera', 'Marin', 'Mariposa', 'Mendocino',
       'Merced', 'Modoc', 'Mono', 'Monterey', 'Napa', 'Nevada', 'Orange',
       'Placer', 'Plumas', 'Riverside', 'Sacramento', 'San Benito',
       'San Bernardino', 'San Diego', 'San Francisco', 'San Joaquin',
       'San Luis Obispo', 'San Mateo', 'Santa Barbara', 'Santa Clara',
       'Santa Cruz', 'Shasta', 'Sierra', 'Siskiyou', 'Solano', 'Sonoma',
       'Stanislaus', 'Sutter', 'Tehama', 'Trinity', 'Tulare', 'Tuolumne',
       'Ventura', 'Yolo', 'Yuba'], dtype=object)

In [9]:
# Creating Lists to fill coordinates
lat = []
lng = []

# Set up for API call
base_url = 'https://maps.googleapis.com/maps/api/geocode/json?address='
    
for county in counties:
    target_county = f'{base_url}{county},+CA&key={gkey}'
    response = requests.get(target_county).json()
    try:
        lat.append(response['results'][0]['geometry']['location']['lat'])
        lng.append(response['results'][0]['geometry']['location']['lng'])
    except:
        print(f'{county} could not be located...Skipping')
        pass
print('Coordinates have been added to datasets.')

Coordinates have been added to datasets.


In [11]:
county_dict = {'county': counties,
              'latitude': lat,
              'longitude': lng}
county_data = pd.DataFrame(county_dict)
county_data.head()

Unnamed: 0,county,latitude,longitude
0,Alameda,37.779872,-122.282185
1,Alpine,32.835052,-116.766411
2,Amador,38.348892,-120.774093
3,Butte,39.625395,-121.537
4,Calaveras,38.196048,-120.680504


In [27]:
print(college_data['Total_Enrollment'].max())
print(clean_case_2020['cases'].max())
print(clean_case_2021['cases'].max())
print(vaccine_grouped['total_doses'].max())
print(vaccine_grouped['cumulative_fully_vaccinated'].max())

48321.0
837233.0
344672.0
7003153
2839079


In [35]:
# Creating heat map of college populations and Covid-19 cases

center = 36.7783,-119.4179
schools = college_data[['Latitude', 'Longitude']]
enrollment = college_data['Total_Enrollment']
counties = county_data[['latitude', 'longitude']]
cases = clean_case_2020['cases']

fig1 = gmaps.figure(center=center, zoom_level=5.5)

heatmap_layer_s = gmaps.heatmap_layer(schools, weights=enrollment, dissipating=False)
heatmap_layer_s.max_intensity = 25000
heatmap_layer_s.point_radius = 0.25
heatmap_layer_s.gradient = ['white', 'aqua', 'blue']

heatmap_layer_c = gmaps.heatmap_layer(counties, weights=cases, dissipating=False)
heatmap_layer_c.max_intensity = 150000
heatmap_layer_c.point_radius = 0.25

fig1.add_layer(heatmap_layer_s)
fig1.add_layer(heatmap_layer_c)

fig1

Figure(layout=FigureLayout(height='420px'))