# Coursera Capstone Project

### This notebook will contain the code for completing the capstone project

In [40]:
import pandas as pd
import numpy as np

### Get the Covid-19 daily results from the web


In [41]:
#Load Data
import requests

url = 'https://www.worldometers.info/coronavirus/usa/florida/'   #table containing counts by caounty

header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}                                          # set headers for compatability
r = requests.get(url, headers=header)      # make the get request
df = pd.read_html(r.text)                  # get the tables
df_fla = df[0]                             # keep only the case data
df_fla                                     # show the table

Unnamed: 0,County,TotalCases,NewCases,TotalDeaths,NewDeaths,ActiveCases,TotalTests,Source
0,Florida Total,389868,+10249,5520.0,173.0,344397.0,3215185,
1,Miami-Dade,95068,+2723,1354.0,12.0,93714.0,535356,[county]* [state]
2,Broward,45010,+1263,536.0,7.0,44474.0,343039,[county]* [state]
3,Palm Beach,28267,+761,727.0,18.0,27540.0,227538,[county]* [state]
4,Hillsborough,25432,+541,278.0,18.0,25154.0,197601,[county] [state]
...,...,...,...,...,...,...,...,...
65,Jefferson,156,+34,5.0,,151.0,1965,[county]* [state]
66,Franklin,82,+1,1.0,,81.0,2245,[county]* [state]
67,Lafayette,75,+1,,,75.0,772,[county]* [state]
68,Unassigned,597,+35,,,,5504,


In [42]:
#Exploritory Data Analysis - we do not need all the columns provided. We also will remove rows with Unassigned County
# I also set the NaN values to 0 (zero)
#drop the columns we are not using (NewCases, NewDeaths, ActiveCases, Source)
df_fla.drop(['NewCases', 'NewDeaths', 'ActiveCases', 'Source'], axis = 1, inplace=True) 
df_fla.drop(index=0, inplace=True)                           # remove the Florid Total row

indexNames = df_fla[ df_fla['County'] == 'Unassigned'].index  # get rows where county is unassigned 
df_fla.drop(indexNames , inplace=True)                        # drop them

indexNames = df_fla[ df_fla['County'] == 'Total:'].index  # get rows where county is Total
df_fla.drop(indexNames , inplace=True)                         #drop it

df_fla.fillna(0, inplace=True)                                              # fill NaN values with 0
df_fla.reset_index(drop=True, inplace=True)                  # reset the index
df_fla

Unnamed: 0,County,TotalCases,TotalDeaths,TotalTests
0,Miami-Dade,95068,1354.0,535356
1,Broward,45010,536.0,343039
2,Palm Beach,28267,727.0,227538
3,Hillsborough,25432,278.0,197601
4,Orange,25254,156.0,225348
...,...,...,...,...
62,Dixie,208,4.0,2130
63,Union,169,3.0,2959
64,Jefferson,156,5.0,1965
65,Franklin,82,1.0,2245


In [43]:
#copy the datafram to preserve the original and add the Long and Lat columns
df_tmp = df_fla
df_tmp['Latitude'], df_tmp['Longitude'] = [1.1, 1.2]
df_tmp

Unnamed: 0,County,TotalCases,TotalDeaths,TotalTests,Latitude,Longitude
0,Miami-Dade,95068,1354.0,535356,1.1,1.2
1,Broward,45010,536.0,343039,1.1,1.2
2,Palm Beach,28267,727.0,227538,1.1,1.2
3,Hillsborough,25432,278.0,197601,1.1,1.2
4,Orange,25254,156.0,225348,1.1,1.2
...,...,...,...,...,...,...
62,Dixie,208,4.0,2130,1.1,1.2
63,Union,169,3.0,2959,1.1,1.2
64,Jefferson,156,5.0,1965,1.1,1.2
65,Franklin,82,1.0,2245,1.1,1.2


In [44]:
import geocoder # import geocoder

# loop over the counties
print('Looping over the list of Counties.')
for index, row in df_tmp.iterrows():
    g=geocoder.arcgis('{}, Florida'.format(row.County))              #get each tal, long
    lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]                                     # assign values
    longitude = lat_lng_coords[1]
    df_tmp.at[index, 'Latitude'] = latitude                         # update the dataframe
    df_tmp.at[index, 'Longitude'] = longitude
    print(row.County+'                   ', end='\r')               # Show progress bar
print('                                        ')                   # clear progress bar
print('Complete')
df_tmp.at[4, 'Latitude'] = 28.540786                         # fix coords for Orange county since geo picks orange NY
df_tmp.at[4, 'Longitude'] = -81.370648
df_tmp

Looping over the list of Counties.
                                        
Complete


Unnamed: 0,County,TotalCases,TotalDeaths,TotalTests,Latitude,Longitude
0,Miami-Dade,95068,1354.0,535356,25.614182,-80.567908
1,Broward,45010,536.0,343039,26.152246,-80.487053
2,Palm Beach,28267,727.0,227538,26.703060,-80.036610
3,Hillsborough,25432,278.0,197601,27.930230,-82.307857
4,Orange,25254,156.0,225348,28.540786,-81.370648
...,...,...,...,...,...,...
62,Dixie,208,4.0,2130,27.938700,-82.486100
63,Union,169,3.0,2959,30.803800,-86.042160
64,Jefferson,156,5.0,1965,30.437669,-83.895184
65,Franklin,82,1.0,2245,29.749190,-85.028540


In [46]:
#trun off the depreciation warnings
import warnings
warnings.filterwarnings('ignore')

# definition of the boundaries in the map
county_geo = 'florida_counties.geojson'
  
# get county name and count of cases
map_data = df_tmp
map_data.sort_values(by='TotalCases', ascending=True)
  
# creation of the choropleth
import folium
map1 = folium.Map(location=[27.6648, -81.5158], zoom_start=7)
map1.choropleth(geo_data = county_geo,
              data = map_data,
              columns = ['County', 'TotalCases'],
              key_on = 'feature.properties.county',
              fill_color = 'BuPu', 
              fill_opacity = 0.4, 
              line_opacity = 0.2,
              legend_name = 'Number of Covid-19 cases by County', 
                bins=3)
               
display(map1)

### Comment: 
Since the range of case countvary drastically, the heat map is not effect.
I test the bubble map below which presents a superiour view if the case count by county

In [47]:
# Plot all counties where total case >= 1000
map2 = folium.Map(location=[27.6648, -81.5158], zoom_start=7)

map_data2 = map_data              #copy df to retain original
map_data2 = map_data2.astype({'TotalCases': 'float'})   # change data type to float for use in the Circle function

# I can add marker one by one on the map
for i in range(0,len(map_data2)):
    if map_data2.iloc[i]['TotalCases'] > 999:
        folium.Circle(
          location=[map_data2.iloc[i]['Latitude'], map_data2.iloc[i]['Longitude']],
          popup=map_data2.iloc[i]['TotalCases'],
          radius=map_data2.iloc[i]['TotalCases'],
          color='crimson',
          fill=True,
          fill_color='crimson',
       ).add_to(map2)

display(map2)

In [67]:
# Get input from user to swelect region of fl to view
from ipywidgets import Layout, Button, Box, FloatText, Textarea, Dropdown, Label, IntSlider

#define widgets
meters = IntSlider(min=0, max=100, step=5)
meters.value=5
area = Dropdown(options=['North Florida', 'Central Florida', 'South Florida'])
venues = Dropdown(options=['Beach', 'Parks', 'Theme Parks'])

#define layout
form_item_layout = Layout(
    display='flex',
    flex_flow='row',
    justify_content='space-between'
)

#create form items
form_items = [
    Box([Label(value='Search Distance: miles'), meters], layout=form_item_layout),
    Box([Label(value='Select Area'),
         area], layout=form_item_layout),
    Box([Label(value='Select Venue'),
         venues], layout=form_item_layout),
]

#create form
form = Box(form_items, layout=Layout(
    display='flex',
    flex_flow='column',
    border='solid 2px',
    align_items='stretch',
    width='50%'
))
form        #display form for user input

Box(children=(Box(children=(Label(value='Search Distance: miles'), IntSlider(value=5, step=5)), layout=Layout(…

In [68]:
import yaml
import sys, os

with open("c:\Python\Python38\config.yml", "r") as ymlfile:
    cfg = yaml.safe_load(ymlfile)

CLIENT_ID = cfg['client_id']
CLIENT_SECRET = cfg['client_secret']
VERSION = '20180605' # Foursquare API version

#get credentials from config file


In [69]:
#find chosen venues in the are selected
print(area.value, venues.value)   #verify selected choices

#setup foursquare search values
limit = 100
radius = meters.value * 1609

#setup caegory search values
cat_beach = '4bf58dd8d48988d1e2941735'
cat_theme = '4bf58dd8d48988d182941735'
cat_park = '52e81612bcbc57f1066b7a21'

if venues.value == 'Beach':     # which category was selected
    cat_id = cat_beach
elif venues.value == 'Theme Parks':
    cat_id = cat_theme
else:
    cat_id = cat_park

if area.value == 'Central Florida':     #set coords to Orlando
    area_lat = 28.540786
    area_long = -81.676325
    url = 'https://api.foursquare.com/v2/venues/search?categoryId={}&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(cat_id, CLIENT_ID, CLIENT_SECRET, VERSION, area_lat, area_long,  radius, limit)
elif area.value == 'North Florida':     #set coords to Jacksonville
    area_lat = 30.474970
    area_long = -81.370648
    url = 'https://api.foursquare.com/v2/venues/search?categoryId={}&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(cat_id, CLIENT_ID, CLIENT_SECRET, VERSION, area_lat, area_long,  radius, limit)
elif area.value == 'South Florida':     #set coords to Miami
    area_lat = 25.801247
    area_long = -80.221564
    url = 'https://api.foursquare.com/v2/venues/search?categoryId={}&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(cat_id, CLIENT_ID, CLIENT_SECRET, VERSION, area_lat, area_long,  radius, limit)

else:
    print(area.value)

Central Florida Theme Parks


In [70]:
results = requests.get(url).json()    # make the Foursquare request
results

{'meta': {'code': 200, 'requestId': '5f19e46a0c154050638f309f'},
 'response': {'venues': [{'id': '4acb60d2f964a52094c320e3',
    'name': "Disney's Hollywood Studios",
    'location': {'address': '351 S Studio Dr',
     'crossStreet': 'E Buena Vista Dr',
     'lat': 28.358045447549014,
     'lng': -81.55914187004456,
     'labeledLatLngs': [{'label': 'routing',
       'lat': 28.353703311855732,
       'lng': -81.55956029891968},
      {'label': 'display',
       'lat': 28.358045447549014,
       'lng': -81.55914187004456}],
     'distance': 23353,
     'postalCode': '32830',
     'cc': 'US',
     'neighborhood': 'Walt Disney World Resort',
     'city': 'Lake Buena Vista',
     'state': 'FL',
     'country': 'United States',
     'formattedAddress': ['351 S Studio Dr (E Buena Vista Dr)',
      'Lake Buena Vista, FL 32830',
      'United States']},
    'categories': [{'id': '4bf58dd8d48988d182941735',
      'name': 'Theme Park',
      'pluralName': 'Theme Parks',
      'shortName': 'Theme

In [71]:
from pandas.io.json import json_normalize

venues = results['response']['venues']   #get venues fron json
    
nearby_venues = json_normalize(venues) # flatten JSON

# filter columns
filtered_columns = ['name', 'location.lat', 'location.lng', 'id']
nearby_venues = nearby_venues.loc[:, filtered_columns]

#set lat & long to float
nearbu_venues =  nearby_venues.astype({'location.lat': 'float'})   # change data type to float for use coords
nearbu_venues =  nearby_venues.astype({'location.lng': 'float'})
nearby_venues.head()

Unnamed: 0,name,location.lat,location.lng,id
0,Disney's Hollywood Studios,28.358045,-81.559142,4acb60d2f964a52094c320e3
1,Epcot,28.374694,-81.549404,4b301d74f964a52053f624e3
2,Magic Kingdom® Park,28.416315,-81.581029,4b11d311f964a520758523e3
3,Test Track Presented by Chevrolet,28.372799,-81.547224,4b0b1c8bf964a520122d23e3
4,Soarin',28.373658,-81.552007,4b3e51fdf964a520339b25e3


In [72]:
#add venue markers to map

for idx, row in nearby_venues.iterrows():
    folium.Marker([row['location.lat'], row['location.lng']], popup=row['name']).add_to(map2)
    
display(map2)

### Conclusion

For Beach goers South Jacksonville, Cenral Florida and the entire Gulf Coast look the safest.
For Theme Parks, Disney looks relatively safe.
National Parks not in 'Hot' zones would likley be safe however many of them remain closed and must be researched individually.