In [1]:
#dependencies and setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import os
import json

#import API key
from config import g_key

#access maps with unique API key
gmaps.configure(api_key=g_key)

In [2]:
#import cities.csv file into a dataframe
cities_df = pd.read_csv("cities.csv")

#get rid of 'City_ID' column
cities_df = cities_df[["City", "Cloudiness", "Country", 
                       "Date", "Humidity", "Lat", "Lng", "Max Temp", "Wind Speed"]]
cities_df

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,Jamestown,90,US,1579882002,40,42.1,-79.24,43.0,10.29
1,Winneba,0,GH,1579882001,62,5.35,-0.62,89.6,10.29
2,Mumford,0,GH,1579881998,38,5.26,-0.76,95.05,11.54
3,Port-Gentil,40,GA,1579881886,70,-0.72,8.78,91.4,10.29
4,Omboue,94,GA,1579882003,70,-1.57,9.26,85.96,4.74
5,Anloga,15,GH,1579882000,46,5.79,0.9,91.4,10.29
6,Takoradze,0,GH,1579881894,52,4.88,-1.76,91.38,10.98
7,Gamba,69,GA,1579881996,74,-2.65,10.0,83.91,8.52
8,Teshie,0,GH,1579882184,62,5.58,-0.11,89.6,10.29


In [3]:
#create heatmap based on humidity

#store latitude and longitude into the variable 'coordinates'
coordinates = cities_df[["Lat", "Lng"]]

#convert humidity to float; all data read from a csv is read as a string
humidity = cities_df["Humidity"].astype(float)

In [4]:
#plot heatmap
fig = gmaps.figure()

#create heat layer
heat_layer = gmaps.heatmap_layer(coordinates, weights=humidity, 
                                 dissipating=False, max_intensity=100,
                                 point_radius=1)

#add layer
fig.add_layer(heat_layer)

# Display figure
fig

Figure(layout=FigureLayout(height='420px'))

In [5]:
cities_df

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,Jamestown,90,US,1579882002,40,42.1,-79.24,43.0,10.29
1,Winneba,0,GH,1579882001,62,5.35,-0.62,89.6,10.29
2,Mumford,0,GH,1579881998,38,5.26,-0.76,95.05,11.54
3,Port-Gentil,40,GA,1579881886,70,-0.72,8.78,91.4,10.29
4,Omboue,94,GA,1579882003,70,-1.57,9.26,85.96,4.74
5,Anloga,15,GH,1579882000,46,5.79,0.9,91.4,10.29
6,Takoradze,0,GH,1579881894,52,4.88,-1.76,91.38,10.98
7,Gamba,69,GA,1579881996,74,-2.65,10.0,83.91,8.52
8,Teshie,0,GH,1579882184,62,5.58,-0.11,89.6,10.29


In [6]:
specs_df = cities_df.loc[(cities_df["Max Temp"] > 40) &
                        (cities_df["Humidity"] < 93) & 
                        (cities_df["Wind Speed"] < 3) | 
                        (cities_df["Wind Speed"] > 8), :].reset_index()
specs_df = specs_df[["City", "Cloudiness", "Country", "Date", "Humidity", "Lat", "Lng", "Max Temp", "Wind Speed"]]
specs_df

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,Jamestown,90,US,1579882002,40,42.1,-79.24,43.0,10.29
1,Winneba,0,GH,1579882001,62,5.35,-0.62,89.6,10.29
2,Mumford,0,GH,1579881998,38,5.26,-0.76,95.05,11.54
3,Port-Gentil,40,GA,1579881886,70,-0.72,8.78,91.4,10.29
4,Anloga,15,GH,1579882000,46,5.79,0.9,91.4,10.29
5,Takoradze,0,GH,1579881894,52,4.88,-1.76,91.38,10.98
6,Gamba,69,GA,1579881996,74,-2.65,10.0,83.91,8.52
7,Teshie,0,GH,1579882184,62,5.58,-0.11,89.6,10.29


In [7]:
nan_df = specs_df[specs_df.isna().any(axis=1)]
nan_df

hotel_df = specs_df.dropna()
hotel_df

#try this on the larger dataframe
#cities_nan = cities_df[cities_df.isna().any(axis=1)]
#cities_nan
#if there are nan values, then save by coding cities_df = cities_df.dropna()

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,Jamestown,90,US,1579882002,40,42.1,-79.24,43.0,10.29
1,Winneba,0,GH,1579882001,62,5.35,-0.62,89.6,10.29
2,Mumford,0,GH,1579881998,38,5.26,-0.76,95.05,11.54
3,Port-Gentil,40,GA,1579881886,70,-0.72,8.78,91.4,10.29
4,Anloga,15,GH,1579882000,46,5.79,0.9,91.4,10.29
5,Takoradze,0,GH,1579881894,52,4.88,-1.76,91.38,10.98
6,Gamba,69,GA,1579881996,74,-2.65,10.0,83.91,8.52
7,Teshie,0,GH,1579882184,62,5.58,-0.11,89.6,10.29


In [8]:
#set up additional columns to hold information
hotel_df["Hotel Name"] = ""
hotel_df

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed,Hotel Name
0,Jamestown,90,US,1579882002,40,42.1,-79.24,43.0,10.29,
1,Winneba,0,GH,1579882001,62,5.35,-0.62,89.6,10.29,
2,Mumford,0,GH,1579881998,38,5.26,-0.76,95.05,11.54,
3,Port-Gentil,40,GA,1579881886,70,-0.72,8.78,91.4,10.29,
4,Anloga,15,GH,1579882000,46,5.79,0.9,91.4,10.29,
5,Takoradze,0,GH,1579881894,52,4.88,-1.76,91.38,10.98,
6,Gamba,69,GA,1579881996,74,-2.65,10.0,83.91,8.52,
7,Teshie,0,GH,1579882184,62,5.58,-0.11,89.6,10.29,


In [9]:
#want to find a hotel within 5000 meters of each (lat, lng) in my chosen destinations

#create params dict that will be updated with a hotel name each iteration
params = {"keyword": "Hotel",
          "radius": 5000,
          "key": g_key}

# Use the lat/lng in our hotel_df to identify hotels
for index, row in hotel_df.iterrows():
    # get lat, lng from df
    lat = row["Lat"]
    lng = row["Lng"]

    # change location each iteration while leaving original params in place
    params["location"] = f"{lat},{lng}"

    # Use the search term: "Hotel" and our lat/lng
    base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

    # make request and print url
    hotel_request = requests.get(base_url, params=params)
    
    #print the hotel_request url, avoid doing for public github repos in order to avoid exposing key
    #print(hotel_request.url)

    # convert to json
    hotel_request = hotel_request.json()
    #print(json.dumps(hotel_request, indent=4, sort_keys=True))

    # Since some data may be missing we incorporate a try-except to skip any that are missing a data point.
    try:
        hotel_df.loc[index, "Hotel Name"] = hotel_request["results"][0]["name"]
   
    except (KeyError, IndexError):
        print("Missing field/result... skipping.")
        
#preview to make sure the dataframe updates
hotel_df       

Missing field/result... skipping.


Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed,Hotel Name
0,Jamestown,90,US,1579882002,40,42.1,-79.24,43.0,10.29,DoubleTree by Hilton Jamestown
1,Winneba,0,GH,1579882001,62,5.35,-0.62,89.6,10.29,Lagoon Lodge
2,Mumford,0,GH,1579881998,38,5.26,-0.76,95.05,11.54,Blue Diamond Beach and Spa Resort
3,Port-Gentil,40,GA,1579881886,70,-0.72,8.78,91.4,10.29,Jimmy Hotel
4,Anloga,15,GH,1579882000,46,5.79,0.9,91.4,10.29,Dzigbodi Guest Houselon
5,Takoradze,0,GH,1579881894,52,4.88,-1.76,91.38,10.98,Protea Hotel by Marriott Takoradi Select
6,Gamba,69,GA,1579881996,74,-2.65,10.0,83.91,8.52,
7,Teshie,0,GH,1579882184,62,5.58,-0.11,89.6,10.29,La Palm Royal Beach Hotel


In [None]:
#dropna already exists above.  Should I do it twice?  Will need to test with larger dataset

In [10]:
#want to drop rows with missing 'Hotel Name' data

#first fill empty 'Hotel Name' cells with NaN
hotel_df = hotel_df.replace(r'^\s*$', np.nan, regex=True)
hotel_df



Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed,Hotel Name
0,Jamestown,90,US,1579882002,40,42.1,-79.24,43.0,10.29,DoubleTree by Hilton Jamestown
1,Winneba,0,GH,1579882001,62,5.35,-0.62,89.6,10.29,Lagoon Lodge
2,Mumford,0,GH,1579881998,38,5.26,-0.76,95.05,11.54,Blue Diamond Beach and Spa Resort
3,Port-Gentil,40,GA,1579881886,70,-0.72,8.78,91.4,10.29,Jimmy Hotel
4,Anloga,15,GH,1579882000,46,5.79,0.9,91.4,10.29,Dzigbodi Guest Houselon
5,Takoradze,0,GH,1579881894,52,4.88,-1.76,91.38,10.98,Protea Hotel by Marriott Takoradi Select
6,Gamba,69,GA,1579881996,74,-2.65,10.0,83.91,8.52,
7,Teshie,0,GH,1579882184,62,5.58,-0.11,89.6,10.29,La Palm Royal Beach Hotel


In [11]:
#find the rows with missing 'Hotel Name' data
hotel_nan = hotel_df[hotel_df.isna().any(axis = 1)]
hotel_nan

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed,Hotel Name
6,Gamba,69,GA,1579881996,74,-2.65,10.0,83.91,8.52,


In [12]:
#drop the NaN rows
hotel_df = hotel_df.dropna()
hotel_df

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed,Hotel Name
0,Jamestown,90,US,1579882002,40,42.1,-79.24,43.0,10.29,DoubleTree by Hilton Jamestown
1,Winneba,0,GH,1579882001,62,5.35,-0.62,89.6,10.29,Lagoon Lodge
2,Mumford,0,GH,1579881998,38,5.26,-0.76,95.05,11.54,Blue Diamond Beach and Spa Resort
3,Port-Gentil,40,GA,1579881886,70,-0.72,8.78,91.4,10.29,Jimmy Hotel
4,Anloga,15,GH,1579882000,46,5.79,0.9,91.4,10.29,Dzigbodi Guest Houselon
5,Takoradze,0,GH,1579881894,52,4.88,-1.76,91.38,10.98,Protea Hotel by Marriott Takoradi Select
7,Teshie,0,GH,1579882184,62,5.58,-0.11,89.6,10.29,La Palm Royal Beach Hotel


In [13]:
fig

Figure(layout=FigureLayout(height='420px'))

In [48]:
# NOTE: Do not change any of the code in this cell

# Using the template add the hotel marks to the heatmap
info_box_template = """
<dl>
<dt>Name</dt><dd>{Hotel Name}</dd>
<dt>City</dt><dd>{City}</dd>
<dt>Country</dt><dd>{Country}</dd>
</dl>
"""
# Store the DataFrame Row
# NOTE: be sure to update with your DataFrame name
hotel_info = [info_box_template.format(**row) for index, row in hotel_df.iterrows()]
locations = hotel_df[["Lat", "Lng"]]

In [68]:
hotel_marker = hotel_info.tolist()
marker_layer = gmaps.marker_layer(hotel_marker)


fig = gmaps.figure()
fig.add_layer(marker_layer)

fig

AttributeError: 'list' object has no attribute 'tolist'

In [41]:
fig = gmaps.figure()
fig.add_layer(info_box_template)
fig

TraitError: Element of the 'layers' trait of a Map instance must be a Widget, but a value of '\n<dl>\n<dt>Name</dt><dd>{Hotel Name}</dd>\n<dt>City</dt><dd>{City}</dd>\n<dt>Country</dt><dd>{Country}</dd>\n</dl>\n' <class 'str'> was specified.