In [1]:
# GWU Bootcamp Module 6 Web APIs and Pandas Data Frames
# Module 6 Challenge - Deliverable # 1- Retrieve Weather Data and Export to CSV
# Student name: Christopher Mastrangelo

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random

# very important to include this module - this is what was causing error
import requests

# Use the citipy module to determine city based on latitude and longitude.
from citipy import citipy

# Import the datetime module from the datetime library.
from datetime import datetime

In [2]:
# Import the API key.  will need to copy the config.py file to this folder
from config import weather_api_key

In [3]:
# Create a set of random latitude and longitude combinations.
# I think -90 to 90 is too likely to pick a non populated area
# Even using -60 to 70 you are going to get some extremely remote locations, most over water 
# start with 20 until its working then scale up to 200 then to 2000 GPS pairs for the challenge

# For the current run - keep cities in USA and Canada including Alaska and Hawaii- just to show I can 
maxsize = 675
lats = np.random.uniform(low=24.000, high=72.000, size=maxsize)
lngs = np.random.uniform(low=-165.000, high=-65.000, size=maxsize)
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x2415d4a8308>

In [4]:
# Add the latitudes and longitudes to a list.
coordinates = list(lat_lngs)
# coordinates

In [5]:
# This is how we build the unique list of cities. only the name field is unique.  
# question - what if there is a city name like springfield which is located in more than one state?
# that is beyond the scope of this assignment but I would like to verify if the lat/long matches the original used

# Create a list for holding the cities.
cities = []

# Identify the nearest city for each latitude and longitude combination.
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in cities:
        cities.append(city)
# Print the city count to confirm sufficient count.
len(cities)
# cities

# I said it before and I will say it again- none of these places are places any traveler would actually go
# they happen to be islands or places along the coast because random pins dropped over the ocean end up in Hawaii
# or Capetown South Africa as the nearest city on the coast
# make sure to add this comment to the README file when submitting the challenge

257

In [6]:
# Starting URL for Weather Map API Call - do this outside the loop and do not PRINT IT
# Be careful how many times you run this in case there is a limit on the number of free uses for API Key

url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

# Create an empty list to hold the weather data.
city_data = []

# for city in cities
for i, city in enumerate(cities):
    
    # Create an endpoint URL for a city.
    city_url = url + "&q=" + city.replace(" ","+")
    
    # copies this code right out of the WeatherPy notebook - we just have to add "description"
    try:
        # Parse the JSON and retrieve data.
        city_weather = requests.get(city_url).json()
        
        # Parse out the needed data. Put them in the order we need so we don't have to reorder columns later
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        
        # next line is the new line added
        city_description = city_weather["weather"][0]['description']

        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the date to ISO standard.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        if (city_country not in ["CA","US"]):
            print("Skipping ", city.title(), city_country, city_lat, city_lng)
        else:
            city_data.append({"City": city.title(),
                          "Country": city_country,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Current Description": city_description})
                          # removing date for the challenge was >>> "Date": city_date})
        # show which cities are in Canada, Alaska or Hawaii
        if ((city_lat > 49.75) or (city_lng < -124.6)):
            print(city.title(), city_country, city_lat, city_lng)
            
# If an error is experienced, skip the city. using silent diagnostics instead of print statements 
    except:
        print("City: " + city + " not found. Skipping...")
        pass

# If we process more than 200 cities we should batch them in groups of 50 like the we did in the WeatherPy Notebook
# If we demonstrate using 200 cities in USA and Canada that should prove the code is scalable to 2000 locations
# As indicated above I enhanced random code to only select cities in USA including ALaska Hawaii or Canada


Sioux Lookout CA 50.1001 -91.917
Moose Factory CA 51.2585 -80.6094
Kodiak US 57.79 -152.4072
Yellowknife CA 62.456 -114.3525
Fort Nelson CA 58.8053 -122.7002
Ahuimanu US 21.4447 -157.8378
Skipping  Guadalupe Y Calvo MX 26.1 -106.9667
Iqaluit CA 63.7506 -68.5145
Sitka US 57.0531 -135.33
Skipping  Alexandria EG 31.2156 29.9553
Kapaa US 22.0752 -159.319
Skipping  Ojinaga MX 29.5667 -104.4167
Barrow US 71.2906 -156.7887
Skipping  Guaymas MX 27.9333 -110.9
Norman Wells CA 65.282 -126.8329
College US 64.8569 -147.8028
Juneau US 58.3019 -134.4197
Whitehorse CA 60.7161 -135.0538
Aklavik CA 68.2191 -135.0107
Skipping  Edinburg GB 55.9521 -3.1965
Edinburg GB 55.9521 -3.1965
Port Hardy CA 50.6996 -127.4199
City: attawapiskat not found. Skipping...
Kahului US 20.8947 -156.47
Tuktoyaktuk CA 69.4541 -133.0374
Hilo US 19.7297 -155.09
Skipping  Victoria HK 22.2855 114.1577
Thompson CA 55.7435 -97.8558
Powell River CA 49.8496 -124.5361
Skipping  Sidney AU -33.8679 151.2073
Flin Flon CA 54.7682 -101.865

In [7]:
# DEBUG - this cell only needs to be run when used to parse the JSON output
# used this cell to traverse the JSON and peel the layers off the onion to get the syntax for each item
# do these steps outside the try/catch so we can see what is going on 

city = 'westminster'
city_url = url + "&q=" + city.replace(" ","+")
        
city_weather = requests.get(city_url).json()
        
# print(city_weather)
# if the status code will not print maybe that is what is causing the catch

city_lat = city_weather["coord"]["lat"]
city_lng = city_weather["coord"]["lon"]
city_max_temp = city_weather["main"]["temp_max"]
city_humidity = city_weather["main"]["humidity"]

# next line is the new line added
city_description = city_weather["weather"][0]['description']

# city_clouds = city_weather["clouds"]["all"]
# city_wind = city_weather["wind"]["speed"]
# city_country = city_weather["sys"]["country"]

    
print(city_description)
city_weather["weather"][0]['description']


haze


'haze'

In [8]:
# Convert the array of dictionaries to a Pandas DataFrame.
city_data_df = pd.DataFrame(city_data)
city_data_df.head(20)


Unnamed: 0,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Current Description
0,Sioux Lookout,CA,50.1001,-91.917,57.58,88,75,5.75,smoke
1,Steamboat Springs,US,40.485,-106.8317,57.94,78,75,0.0,haze
2,Bethel,US,41.3712,-73.414,80.02,80,100,6.87,overcast clouds
3,Lompoc,US,34.6391,-120.4579,61.09,92,90,3.44,mist
4,Moose Factory,CA,51.2585,-80.6094,53.6,87,1,4.61,clear sky
5,Kodiak,US,57.79,-152.4072,51.31,87,75,3.44,broken clouds
6,Yellowknife,CA,62.456,-114.3525,64.92,68,75,13.8,smoke
7,Nantucket,US,41.2835,-70.0995,79.97,89,1,9.22,clear sky
8,Fort Nelson,CA,58.8053,-122.7002,58.59,94,90,9.22,moderate rain
9,Ahuimanu,US,21.4447,-157.8378,77.94,75,25,16.2,scattered clouds


In [9]:
# Create the output file (CSV).
output_data_file = "./WeatherPy_Database.csv"
# Export the City_Data into a CSV. SAme folder that we are in which should be World_Weather

# ONLY RE-ENABLE THIS LIINE IF YOU WANT TO SAVE ANOTHER COPY OF THE OUTPUT FILE 
# Header default is TRUE so no need to include the column headers except the label for the first column
# city_data_df.to_csv(output_data_file, index_label="City_ID")

# Looks like we are ready to submit Deliverable #1 for Module 6 Challenge date 2021-08-06

In [28]:
# Sat 8-7 Practice - before I work on the heatmaps I want to create a map of just the lat/longs of the cities above
# just to see the locations of the city on the map and compare it to the lat/long pairs used to generate the cities

# still need to install gmaps in the back end using anaconda prompt- this might be a good office hours question
# it was still waiting for a y/n answer so it had not installed yet

import gmaps
import gmaps.datasets

# Import the API key.
from config import g_key

# Configure gmaps to use your Google API key.
gmaps.configure(api_key=g_key)

# Heatmap of percent humidity
locations = city_data_df[["Lat", "Lng"]]
humidity = city_data_df["Humidity"]

fig = gmaps.figure(center=(40.0, -75.0), zoom_level=8)

# marker_layer labels work but they are hard to read at same size/font as the background map 
marker_layer = gmaps.marker_layer(locations, label=city_data_df["City"])
fig.add_layer(marker_layer)

# use symbols vs markers to show the original set of lat/long points used to generate the citis 
# the list used to xxx was called coordinates not cities
symbol_layer = gmaps.symbol_layer(coordinates, scale=5)
fig.add_layer(symbol_layer)


# directions later - can you really do this?? yes but then you can't save the map 
start = (42.35, -71.112)
end = (38.8, -77.223)
waypoints = [(40.01, -75.01)]
directions = gmaps.directions_layer(start, end, waypoints=waypoints)
# fig.add_layer(directions)

# fig.add_layer(heat_layer)

# Call the figure to plot the data.
# fig


In [33]:
# get these using a input statement but first I will enter them to test the df below
# min_temp = 65.03
# max_temp = 88.98

# Ask the customer to add a minimum and maximum temperature value.
min_temp = float(input("What is the minimum temperature you would like for your trip? "))
max_temp = float(input("What is the maximum temperature you would like for your trip? "))

# Filter the dataset to find the cities that fit the criteria.
preferred_cities_df = city_data_df.loc[(city_data_df["Max Temp"] <= max_temp) & \
                                       (city_data_df["Max Temp"] >= min_temp)]
preferred_cities_df

What is the minimum temperature you would like for your trip? 77.9358
What is the maximum temperature you would like for your trip? 99.555


Unnamed: 0,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Current Description
2,Bethel,US,41.3712,-73.4140,80.02,80,100,6.87,overcast clouds
7,Nantucket,US,41.2835,-70.0995,79.97,89,1,9.22,clear sky
9,Ahuimanu,US,21.4447,-157.8378,77.94,75,25,16.20,scattered clouds
11,Havelock,US,34.8791,-76.9013,82.38,87,75,3.00,broken clouds
14,Redlands,US,34.0556,-117.1825,84.15,55,0,0.22,clear sky
...,...,...,...,...,...,...,...,...,...
211,San Angelo,US,31.4638,-100.4370,79.54,80,75,1.99,broken clouds
212,Key West,US,24.5557,-81.7826,89.60,64,20,12.66,few clouds
213,Alamogordo,US,32.8995,-105.9603,79.95,45,1,6.91,clear sky
215,Fairfield,US,41.2668,-73.3162,80.65,78,100,7.14,overcast clouds


In [34]:
preferred_cities_df.count()

City                   65
Country                65
Lat                    65
Lng                    65
Max Temp               65
Humidity               65
Cloudiness             65
Wind Speed             65
Current Description    65
dtype: int64

In [35]:
# Create DataFrame called hotel_df to store hotel names along with city, country, max temp, and coordinates.
hotel_df = preferred_cities_df[["City", "Country", "Max Temp", "Lat", "Lng"]].copy()
hotel_df["Hotel Name"] = ""
hotel_df.head(10)

Unnamed: 0,City,Country,Max Temp,Lat,Lng,Hotel Name
2,Bethel,US,80.02,41.3712,-73.414,
7,Nantucket,US,79.97,41.2835,-70.0995,
9,Ahuimanu,US,77.94,21.4447,-157.8378,
11,Havelock,US,82.38,34.8791,-76.9013,
14,Redlands,US,84.15,34.0556,-117.1825,
17,Kapaa,US,79.12,22.0752,-159.319,
18,Mastic Beach,US,81.99,40.7668,-72.8521,
22,Mayo,US,79.99,38.8876,-76.5119,
29,Beamsville,CA,80.89,43.1668,-79.4829,
30,Hamilton,US,78.89,39.1834,-84.5333,


In [58]:
# section 6.5.4 find hotels around a location using API lookup 
base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
# we will build the params including the API Key
params = "&key=" + g_key + "&q="
# params
# use my home GPS coords as test of the code before adding it to loop
plat = 38.801
plng = -77.223
pref_city = f"{plat},{plng}"
# Set parameters to search for a hotel.
params = {
    "radius": 5000,
    "type": "lodging",
    "key": g_key,
    "location": pref_city
}

# Make request and get the JSON data from the search.
hotels = requests.get(base_url, params=params).json()

hotels

{'html_attributions': [],
 'results': [{'business_status': 'OPERATIONAL',
   'geometry': {'location': {'lat': 38.78022000000001, 'lng': -77.18251},
    'viewport': {'northeast': {'lat': 38.78159058029151,
      'lng': -77.18135941970849},
     'southwest': {'lat': 38.77889261970851, 'lng': -77.1840573802915}}},
   'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/lodging-71.png',
   'icon_background_color': '#909CE1',
   'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/hotel_pinlet',
   'name': 'Holiday Inn Express Washington DC SW - Springfield, an IHG Hotel',
   'opening_hours': {'open_now': True},
   'photos': [{'height': 533,
     'html_attributions': ['<a href="https://maps.google.com/maps/contrib/103652742749938049887">Holiday Inn Express Washington DC SW - Springfield, an IHG Hotel</a>'],
     'photo_reference': 'Aap_uEDdNi4m0cqGQ4fxT9w0mMXbBSg-7xnDtFy06KmyY3CihZVHdwXAdDGy4GuokEyMGmFCGZt3uY8boQs91dto-bl8DeF4qlUSfB6Bl7V-KnUMxszaNqXWTq

In [59]:
# peel the onion
num_hotels = len(hotels['results'])
first_hotel = hotels['results'][0]['name']
first_hotel 


'Holiday Inn Express Washington DC SW - Springfield, an IHG Hotel'

In [64]:
# Iterate through the DataFrame.
for index, row in hotel_df.iterrows():
    # Get the latitude and longitude.
    lat = row["Lat"]
    lng = row["Lng"]

    # Add the latitude and longitude to location key for the params dictionary.
    params["location"] = f"{lat},{lng}"
    
    try:
        # Use the search term: "lodging" and our latitude and longitude.
        base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
        # Make request and get the JSON data from the search.
        hotels = requests.get(base_url, params=params).json()
        # Grab the first hotel from the results and store the name.
        hotel_df.loc[index, "Hotel Name"] = hotels["results"][0]["name"]
    except (IndexError):
        city_name = row["City"]        
        print("Hotel not found for city = " + city_name + " not found... skipping.")

    

Hotel not found for city = Bristol not found... skipping.
Hotel not found for city = Merritt Island not found... skipping.
Hotel not found for city = Fairfield not found... skipping.


In [66]:
# check for any nulls in the hotel name before moving forward three cities should have a null? no it says they are all there
hotel_df.count()


City          65
Country       65
Max Temp      65
Lat           65
Lng           65
Hotel Name    65
dtype: int64

In [80]:
info_box_template = """
<dl>
<dt>Hotel Name</dt><dd>{Hotel Name}</dd>
<dt>City</dt><dd>{City}</dd>
<dt>Country</dt><dd>{Country}</dd>
<dt>Max Temp</dt><dd>{Max Temp} °F</dd>
</dl>
"""

# Store the DataFrame Row.
hotel_info = [info_box_template.format(**row) for index, row in hotel_df.iterrows()]
len(hotel_info)

65

In [83]:
# Add a heatmap of temperature for the vacation spots and marker for each city.
locations = hotel_df[["Lat", "Lng"]]
max_temp = hotel_df["Max Temp"]
fig = gmaps.figure(center=(40.0, -75.0), zoom_level=6)

heat_layer = gmaps.heatmap_layer(locations, weights=max_temp,
             dissipating=False, max_intensity=300, point_radius=4)

marker_layer = gmaps.marker_layer(locations, info_box_content=hotel_info)

# fig.add_layer(heat_layer)
fig.add_layer(marker_layer)
# Call the figure to plot the data. Do it in the next cell to test for errors first without building map every time
# fig

# it works but the way they use heat maps in this lesson does not tell you anything of value
# a hot area on the map just means there are more hotels or pins in that area, not the actual temperature

In [84]:
fig

Figure(layout=FigureLayout(height='420px'))