In [1]:
# We must first import our relevant packages to this project.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# We will use the requests package to call on APIs.
import requests
import time
# We use the citipy package to generate a list of randomly selected cities.
from citipy import citipy

# We will use the getpass function to keep our API key hidden.
import getpass

In [2]:
# Read the OpenWeatherMap API key interactively so it never appears in the
# notebook source or its saved output. getpass does not echo what is typed.
# The explicit prompt replaces getpass's generic "Password: " so the reader
# knows which secret is being requested.
# .strip() drops whitespace that often comes along when a key is pasted;
# a padded key would otherwise be embedded verbatim in every request URL.
api_key = getpass.getpass(prompt="OpenWeatherMap API key: ").strip()

# Fail fast: with an empty key every later request would be rejected by the
# API, and the failure would only show up as a run of skipped cities.
if not api_key:
    raise ValueError("No API key entered; re-run this cell and provide a key.")

········


In [3]:
# Relative path of the CSV file this notebook uses for its output.
output_data_file: str = "output_data/cities.csv"

In [4]:
# Bounds for the random city sampling performed later in the notebook.
# Each tuple is (lower, upper): every sampled city's latitude / longitude
# must fall inside the corresponding range.
lat_range, lng_range = (-90, 90), (-180, 180)

In [11]:
# cities_to_check collects the randomly selected city names produced with the
# citipy package. Each of them is later sent to OpenWeatherMap in an API request.
# citipy may return a city for which OpenWeatherMap has no data, which is why
# these names are only candidates "to check".
# A separate list, built later, keeps only the cities OpenWeatherMap knows about.
cities_to_check = list()

In [12]:
# Draw random (latitude, longitude) pairs inside the configured bounds.
# lat_range / lng_range are the (min, max) tuples defined earlier in the
# notebook; using them here keeps a single source of truth for the bounds
# instead of repeating the literal numbers.
# NOTE(review): no random seed is set in the visible cells, so the resulting
# city list differs on every run.
sample_size = 1500
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=sample_size)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=sample_size)
lat_lngs = zip(lats, lngs)

# Set mirror of cities_to_check for constant-time duplicate checks. It is
# seeded from the list so names already collected are still recognised.
seen_cities = set(cities_to_check)

# Identify the nearest city for each lat/lng combination.
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Only keep a city the first time it is encountered.
    if city not in seen_cities:
        seen_cities.add(city)
        cities_to_check.append(city)

# Display the number of unique cities to confirm the count is sufficient.
len(cities_to_check)

607

In [None]:
# Base endpoint of OpenWeatherMap's current-weather API.
# HTTPS is used because the API key is carried in the query string and would
# otherwise be sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"

# Unit system requested from the API ("metric" is one of the values the API
# documents for its `units` parameter).
units = "metric"

# Request prefix; the city name is appended directly after "q=".
# `{units}` interpolates the variable above. Previously the literal text
# "units" was sent, so the metric setting was never actually requested.
query_url = f"{url}appid={api_key}&units={units}&q="

In [None]:
# The API only permits a limited number of requests within a given time window.
# While the code is still being tested, work with just the first 10 candidate
# cities; switch to the full cities_to_check list once the code is known to work.
cities_to_check_subset_for_testing = cities_to_check[:10]

# Display the subset.
cities_to_check_subset_for_testing

In [None]:
def fetch_weather(cities, base_url, timeout=10, pause_seconds=0.0):
    """Request current weather for each city and collect the usable results.

    One helper serves both the small testing subset and the full city list,
    so the retrieval loop does not have to be duplicated.

    Parameters
    ----------
    cities : iterable of str
        City names to look up, in the order they should be processed.
    base_url : str
        Request URL prefix; each city name is appended to it unchanged.
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up on that city.
    pause_seconds : float, optional
        Delay inserted after each request. The default of 0 adds no delay;
        raise it if the API's request limit is being hit.

    Returns
    -------
    tuple of (list, list, list)
        Parallel lists (names, latitudes, temperatures) with one entry per
        city whose response contained all three values. The lists always
        have equal length.
    """
    names, lats, temps = [], [], []

    for record_number, city_name in enumerate(cities):
        print(f"Processing Record {record_number} of Set | {city_name}")

        try:
            response = requests.get(base_url + city_name, timeout=timeout).json()
        except (requests.exceptions.RequestException, ValueError):
            # Network failure, timeout, or a body that is not valid JSON.
            # The exception text is deliberately not printed: it can contain
            # the request URL, which may embed the API key.
            print("Request failed.  Skipping...")
            continue
        finally:
            if pause_seconds:
                time.sleep(pause_seconds)

        try:
            # Read every field before appending anything, so a response that
            # lacks one of them cannot leave the lists with unequal lengths.
            name = response["name"]
            latitude = response["coord"]["lat"]
            temperature = response["main"]["temp"]
        except (KeyError, TypeError):
            # The expected fields are absent from the response.
            print("City not found.  Skipping...")
            continue

        names.append(name)
        lats.append(latitude)
        temperatures_value = temperature
        temps.append(temperatures_value)

    return names, lats, temps


print("Beginning Data Retrieval")

# Testing run: only the small subset is requested, to stay within the API's
# request limit. Once the code is trusted, pass cities_to_check instead to
# retrieve data for the full list.
found_cities, latitudes, temperatures = fetch_weather(
    cities_to_check_subset_for_testing, query_url
)

print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")

In [None]:
# Column name -> list of values, gathered from the retrieval step.
weather_data_dictionary = dict(
    City=found_cities,
    Latitude=latitudes,
    Temperature=temperatures,
)

In [None]:
# Build a table from the collected columns and preview its first five rows.
weather_data = pd.DataFrame(data=weather_data_dictionary)
weather_data.head(5)