In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination CSV for the collected city weather data
output_data_file = "output_data/cities.csv"

# (min, max) bounds for latitude and longitude, in degrees
lat_range, lng_range = (-90, 90), (-180, 180)

# Generate Cities List

In [2]:
# Number of random coordinate pairs to draw. The resulting city count can be
# lower because several points may share the same nearest city.
sample_size = 7

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=sample_size)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=sample_size)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# `cities` keeps first-seen order; `seen_cities` makes the duplicate check
# constant-time instead of rescanning the list for every point.
cities = []
seen_cities = set()
for point_lat, point_lng in lat_lngs:
    city = citipy.nearest_city(point_lat, point_lng).city_name

    # Only add the city the first time it is encountered
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Show the city count to confirm a sufficient count
len(cities)

7

# Perform API Calls

Perform a weather check on each city using a series of successive API calls.
Include a print log of each city as it's being processed (with the city number and city name).

In [3]:
# OpenWeatherMap current-weather endpoint. HTTPS is used so the API key carried
# in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"
# Build partial query URL; a city name is appended after "q=" to complete it
query_url = f"{url}appid={weather_api_key}&units={units}&q="

In [4]:
# Accumulators for the values pulled out of each API response.
# Every name gets its own independent empty list.
city_name, country, date = [], [], []
lat, lng = [], []
cloudiness, humidity, max_temp, wind_speed = [], [], [], []

# Counter used to number the processed records in the log; starts at 1
record = 1

# Header of the print log of each city as it's being processed (with the city number and city name).
print("Beginning Data Retrieval")
print("-------------------------------")

Beginning Data Retrieval
-------------------------------


In [5]:
# Query OpenWeatherMap once per city and collect the fields used by the analysis.
# `record` counts only the cities that were stored successfully.
for city in cities:
    try:
        # Let requests URL-encode the parameters (city names can contain spaces
        # or reserved characters) and bound the wait with a timeout.
        response = requests.get(
            url,
            params={"appid": weather_api_key, "units": units, "q": city},
            timeout=10,
        ).json()

        # Read every field before appending anything: a response that lacks a
        # key must not leave the parallel lists with different lengths.
        fields = (
            response["name"],
            response["clouds"]["all"],
            response["sys"]["country"],
            response["dt"],
            response["main"]["humidity"],
            response["main"]["temp_max"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["wind"]["speed"],
        )
    except requests.exceptions.RequestException as err:
        # Report only the exception type: the full message can embed the
        # request URL, which includes the API key.
        print(f"Request failed ({type(err).__name__}). Skipping...")
    except (KeyError, TypeError, ValueError):
        # Expected fields are missing or the body is not usable JSON, e.g.
        # OpenWeatherMap has no entry for the city generated by citipy.
        print("City not found. Skipping...")
    else:
        # Same order as the `fields` tuple above
        targets = (
            city_name, cloudiness, country, date, humidity,
            max_temp, lat, lng, wind_speed,
        )
        for target, value in zip(targets, fields):
            target.append(value)
        city_record = fields[0]
        print(f"Processing Record {record} | {city_record}")
        print(f"{url}&q={city}")  # API key deliberately left out of the log
        record += 1  # Increment counter

    # Pause after every request, successful or not, to avoid maxing out the API limit
    time.sleep(1.02)

Processing Record 1 | Bluff
http://api.openweathermap.org/data/2.5/weather?&q=bluff
Processing Record 2 | Kade
http://api.openweathermap.org/data/2.5/weather?&q=kade
City not found. Skipping...
City not found. Skipping...
Processing Record 3 | Busselton
http://api.openweathermap.org/data/2.5/weather?&q=busselton
Processing Record 4 | Lazarev
http://api.openweathermap.org/data/2.5/weather?&q=lazarev
Processing Record 5 | Hong Gai
http://api.openweathermap.org/data/2.5/weather?&q=hong gai


# Convert Raw Data to DataFrame
Export the city data into a .csv.
Display the DataFrame

In [13]:
# Assemble one column per list filled by the API calls
weather_dictionary = {
    "City": city_name,
    "Cloudiness": cloudiness,
    "Country": country,
    "Date": date,
    "Humidity": humidity,
    "Lat": lat,
    "Lng": lng,
    "Max Temp": max_temp,
    "Wind Speed": wind_speed,
}
# Create dataframe from above Dictionary
weather_df = pd.DataFrame(weather_dictionary)

# Drop rows with humidity > 100%. A new frame is built instead of mutating in
# place; the negated mask keeps exactly the rows the original drop() kept.
weather_df = weather_df.loc[~(weather_df["Humidity"] > 100)].copy()

# Save the CSV to the path configured in the setup cell
weather_df.to_csv(output_data_file)

# Display a preview of the DataFrame (must be the last expression to render)
weather_df.head()

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,Bluff,85,NZ,1604274810,68,-46.6,168.33,55.99,13.0
1,Kade,82,DE,1604275169,86,52.38,12.27,61.0,1.99
2,Busselton,76,AU,1604275170,74,-33.65,115.33,60.01,5.57
3,Lazarev,100,RU,1604275172,94,52.23,141.51,41.56,13.73
4,Hong Gai,79,VN,1604275173,94,20.96,107.09,75.2,1.12


# Inspect the data and remove the cities where the humidity > 100%.

Skip this step if there are no cities that have humidity > 100%.

In [None]:
#  Get the indices of cities that have humidity over 100%.
Int64Index([], dtype='int64')

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".

# Plotting the Data
Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
Save the plotted figures as .pngs.