In [1]:
# Import the dependencies.
# Import the datetime module from the datetime library.
from datetime import datetime
# Use pathlib to create the output directory before writing the CSV.
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# Import the requests library.
import requests
# Use the citipy module to determine city based on latitude and longitude.
from citipy import citipy

# Import the API key.
from config import weather_api_key

In [2]:
# Base URL for the OpenWeatherMap current-weather API call.
# HTTPS is used because the API key travels in the query string and would
# otherwise be sent over the network in clear text.
# The "units=Imperial" query parameter asks the API for imperial units.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [3]:
# Sample 1,500 latitudes and 1,500 longitudes uniformly at random over the
# full coordinate ranges, then pair them up position by position.
lats = np.random.uniform(-90.000, 90.000, 1500)
lngs = np.random.uniform(-180.000, 180.000, 1500)
# zip() yields a one-shot iterator of (lat, lng) pairs; it is exhausted after
# a single pass.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x20b34fc0348>

In [4]:
# Add the latitudes and longitudes to a list of (lat, lng) tuples.
# The pairs are zipped directly from `lats` and `lngs` instead of draining the
# one-shot `lat_lngs` iterator, so re-running this cell produces the same list
# rather than an empty one.
coordinates = list(zip(lats, lngs))

In [5]:
# Identify the nearest city for each latitude and longitude combination and
# keep every city name exactly once, in first-seen order.
# dict.fromkeys() de-duplicates in a single pass while preserving insertion
# order, which avoids scanning the whole list for every coordinate.
# NOTE(review): names are de-duplicated without regard to country, so two
# different cities sharing a name collapse into one entry -- confirm intended.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in coordinates
))
# Print the city count to confirm sufficient count.
len(cities)

621

In [6]:
# Accumulator that will hold the weather data collected for the cities.
city_data = []

# Counters used to number records and sets in the retrieval log.
set_count = 1
record_count = 1

# Announce the start of the retrieval run.
print("Beginning Data Retrieval     ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------


In [7]:
# Draft pass over the city list: it only builds each endpoint URL and sends no
# request. The set/record logging counters are deliberately left untouched
# here; advancing them in this cell would leave them at stale values for the
# retrieval loop that follows.
for i in range(len(cities)):
    # Create endpoint URL with each city.
    city_url = url + "&q=" + cities[i]

In [8]:
# Per-request timeout in seconds, so a single stalled connection cannot hang
# the whole retrieval run.
REQUEST_TIMEOUT = 10

# Start from a known state so this cell gives the same result every time it is
# run: no duplicate rows from a previous run and no counter values carried
# over from earlier cells.
city_data = []
record_count = 1
set_count = 1

# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if i % 50 == 0 and i >= 50:
        set_count += 1
        record_count = 1
    # Create endpoint URL with each city (spaces become "+" in the query).
    city_url = url + "&q=" + city.replace(" ", "+")

    # Log the URL, record, and set numbers and the city.
    #print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1

    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data.
        city_weather = requests.get(city_url, timeout=REQUEST_TIMEOUT).json()
        # Parse out the needed data; a missing key raises KeyError and the
        # city is skipped below.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the date to ISO standard (UTC).
        # NOTE: datetime.utcfromtimestamp is deprecated as of Python 3.12;
        # datetime.fromtimestamp(ts, tz=timezone.utc) is the replacement.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # Skip the city when the request fails or the response lacks the expected
    # fields. Only these error types are caught so that KeyboardInterrupt and
    # genuine programming errors are not silently swallowed.
    except (KeyError, TypeError, ValueError, requests.exceptions.RequestException):
        print(f"City {city} not found. Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

City saleaula not found. Skipping...
City sentyabrskiy not found. Skipping...
City illoqqortoormiut not found. Skipping...
City barentsburg not found. Skipping...
City tumannyy not found. Skipping...
City nizhneyansk not found. Skipping...
City barawe not found. Skipping...
City grand centre not found. Skipping...
City umzimvubu not found. Skipping...
City taolanaro not found. Skipping...
City skagastrond not found. Skipping...
City araguatins not found. Skipping...
City longlac not found. Skipping...
City tsihombe not found. Skipping...
City belushya guba not found. Skipping...
City karaul not found. Skipping...
City phumi samraong not found. Skipping...
City amderma not found. Skipping...
City vaitupu not found. Skipping...
City ilinsko-podomskoye not found. Skipping...
City bargal not found. Skipping...
City bur gabo not found. Skipping...
City jujuy not found. Skipping...
City solovetskiy not found. Skipping...
City marcona not found. Skipping...
City mendahara not found. Skipping.

In [12]:
# Turn the collected list of per-city dictionaries into a DataFrame and
# preview its first rows.
city_data_df = pd.DataFrame(data=city_data)
city_data_df.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Clyde River,70.4692,-68.5914,26.83,93,90,8.05,CA,2021-06-01 17:32:21
1,Jamestown,42.097,-79.2353,71.1,45,20,14.97,US,2021-06-01 17:24:32
2,Bandarbeyla,9.4942,50.8122,80.31,78,92,24.81,SO,2021-06-01 17:30:50
3,Jaguarari,-10.2639,-40.1958,82.56,41,32,8.03,BR,2021-06-01 17:32:21
4,Dikson,73.5069,80.5464,31.06,99,100,7.99,RU,2021-06-01 17:30:42


In [13]:
# Desired left-to-right column layout: identifying fields first, then the
# coordinates, then the weather measurements.
new_column_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]
# Select every row with the columns in the new order and preview the result.
city_data_df = city_data_df.loc[:, new_column_order]
city_data_df.head()

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Clyde River,CA,2021-06-01 17:32:21,70.4692,-68.5914,26.83,93,90,8.05
1,Jamestown,US,2021-06-01 17:24:32,42.097,-79.2353,71.1,45,20,14.97
2,Bandarbeyla,SO,2021-06-01 17:30:50,9.4942,50.8122,80.31,78,92,24.81
3,Jaguarari,BR,2021-06-01 17:32:21,-10.2639,-40.1958,82.56,41,32,8.03
4,Dikson,RU,2021-06-01 17:30:42,73.5069,80.5464,31.06,99,100,7.99


In [15]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"
# Make sure the target directory exists first: to_csv() does not create
# missing parent directories and would fail on a fresh checkout.
Path(output_data_file).parent.mkdir(parents=True, exist_ok=True)
# Export the City_Data into a CSV, labelling the index column "City_ID".
city_data_df.to_csv(output_data_file, index_label="City_ID")