# Collect the Data

## Use NumPy module to generate over 1500 random latitude and longitude pairs

In [1]:
# Import dependencies
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from citipy import citipy
import requests
from datetime import datetime
from config import weather_api_key

In [2]:
# Draw 1500 random latitudes in [-90, 90) and 1500 random longitudes in
# [-180, 180), then pair them element-wise.
lats = np.random.uniform(-90, 90, 1500)
lngs = np.random.uniform(-180, 180, 1500)
# zip() is a single-use iterator; it is materialized into a list in the next cell.
lats_lngs = zip(lats, lngs)
lats_lngs

<zip at 0x7fd32f0c9d20>

In [3]:
# Materialize the zip iterator into a list of (latitude, longitude) tuples
# so the pairs can be iterated more than once.
coordinates = [*lats_lngs]

## Match coordinates to the nearest city using Python's citipy module

In [4]:
# Identify the nearest city for each latitude and longitude combination and
# keep each city name once, in first-seen order.
# dict.fromkeys() de-duplicates with O(1) lookups while preserving insertion
# order, avoiding a full list scan (`city not in cities`) on every coordinate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in coordinates
))
# Print the city count to confirm sufficient count.
len(cities)

641

## Use the OpenWeatherMap API to request the current weather data from each unique city in your list. Parse the JSON data from the API request, and obtain the following data for each city:

- City, country, and date
- Latitude and longitude
- Maximum temperature
- Humidity
- Cloudiness
- Wind speed

## Then add the data to a pandas DataFrame

In [5]:
# Base URL for the OpenWeatherMap current-weather endpoint. Units = Imperial.
# HTTPS is used so the API key in the query string is not sent in plaintext.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key
# Print the URL with the key masked so the secret is never stored in the
# notebook's saved output. The key is the final component of the URL.
print(url[:len(url) - len(weather_api_key)] + "<API_KEY>")

http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=<API_KEY>


In [None]:
# Retrieve the current weather for every city in `cities` and collect one
# dictionary per successfully retrieved city in `city_data`.

# Create an empty list to hold the weather data.
city_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters (used only for the progress log).
record_count = 1
set_count = 1

# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if i % 50 == 0 and i >= 50:
        set_count += 1
        record_count = 1
    # Create endpoint URL with each city; spaces in the name become "+".
    city_url = url + "&q=" + city.replace(" ", "+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1

    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data. The timeout keeps one stalled
        # connection from hanging the whole retrieval loop.
        city_weather = requests.get(city_url, timeout=10).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the Unix timestamp to a "YYYY-MM-DD HH:MM:SS" UTC string.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If an error is experienced, skip the city.
    # Only expected failures are caught: missing fields in the response
    # (KeyError/TypeError), a body that is not valid JSON (ValueError), and
    # network problems or timeouts (RequestException). KeyboardInterrupt and
    # genuine programming errors are no longer silently swallowed.
    except (KeyError, TypeError, ValueError, requests.exceptions.RequestException):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | morropon
Processing Record 2 of Set 1 | mataura
Processing Record 3 of Set 1 | taolanaro
City not found. Skipping...
Processing Record 4 of Set 1 | kodiak
Processing Record 5 of Set 1 | ushuaia
Processing Record 6 of Set 1 | rikitea
Processing Record 7 of Set 1 | bambous virieux
Processing Record 8 of Set 1 | bengkulu
Processing Record 9 of Set 1 | melendugno
Processing Record 10 of Set 1 | alofi
Processing Record 11 of Set 1 | vredendal
Processing Record 12 of Set 1 | albany
Processing Record 13 of Set 1 | todos santos
Processing Record 14 of Set 1 | yellowknife
Processing Record 15 of Set 1 | kazalinsk
City not found. Skipping...
Processing Record 16 of Set 1 | road town
Processing Record 17 of Set 1 | vila velha
Processing Record 18 of Set 1 | bethel
Processing Record 19 of Set 1 | kapaa
Processing Record 20 of Set 1 | provideniya
Processing Record 21 of Set 1 | aklavik
Processing Record 22 of

Processing Record 38 of Set 4 | maceio
Processing Record 39 of Set 4 | macusani
Processing Record 40 of Set 4 | umzimvubu
City not found. Skipping...
Processing Record 41 of Set 4 | aksarka
Processing Record 42 of Set 4 | amderma
City not found. Skipping...
Processing Record 43 of Set 4 | maniago
Processing Record 44 of Set 4 | tuatapere
Processing Record 45 of Set 4 | halifax
Processing Record 46 of Set 4 | san jeronimo
Processing Record 47 of Set 4 | chadiza
Processing Record 48 of Set 4 | imeni poliny osipenko
Processing Record 49 of Set 4 | kano
Processing Record 50 of Set 4 | georgiyevka
Processing Record 1 of Set 5 | ngukurr
City not found. Skipping...
Processing Record 2 of Set 5 | ariquemes
Processing Record 3 of Set 5 | dayton
Processing Record 4 of Set 5 | gushi
Processing Record 5 of Set 5 | matameye
City not found. Skipping...
Processing Record 6 of Set 5 | vardo
Processing Record 7 of Set 5 | hamilton
Processing Record 8 of Set 5 | port-gentil
Processing Record 9 of Set 5 

Processing Record 23 of Set 8 | tasbuget
City not found. Skipping...
Processing Record 24 of Set 8 | banda aceh
Processing Record 25 of Set 8 | mrirt
City not found. Skipping...
Processing Record 26 of Set 8 | prainha
Processing Record 27 of Set 8 | aras
Processing Record 28 of Set 8 | ulaangom
Processing Record 29 of Set 8 | tura
Processing Record 30 of Set 8 | vilhena
Processing Record 31 of Set 8 | peleduy
Processing Record 32 of Set 8 | gwadar
Processing Record 33 of Set 8 | attawapiskat
City not found. Skipping...
Processing Record 34 of Set 8 | pombas
Processing Record 35 of Set 8 | luganville
Processing Record 36 of Set 8 | galesong
Processing Record 37 of Set 8 | myskhako
Processing Record 38 of Set 8 | ocos
Processing Record 39 of Set 8 | dunmore east
Processing Record 40 of Set 8 | minuri
Processing Record 41 of Set 8 | mirnyy
Processing Record 42 of Set 8 | galiwinku
City not found. Skipping...
Processing Record 43 of Set 8 | vaitupu
City not found. Skipping...
Processing Re

In [None]:
# Check to ensure data from at least 500 cities has been collected.
# Cities skipped during retrieval are not appended to city_data, so this
# count can be lower than len(cities).
len(city_data)

In [None]:
# Build a DataFrame with one row per retrieved city (one column per
# dictionary key) and preview the first ten rows.
city_data_df = pd.DataFrame(data=city_data)
city_data_df.head(n=10)

In [None]:
# Reorder the columns as City, Country, Date, Lat, Lng, Max Temp, Humidity,
# Cloudiness, and Wind Speed.
new_column_order = [
    'City', 'Country', 'Date',
    'Lat', 'Lng',
    'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed',
]
# Select every row with the columns in the new order.
city_data_df = city_data_df.loc[:, new_column_order]
city_data_df.head()

In [None]:
import os

# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"
# to_csv() does not create missing folders, so make sure the target
# directory exists before writing (a no-op when it is already there).
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)
# Export the City_Data into a CSV, labelling the index column "City_ID".
city_data_df.to_csv(output_data_file, index_label="City_ID")