In [10]:
# Dependencies
import json
import os

import numpy as np
import pandas as pd
import requests
from citipy import citipy

from config import api_key

# Where the retrieved city weather table is written (relative to the working directory)
output_data_file = os.path.join(
    "output_data",
    "cities.csv",
)

In [2]:
# Generate cities list

# Unique city names, in the order they are first encountered
cities = []

# Draw 1500 random latitude/longitude pairs
lat = np.random.uniform(low=-90.000, high=90.000, size=1500)
lng = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_long = list(zip(lat, lng))

# Names already collected; a set keeps the duplicate check constant-time
seen_cities = set()

# Identify the nearest city for each lat, lng combination.
# The pair is unpacked into its own names so the loop no longer rebinds
# `lat_long` while iterating over it.
for point_lat, point_lng in lat_long:
    city = citipy.nearest_city(point_lat, point_lng).city_name

    # Keep only the first occurrence of each city name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

605

In [3]:
# Perform a weather check on each city using a series of successive API calls.
# Include a print log of each city as it's being processed (with the city number and city name).

# Save config information.
# HTTPS is used so the API key carried in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Build query URL; the city name is appended after the trailing "q="
query_url = f"{url}appid={api_key}&units={units}&q="

In [5]:
# Set up the lists that hold the DataFrame columns.
# A city is added to every list or to none, so the lists always stay the same length.
citylist = []
lat = []
lng = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

# Log counters: records are numbered 1-50 within each set
record_count = 0
set_count = 1

for city in cities:
    try:
        # Passing the query as params lets requests URL-encode the city name;
        # the timeout stops one stalled connection from hanging the whole run.
        response = requests.get(
            url,
            params={"appid": api_key, "units": units, "q": city},
            timeout=10,
        ).json()

        # Read every field before touching the lists: a response missing any
        # key raises here, before a partial row can be appended.
        city_lat = response['coord']['lat']
        city_lng = response['coord']['lon']
        city_max_temp = response['main']['temp_max']
        city_humidity = response['main']['humidity']
        city_cloudiness = response['clouds']['all']
        city_wind_speed = response['wind']['speed']
        city_country = response['sys']['country']
        city_date = response['dt']
    except requests.RequestException as exc:
        # Only the exception class is printed: the full message can contain
        # the request URL, which includes the API key.
        print(f"Request failed ({type(exc).__name__}). Skipping...")
        continue
    except (KeyError, ValueError):
        # Body was not JSON, or it lacked the expected weather fields
        print("City not found. Skipping...")
        continue

    # Advance the log counters; a new set starts after 50 records
    if record_count > 49:
        record_count = 1
        set_count += 1
    else:
        record_count += 1

    citylist.append(city)
    lat.append(city_lat)
    lng.append(city_lng)
    max_temp.append(city_max_temp)
    humidity.append(city_humidity)
    cloudiness.append(city_cloudiness)
    wind_speed.append(city_wind_speed)
    country.append(city_country)
    date.append(city_date)
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

print("-----------------------")
print("Data Retrieval Complete")
print("-----------------------")

Processing Record 50 of Set 2 | laguna
Processing Record 1 of Set 3 | kenai
Processing Record 2 of Set 3 | hofn
Processing Record 3 of Set 3 | marawi
Processing Record 4 of Set 3 | georgetown
Processing Record 5 of Set 3 | hasaki
Processing Record 6 of Set 3 | mgandu
Processing Record 7 of Set 3 | asosa
Processing Record 8 of Set 3 | fare
Processing Record 9 of Set 3 | praia da vitoria
Processing Record 10 of Set 3 | asau
Processing Record 11 of Set 3 | oga
Processing Record 12 of Set 3 | swan river
Processing Record 13 of Set 3 | ilulissat
Processing Record 14 of Set 3 | scalea
Processing Record 15 of Set 3 | arraial do cabo
City not found. Skipping...
City not found. Skipping...
Processing Record 16 of Set 3 | hay river
City not found. Skipping...
Processing Record 17 of Set 3 | tshikapa
Processing Record 18 of Set 3 | chuy
Processing Record 19 of Set 3 | hermanus
Processing Record 20 of Set 3 | touros
City not found. Skipping...
Processing Record 21 of Set 3 | bathsheba
Processing R

In [12]:
# Assemble the per-city lists into a single table.
# Each pair is (column label, list of values); the pair order sets the column order.
column_data = [
    ('City', citylist),
    ('Lat', lat),
    ('Lng', lng),
    ('Max Temperature', max_temp),
    ('Humidity', humidity),
    ('Cloudiness', cloudiness),
    ('Wind Speed', wind_speed),
    ('Country', country),
    ('Date', date),
]
weather_df = pd.DataFrame(dict(column_data))

# Show the resulting table
weather_df

Unnamed: 0,City,Lat,Lng,Max Temperature,Humidity,Cloudiness,Wind Speed,Country,Date
0,mehamn,71.0357,27.8492,-3.00,93,75,12.86,NO,1611739560
1,jamestown,42.0970,-79.2353,-3.89,93,90,4.07,US,1611739560
2,yellowknife,62.4560,-114.3525,-22.78,70,90,1.03,CA,1611739440
3,bluff,-46.6000,168.3333,14.44,75,3,4.92,NZ,1611739560
4,qaanaaq,77.4840,-69.3632,-26.39,63,0,2.73,GL,1611739561
...,...,...,...,...,...,...,...,...,...
559,kedrovyy,56.1667,91.8167,-23.00,77,90,2.00,RU,1611739640
560,sur,22.5667,59.5289,23.22,57,0,5.71,OM,1611739640
561,rudraprayag,30.2833,78.9833,23.67,14,0,3.00,IN,1611739640
562,mayskiy,47.6931,40.1025,1.00,93,20,8.00,RU,1611739640


In [11]:
# Export the city data into a .csv.
# Create the target folder first: to_csv does not create missing directories.
output_dir = os.path.dirname(output_data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
weather_df.to_csv(output_data_file)

In [None]:
# Inspect the data and remove the cities where the humidity > 100%.
# Skip this step if there are no cities that have humidity > 100%.

In [22]:
# Summary statistics for the humidity column; the max shows whether any value exceeds 100%
humidity_stats = weather_df["Humidity"].describe()
humidity_stats

count    564.000000
mean      72.539007
std       21.965343
min        7.000000
25%       63.000000
50%       78.000000
75%       88.000000
max      100.000000
Name: Humidity, dtype: float64