In [1]:
# Import dependencies
import time
from datetime import datetime, timezone
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy

from config import weather_api_key

In [2]:
# Draw 1,500 uniformly distributed latitudes, then 1,500 longitudes.
lats = np.random.uniform(size=1500, low=-90.000, high=90.000)
lngs = np.random.uniform(size=1500, low=-180.000, high=180.000)

# Pair each latitude with the longitude at the same position.
coordinates = [(lat, lng) for lat, lng in zip(lats, lngs)]

# Preview the first ten (lat, lng) pairs.
coordinates[:10]


[(-44.48389565305699, -17.07464308505604),
 (8.04953691830427, -93.38900568270421),
 (-31.92119666305215, 132.97056136839853),
 (72.68822683284156, -179.21013535470837),
 (3.0488551275202838, 111.05632418144717),
 (-31.567860621070594, -144.0127993718339),
 (13.24736348133294, -166.10729865631023),
 (-85.3882826203581, 16.44565571832419),
 (74.27150415119948, -22.13771380702292),
 (-14.594253336989397, -23.088321487219815)]

In [3]:
# Identify the nearest city for each latitude and longitude combination and
# keep every city name once, in order of first appearance.
# dict.fromkeys de-duplicates in a single pass while preserving insertion
# order, so no repeated scan of the growing list is needed.
# Note: cities are compared by name only, so same-named cities in different
# countries collapse into a single entry.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in coordinates
))

# Print the city count to confirm sufficient count
num_cities = len(cities)
num_cities

604

In [4]:
# Accumulator for the weather records gathered by the retrieval loop.
city_data = []

# Announce the start of the retrieval log.
print("Beginning Data Retrieval     ", "-----------------------------", sep="\n")

# Logging counters: position within the current set, and the set number.
record_count, set_count = 1, 1

# Base request URL (imperial units) with the API key appended.
url = "".join(("https://api.openweathermap.org/data/2.5/weather?units=Imperial&appid=", weather_api_key))

Beginning Data Retrieval     
-----------------------------


In [5]:
# Retrieve the current weather for every city in `cities` from the endpoint in
# `url`, appending one record (dict) per successful lookup to `city_data` and
# logging progress in sets of 50.

# Seconds to wait for a response before giving up on a city; without a
# timeout a single stalled connection would hang the whole run.
REQUEST_TIMEOUT = 10

# Start from a clean slate so that re-running this cell does not append
# duplicate records or continue the counters of a previous run.
city_data = []
record_count = 1
set_count = 1

# Loop through all the cities in our list.
for ii, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes, pausing for a minute
    # between sets (presumably to stay under the API rate limit - confirm
    # against the plan in use).
    if (ii % 50 == 0 and ii >= 50):
        set_count += 1
        record_count = 1
        time.sleep(60)

    # Log the record and set numbers and the city.
    print(f'Processing record {record_count} of Set {set_count} | {city}')

    # Add 1 to the record count.
    record_count += 1

    # Run an API request for the city. Passing the name through `params`
    # lets requests URL-encode it (spaces and any other special characters).
    try:
        response = requests.get(url, params={"q": city}, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as err:
        # Report only the exception type: the exception message can embed the
        # request URL, which contains the API key.
        print(f"Request failed ({type(err).__name__}). Skipping...")
        continue

    # Tell an unknown city apart from other failures (bad API key, rate
    # limiting, server errors) instead of reporting them all as "not found".
    if response.status_code == 404:
        print("City not found. Skipping...")
        continue
    if not response.ok:
        print(f"Request failed (HTTP {response.status_code}). Skipping...")
        continue

    try:
        # Parse the JSON and retrieve data.
        city_weather = response.json()

        # Parse out the needed data.
        city_lat = city_weather['coord']['lat']
        city_lng = city_weather['coord']['lon']
        city_max_temp = city_weather['main']['temp_max']
        city_humidity = city_weather['main']['humidity']
        city_clouds = city_weather['clouds']['all']
        city_wind = city_weather['wind']['speed']
        city_country = city_weather['sys']['country']

        # Convert the Unix timestamp to a UTC date string.
        city_date = datetime.fromtimestamp(
            city_weather['dt'], tz=timezone.utc
        ).strftime('%Y-%m-%d %H:%M:%S')

    # Only the errors a malformed or incomplete payload can raise are caught,
    # so KeyboardInterrupt and genuine bugs still surface.
    except (KeyError, TypeError, ValueError):
        print("Unexpected response format. Skipping...")

    else:
        # Append the city information into the city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

# Check the number of cities for which there are data.
num_proc_cities = len(city_data)
print(f"Found data for {num_proc_cities} cities out of {num_cities} total cities.")

Processing record 1 of Set 1 | jamestown
Processing record 2 of Set 1 | san jose
Processing record 3 of Set 1 | flinders
Processing record 4 of Set 1 | mys shmidta
City not found. Skipping...
Processing record 5 of Set 1 | sibu
Processing record 6 of Set 1 | mataura
Processing record 7 of Set 1 | kapaa
Processing record 8 of Set 1 | bredasdorp
Processing record 9 of Set 1 | illoqqortoormiut
City not found. Skipping...
Processing record 10 of Set 1 | georgetown
Processing record 11 of Set 1 | burnie
Processing record 12 of Set 1 | honiara
Processing record 13 of Set 1 | vaitupu
City not found. Skipping...
Processing record 14 of Set 1 | stepnogorsk
Processing record 15 of Set 1 | airai
Processing record 16 of Set 1 | busselton
Processing record 17 of Set 1 | souillac
Processing record 18 of Set 1 | geraldton
Processing record 19 of Set 1 | amga
Processing record 20 of Set 1 | severo-kurilsk
Processing record 21 of Set 1 | ushuaia
Processing record 22 of Set 1 | sioux lookout
Processing 

Processing record 34 of Set 4 | sorkjosen
Processing record 35 of Set 4 | maykor
Processing record 36 of Set 4 | port alfred
Processing record 37 of Set 4 | sentyabrskiy
City not found. Skipping...
Processing record 38 of Set 4 | sai buri
Processing record 39 of Set 4 | vao
Processing record 40 of Set 4 | sao filipe
Processing record 41 of Set 4 | arys
Processing record 42 of Set 4 | laguna
Processing record 43 of Set 4 | puerto escondido
Processing record 44 of Set 4 | san cristobal
Processing record 45 of Set 4 | esperance
Processing record 46 of Set 4 | gushikawa
Processing record 47 of Set 4 | caucaia
Processing record 48 of Set 4 | champasak
Processing record 49 of Set 4 | sharlyk
Processing record 50 of Set 4 | pangkalanbuun
Processing record 1 of Set 5 | shakhtinsk
Processing record 2 of Set 5 | nouadhibou
Processing record 3 of Set 5 | bhatkal
Processing record 4 of Set 5 | yanam
Processing record 5 of Set 5 | qujing
Processing record 6 of Set 5 | nanortalik
Processing record 7

Processing record 24 of Set 8 | orlik
Processing record 25 of Set 8 | tuatapere
Processing record 26 of Set 8 | lisakovsk
Processing record 27 of Set 8 | oranjemund
Processing record 28 of Set 8 | ventersburg
Processing record 29 of Set 8 | cairns
Processing record 30 of Set 8 | starominskaya
Processing record 31 of Set 8 | jacksonville
Processing record 32 of Set 8 | srednekolymsk
Processing record 33 of Set 8 | curup
Processing record 34 of Set 8 | zhanatas
City not found. Skipping...
Processing record 35 of Set 8 | amnat charoen
Processing record 36 of Set 8 | spornoye
Processing record 37 of Set 8 | rockhampton
Processing record 38 of Set 8 | taldan
Processing record 39 of Set 8 | hailar
Processing record 40 of Set 8 | takab
Processing record 41 of Set 8 | tancheng
Processing record 42 of Set 8 | moose factory
Processing record 43 of Set 8 | mikhaylov
Processing record 44 of Set 8 | port-gentil
Processing record 45 of Set 8 | sibolga
Processing record 46 of Set 8 | saint-augustin
P

Processing record 15 of Set 12 | bur gabo
City not found. Skipping...
Processing record 16 of Set 12 | lasa
Processing record 17 of Set 12 | sur
Processing record 18 of Set 12 | staroaleyskoye
Processing record 19 of Set 12 | tabon
Processing record 20 of Set 12 | miquelon
Processing record 21 of Set 12 | sabinas
Processing record 22 of Set 12 | springbok
Processing record 23 of Set 12 | nueva gerona
Processing record 24 of Set 12 | vardo
Processing record 25 of Set 12 | char bhadrasan
Processing record 26 of Set 12 | tornio
Processing record 27 of Set 12 | mehriz
Processing record 28 of Set 12 | kruisfontein
Processing record 29 of Set 12 | verkhniy baskunchak
Processing record 30 of Set 12 | talara
Processing record 31 of Set 12 | conceicao das alagoas
Processing record 32 of Set 12 | blagoyevo
Processing record 33 of Set 12 | sayat
Processing record 34 of Set 12 | sola
Processing record 35 of Set 12 | muros
Processing record 36 of Set 12 | asfi
Processing record 37 of Set 12 | manuk

In [6]:
# Build a DataFrame from the collected weather records (one row per record).
city_data_df = pd.DataFrame(data=city_data)

# Preview the first ten rows.
city_data_df.head(n=10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Jamestown,42.097,-79.2353,39.18,34,0,19.57,US,2022-11-21 18:39:02
1,San Jose,37.3394,-121.895,64.42,49,75,5.01,US,2022-11-21 18:40:06
2,Flinders,-34.5833,150.8552,53.19,58,2,11.99,AU,2022-11-21 18:40:46
3,Sibu,2.3,111.8167,77.31,88,40,5.01,MY,2022-11-21 18:40:47
4,Mataura,-46.1927,168.8643,50.49,84,57,3.06,NZ,2022-11-21 18:40:47
5,Kapaa,22.0752,-159.319,77.47,77,100,19.57,US,2022-11-21 18:40:48
6,Bredasdorp,-34.5322,20.0403,61.25,70,100,4.16,ZA,2022-11-21 18:40:48
7,Georgetown,5.4112,100.3354,82.24,88,40,2.3,MY,2022-11-21 18:40:49
8,Burnie,-41.0667,145.9167,40.95,87,98,13.38,AU,2022-11-21 18:40:50
9,Honiara,-9.4333,159.95,76.21,84,56,5.55,SB,2022-11-21 18:40:50


In [7]:
# Put the identifying columns (city, country, date) first, followed by the
# location and then the weather measurements.
new_column_order = [
    'City', 'Country', 'Date',
    'Lat', 'Lng',
    'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed',
]
city_data_df = city_data_df[new_column_order]

# Preview the first ten rows in the new column order.
city_data_df.head(n=10)

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Jamestown,US,2022-11-21 18:39:02,42.097,-79.2353,39.18,34,0,19.57
1,San Jose,US,2022-11-21 18:40:06,37.3394,-121.895,64.42,49,75,5.01
2,Flinders,AU,2022-11-21 18:40:46,-34.5833,150.8552,53.19,58,2,11.99
3,Sibu,MY,2022-11-21 18:40:47,2.3,111.8167,77.31,88,40,5.01
4,Mataura,NZ,2022-11-21 18:40:47,-46.1927,168.8643,50.49,84,57,3.06
5,Kapaa,US,2022-11-21 18:40:48,22.0752,-159.319,77.47,77,100,19.57
6,Bredasdorp,ZA,2022-11-21 18:40:48,-34.5322,20.0403,61.25,70,100,4.16
7,Georgetown,MY,2022-11-21 18:40:49,5.4112,100.3354,82.24,88,40,2.3
8,Burnie,AU,2022-11-21 18:40:50,-41.0667,145.9167,40.95,87,98,13.38
9,Honiara,SB,2022-11-21 18:40:50,-9.4333,159.95,76.21,84,56,5.55


In [8]:
# Create output CSV
output_csv_file = "weather_data/cities.csv"

# Make sure the target folder exists first: to_csv does not create missing
# directories, and failing here would throw away the whole retrieval run.
Path(output_csv_file).parent.mkdir(parents=True, exist_ok=True)

# Save the csv file so we don't have to run all these API requests again!
# The DataFrame index is written as the first column, labelled 'City ID'.
city_data_df.to_csv(output_csv_file, index_label='City ID')