In [1]:
#Import libraries
from datetime import datetime, timezone
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

from config import weather_api_key

In [2]:
# Base url for the OpenWeatherMap current-weather endpoint (imperial units).
# HTTPS is used so the API key carried in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [3]:
#Create set of random coordinates

#Number of random latitude/longitude pairs to draw
NUM_COORDINATES = 2000

#Fixed seed so that a fresh Restart & Run All draws the same coordinates
SEED = 42
rng = np.random.default_rng(SEED)

#Draw latitudes in [-90, 90) and longitudes in [-180, 180)
lats = rng.uniform(-90, 90, size = NUM_COORDINATES)
lngs = rng.uniform(-180, 180, size = NUM_COORDINATES)

#Pair each latitude with the longitude at the same position.
#NOTE: zip returns a one-shot iterator; it is empty once it has been consumed.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x1ad142285c8>

In [4]:
#Add the coordinates to a list
#The pairs are built straight from lats/lngs instead of from the lat_lngs zip:
#a zip can only be consumed once, so re-running this cell on its own would
#otherwise silently produce an empty list.
coordinates = list(zip(lats, lngs))

In [5]:
from citipy import citipy

In [6]:
#Identify the nearest city for each coordinate pair and keep each city once.
#dict.fromkeys drops duplicates while preserving first-seen order, which avoids
#re-scanning the whole list for every coordinate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name
    for lat, lng in coordinates
))

#Show the number of unique cities
len(cities)

739

In [None]:
#Retrieve the current weather for every city in `cities` and collect one
#dictionary per successfully retrieved city in `city_data`.

#Create empty list for weather data
city_data = []

#Seconds to wait for the API before giving up on a city, so that a single
#stalled connection cannot hang the whole retrieval loop
REQUEST_TIMEOUT = 10

#Print beginning of log
print("Beginning data retrieval")
print("-----------------------------")

#Create counters.
record_count = 1
set_count = 1

#Loop through cities in list
for i, city in enumerate(cities):
    #Group cities in sets of 50: at every 50th city after the first,
    #start a new set and restart the record numbering
    if i % 50 == 0 and i >= 50:
        set_count += 1
        record_count = 1

    #Create endpoint URL for each city
    city_url = url + "&q=" + city.replace(" ", "+")

    #Log record, set num, city
    print(f"Processing record {record_count} of set {set_count} | {city}")

    record_count += 1

    #Run API req for each city
    try:
        #Parse JSON and retrieve data.
        city_weather = requests.get(city_url, timeout=REQUEST_TIMEOUT).json()
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]

        #Convert the response timestamp to a UTC date string
        #(timezone-aware replacement for the deprecated utcfromtimestamp)
        city_date = datetime.fromtimestamp(city_weather["dt"], tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')

        #Append city info to city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    #Skip cities whose request failed or whose response lacks the expected
    #fields. Only these error types are caught, so interrupting the kernel
    #(KeyboardInterrupt) still stops the loop.
    except (requests.exceptions.RequestException, KeyError, TypeError, ValueError):
        print("City not found. Skipping...")

#Indicate Data Loading complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning data retrieval
-----------------------------
Processing record 1 of set 1 | atuona
Processing record 2 of set 1 | busselton
Processing record 3 of set 1 | katsuura
City not found. Skipping...
Processing record 4 of set 1 | balakhninskiy
Processing record 5 of set 1 | faanui
Processing record 6 of set 1 | tasiilaq
Processing record 7 of set 1 | opuwo
Processing record 8 of set 1 | albany
City not found. Skipping...
Processing record 9 of set 1 | punta arenas


In [None]:
#Show how many cities were retrieved successfully (entries appended to city_data)
len(city_data)

In [None]:
#Build a DataFrame from the list of per-city dictionaries
city_data_df = pd.DataFrame(data=city_data)

#Preview the first ten rows
city_data_df.iloc[:10]

In [None]:
#Desired column order: identifying fields first, then position, then measurements
new_col_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]

#Select the columns in that order and preview the first ten rows
city_data_df = city_data_df.loc[:, new_col_order]
city_data_df.iloc[:10]

In [None]:
#Create output file path
output_path = "weather_data/cities.csv"

#Make sure the target folder exists; to_csv does not create missing
#directories and would otherwise fail on a fresh checkout
Path(output_path).parent.mkdir(parents=True, exist_ok=True)

#Export city_data, writing the DataFrame index as a "City_ID" column
city_data_df.to_csv(output_path, index_label = "City_ID")