In [1]:
# To do:
# - add a retry loop that draws more coordinates until > 500 records are collected, in case the initial pull falls short
# - improve exception handling
# - write the printed results to a log .txt file
# - collect the cities that were not found, with their status codes, in a separate DataFrame

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from citipy import citipy
import time
import requests
from datetime import datetime
from config import weather_api_key
import string

In [3]:
# Draw 1,500 random latitude/longitude pairs covering the whole globe.
# Latitude spans [-90, 90) and longitude spans [-180, 180).  The longitude
# upper bound must be 180: a bound of 90 never samples anything east of 90°E,
# which biases the set of cities found later.
lats = np.random.uniform(low=-90, high=90, size=1500)
longs = np.random.uniform(low=-180, high=180, size=1500)

# zip() returns a one-shot iterator: it is empty after the first pass over it,
# so re-run this cell before re-running any cell that consumes lat_longs.
lat_longs = zip(lats, longs)
lat_longs

<zip at 0x1655dbe5808>

In [4]:
# Base endpoint of OpenWeatherMap's current-weather API, requesting imperial
# units.  HTTPS is used because the API key is part of the query string and
# would otherwise cross the network in clear text.
base_url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&appid=" + weather_api_key

In [5]:
# Materialise the coordinate pairs (lat_longs is a one-shot zip iterator).
coordinates = list(lat_longs)

# Look up the nearest known city for every coordinate pair.  The names come
# back from citipy in lowercase, so string.capwords() capitalises each word.
# source https://favtutor.com/blogs/capitalize-first-letter-python
city_names = (
    string.capwords(citipy.nearest_city(latitude=lat, longitude=lon).city_name)
    for lat, lon in coordinates
)

# dict.fromkeys() removes duplicates in a single pass while keeping the
# first-seen order, replacing a linear `not in` scan of a list per city.
# NOTE(review): de-duplicating on the name alone merges same-named cities in
# different countries; consider keying on (name, country code) if that matters.
cities = list(dict.fromkeys(city_names))

len(cities)

['Hermanus',
 'Cabo San Lucas',
 'Ushuaia',
 'Fez',
 'Hithadhoo',
 'Namwala',
 'Husavik',
 'Grindavik',
 'Kresttsy',
 'Manavalakurichi',
 'Port Hardy',
 'Tuktoyaktuk',
 'Aklavik',
 'Arraial Do Cabo',
 'Namibe',
 'Bay Roberts',
 'Puerto Ayora',
 'Zhanaozen',
 'Grand Gaube',
 'Vardo',
 'Tasiilaq',
 'Thompson',
 'Ilulissat',
 'Haftoni',
 'Amapa',
 'Rikitea',
 'Punta Arenas',
 'College',
 'Gobabis',
 'Beloha',
 'Upernavik',
 'Kahului',
 'Valparaiso',
 'Cabedelo',
 'Teguldet',
 'Narsaq',
 'Clyde River',
 'Eyl',
 'Mataura',
 'Arlit',
 'Price',
 'Kruisfontein',
 'Port-cartier',
 'Cape Town',
 'Petauke',
 'Illoqqortoormiut',
 'Sainte-anne-des-monts',
 'Busselton',
 'Xuddur',
 'Mindelo',
 'Caraquet',
 'Sao Joao Da Barra',
 'Tatvan',
 'Umzimvubu',
 'Sinnamary',
 'Santa Rosa',
 'Luderitz',
 'Ponta Do Sol',
 'Mahebourg',
 'Lompoc',
 'Kodiak',
 'Bambous Virieux',
 'Bethel',
 'Norman Wells',
 'Henties Bay',
 'Pisco',
 'San Patricio',
 'Oksfjord',
 'Quatre Cocos',
 'Mananjary',
 'Taolanaro',
 'Codrin

In [6]:
# Query the weather API once per city and collect one record (dict) per city
# that was retrieved and parsed successfully.  Failures are logged and skipped
# so a single bad city never aborts the run.
city_data = list()

# Give up on a request that stalls instead of letting one dead connection
# hang the whole retrieval loop.
REQUEST_TIMEOUT_SECONDS = 10

print("Beginning Data Retrieval")
print("-------------------------------------")

record_count = 1
set_count = 1

for i, city in enumerate(cities):
    # group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        # NOTE(review): a disabled 60-second pause used to sit here.  It was
        # presumably meant to respect the API's rate limit -- confirm against
        # the limits of the plan in use.  If HTTP 429 responses show up in the
        # log, add a time.sleep() at this point.

    print(f"Processing Record {record_count} of set {set_count} | {city}")
    record_count += 1

    # Exactly one request per city.  Passing the name through `params` lets
    # requests URL-encode it (spaces, accents, '&', ...) instead of splicing
    # it into the URL by hand.
    try:
        response = requests.get(base_url, params={"q": city}, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.exceptions.RequestException as err:
        # Only the exception type is printed: the full message contains the
        # request URL, and with it the API key.
        print(f"ERROR: City {city} not processed. Request failed: {type(err).__name__}")
        continue

    # Decide from the response already in hand; no second request is needed
    # just to learn the status code.
    status_code = response.status_code
    if status_code == 404:
        print(f"ERROR: City {city} not found.")
        continue
    if status_code != 200:
        print(f"ERROR: City {city} not processed. StatusCode: {status_code}")
        continue

    try:
        # extract the JSON payload and parse out the data points
        city_weather = response.json()
        city_lat = city_weather["coord"]["lat"]
        city_long = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]

        # Convert the Unix timestamp to a UTC "YYYY-MM-DD HH:MM:SS" string.
        # time.gmtime() gives the same result as datetime.utcfromtimestamp(),
        # which is deprecated as of Python 3.12.
        city_date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(city_weather["dt"]))
    except (KeyError, TypeError, ValueError) as err:
        # ValueError: body was not valid JSON; KeyError/TypeError: a field is
        # missing or has an unexpected shape.
        print(f"ERROR: City {city} not processed. Unexpected response: {err!r}")
        continue

    # add to list
    city_data.append({"City": city,
                      "Lat": city_lat,
                      "Long": city_long,
                      "Max Temp": city_max_temp,
                      "Humidity": city_humidity,
                      "Cloudiness": city_clouds,
                      "Wind Speed": city_wind,
                      "Country": city_country,
                      "Date": city_date
                      })

    print(f"{city} processed successfully.")

print("-------------------------------------")
print("Data Retrieval Complete!")
print("-------------------------------------")

Beginning Data Retrieval
-------------------------------------
Processing Record 1 of set 1 | Hermanus
Hermanus processed successfully.
Processing Record 2 of set 1 | Cabo San Lucas
Cabo San Lucas processed successfully.
Processing Record 3 of set 1 | Ushuaia
Ushuaia processed successfully.
Processing Record 4 of set 1 | Fez
Fez processed successfully.
Processing Record 5 of set 1 | Hithadhoo
Hithadhoo processed successfully.
Processing Record 6 of set 1 | Namwala
Namwala processed successfully.
Processing Record 7 of set 1 | Husavik
Husavik processed successfully.
Processing Record 8 of set 1 | Grindavik
Grindavik processed successfully.
Processing Record 9 of set 1 | Kresttsy
Kresttsy processed successfully.
Processing Record 10 of set 1 | Manavalakurichi
Manavalakurichi processed successfully.
Processing Record 11 of set 1 | Port Hardy
Port Hardy processed successfully.
Processing Record 12 of set 1 | Tuktoyaktuk
Tuktoyaktuk processed successfully.
Processing Record 13 of set 1 | Ak

In [7]:
# Turn the list of per-city record dicts into a table: one row per city that
# was retrieved successfully, one column per dict key.
city_data_df = pd.DataFrame(data=city_data)

# Bare last expression so the notebook renders the frame.
city_data_df

Unnamed: 0,City,Lat,Long,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Hermanus,-34.4187,19.2345,64.90,89,14,7.52,ZA,2021-12-19 01:30:52
1,Cabo San Lucas,22.8909,-109.9124,73.71,78,20,3.44,MX,2021-12-19 01:36:56
2,Ushuaia,-54.8000,-68.3000,53.26,71,40,24.16,AR,2021-12-19 01:34:21
3,Fez,34.0372,-4.9998,48.31,70,0,5.75,MA,2021-12-19 01:37:28
4,Hithadhoo,-0.6000,73.0833,81.90,72,100,2.62,MV,2021-12-19 01:28:29
...,...,...,...,...,...,...,...,...,...
488,Iquitos,-3.7481,-73.2472,77.04,94,0,0.00,PE,2021-12-19 01:40:40
489,Ippy,6.2679,21.2247,72.28,42,100,3.51,CF,2021-12-19 01:40:41
490,Krasnyy Yar,46.5331,48.3456,35.53,83,100,13.82,RU,2021-12-19 01:40:41
491,Umm Lajj,25.0213,37.2685,65.07,59,2,12.66,SA,2021-12-19 01:40:31


In [8]:
# Desired left-to-right column layout for the exported table.
column_order = ["City", "Country", "Date", "Lat", "Long", "Max Temp", "Humidity", "Cloudiness", "Wind Speed"]

# Actually apply the ordering; defining the list alone leaves the frame
# unchanged.  reindex() is used instead of city_data_df[column_order] so an
# empty frame (no city retrieved) does not raise KeyError, and re-running
# this cell is harmless.
city_data_df = city_data_df.reindex(columns=column_order)

In [9]:
import os

# Export the city weather table to weather_data/cities.csv; the DataFrame
# index is written as the first column under the header "City_ID".
output_data_file = os.path.join("weather_data", "cities.csv")

# to_csv() does not create missing directories, so make sure the target
# folder exists; exist_ok=True makes this a no-op on later runs.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)

city_data_df.to_csv(output_data_file, index_label="City_ID")