In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from citipy import citipy
import time
from datetime import datetime
import requests
from config import weather_api_key

In [2]:
# Draw 1500 random latitudes and 1500 random longitudes, each sampled
# uniformly over the full range of valid values.
lats = np.random.uniform(-90.000, 90.000, 1500)
lngs = np.random.uniform(-180.000, 180.000, 1500)

# Pair them up lazily; a zip object can only be consumed once.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x157ce58f880>

In [3]:
# Materialise the (latitude, longitude) pairs so they can be iterated repeatedly.
coordinates = [(lat, lng) for lat, lng in zip(lats, lngs)]

In [4]:
# Resolve every coordinate pair to its nearest city and keep each city name
# once, in the order it was first encountered.
cities = []
seen_cities = set()  # O(1) membership test instead of rescanning `cities`
for lat, lng in coordinates:
    city = citipy.nearest_city(lat, lng).city_name

    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Number of distinct cities found.
len(cities)

623

In [5]:
# Base URL for the OpenWeatherMap current-weather endpoint (imperial units).
# HTTPS is used because the API key travels in the query string and must not
# be sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [6]:
# Accumulator for one record (dict) per successfully retrieved city.
city_data = list()

# Banner marking the start of the retrieval run.
print('Beginning Data Retrieval', '------------------------', sep='\n')

# Progress counters: both start at 1 (record within the current set, and set number).
record_count = set_count = 1

Beginning Data Retrieval
------------------------


In [7]:
# Fetch the current weather for every city and collect one record per city.
# Retrieval is best-effort: a city whose request fails or whose response lacks
# the expected fields is skipped (and remembered in `skipped_cities`) rather
# than aborting the whole run.
BATCH_SIZE = 50                # cities per "set"
BATCH_PAUSE_SECONDS = 60       # pause between sets; presumably for the API's rate limit -- confirm
REQUEST_TIMEOUT_SECONDS = 10   # stop a stalled connection from hanging the run
skipped_cities = []

for i, city in enumerate(cities):

    # After each full batch, start a new set and pause before continuing.
    if i % BATCH_SIZE == 0 and i >= BATCH_SIZE:
        set_count += 1
        record_count = 1
        time.sleep(BATCH_PAUSE_SECONDS)

    #print(f"Processing Record {record_count} of Set {set_count} | {city}")
    record_count += 1

    try:
        # Passing the city through `params` lets requests URL-encode it, so
        # names containing characters such as '&' or '#' cannot corrupt the
        # query string.
        response = requests.get(url, params={'q': city}, timeout=REQUEST_TIMEOUT_SECONDS)
        city_weather = response.json()

        city_lat = city_weather['coord']['lat']
        city_lng = city_weather['coord']['lon']
        city_max_temp = city_weather['main']['temp_max']
        city_humidity = city_weather['main']['humidity']
        city_clouds = city_weather['clouds']['all']
        city_wind = city_weather['wind']['speed']
        city_country = city_weather['sys']['country']

        # Format the Unix timestamp as a UTC date string. time.gmtime replaces
        # datetime.utcfromtimestamp, which is deprecated since Python 3.12.
        city_date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(city_weather['dt']))

        city_data.append({
            'City': city.title(),
            'Lat': city_lat,
            'Lng': city_lng,
            'Max Temp': city_max_temp,
            'Humidity': city_humidity,
            'Cloudiness': city_clouds,
            'Wind Speed': city_wind,
            'Country': city_country,
            'Date': city_date,
        })

    except (requests.exceptions.RequestException, KeyError, TypeError, ValueError):
        # Network failure, non-JSON body, or a payload without the expected
        # fields (e.g. an error response for an unknown city): skip this city.
        # Unlike a bare `except`, this does not swallow KeyboardInterrupt.
        skipped_cities.append(city)

print('------------------------')
print('Data Retrieval Complete ')
print('------------------------')

------------------------
Data Retrieval Complete 
------------------------


In [8]:
# Turn the list of per-city records into a table (one row per city),
# then preview the first ten rows.
city_data_df = pd.DataFrame(data=city_data)
city_data_df.head(n=10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Hobart,-42.8794,147.3294,61.05,89,0,6.91,AU,2022-01-23 20:06:58
1,Olavarria,-36.8927,-60.3225,86.02,44,100,3.2,AR,2022-01-23 20:07:08
2,Tuktoyaktuk,69.4541,-133.0374,6.13,94,100,7.58,CA,2022-01-23 20:07:08
3,Antofagasta,-23.65,-70.4,72.21,57,62,10.27,CL,2022-01-23 20:07:09
4,New Norfolk,-42.7826,147.0587,60.71,74,93,1.97,AU,2022-01-23 20:07:09
5,Qaanaaq,77.484,-69.3632,-5.44,74,100,4.41,GL,2022-01-23 20:07:09
6,Barrow,71.2906,-156.7887,-2.18,78,100,6.91,US,2022-01-23 20:07:00
7,Tocopilla,-22.092,-70.1979,71.64,75,100,10.89,CL,2022-01-23 20:07:10
8,The Valley,18.217,-63.0578,81.1,69,20,9.22,AI,2022-01-23 20:07:10
9,Rikitea,-23.1203,-134.9692,78.48,74,96,15.9,PF,2022-01-23 20:04:19


In [9]:
# Desired column layout: identifying fields first, then position,
# then the weather measurements.
column_order = [
    'City', 'Country', 'Date',
    'Lat', 'Lng',
    'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed',
]

# Select the columns in that order and preview the first ten rows.
city_data_df = city_data_df.loc[:, column_order]
city_data_df.head(n=10)

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Hobart,AU,2022-01-23 20:06:58,-42.8794,147.3294,61.05,89,0,6.91
1,Olavarria,AR,2022-01-23 20:07:08,-36.8927,-60.3225,86.02,44,100,3.2
2,Tuktoyaktuk,CA,2022-01-23 20:07:08,69.4541,-133.0374,6.13,94,100,7.58
3,Antofagasta,CL,2022-01-23 20:07:09,-23.65,-70.4,72.21,57,62,10.27
4,New Norfolk,AU,2022-01-23 20:07:09,-42.7826,147.0587,60.71,74,93,1.97
5,Qaanaaq,GL,2022-01-23 20:07:09,77.484,-69.3632,-5.44,74,100,4.41
6,Barrow,US,2022-01-23 20:07:00,71.2906,-156.7887,-2.18,78,100,6.91
7,Tocopilla,CL,2022-01-23 20:07:10,-22.092,-70.1979,71.64,75,100,10.89
8,The Valley,AI,2022-01-23 20:07:10,18.217,-63.0578,81.1,69,20,9.22
9,Rikitea,PF,2022-01-23 20:04:19,-23.1203,-134.9692,78.48,74,96,15.9


In [11]:
# Path of the CSV file that will hold the collected weather data.
output_data_file = 'weather_data_cities.csv'

# Write the table to disk; the index column gets the header 'City_ID'.
city_data_df.to_csv(path_or_buf=output_data_file, index_label='City_ID')