In [6]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from pprint import pprint
import datetime
import matplotlib.dates as mdate


from api_keys import api_key

from citipy import citipy

# Destination path of the CSV export written once the weather table is cleaned.
output_data_file = "output_data/cities.csv"

# (min, max) pairs covering the full latitude and longitude span.
lat_range, lng_range = (-90, 90), (-180, 180)

In [7]:
# Build the list of cities to query: draw random coordinates, map each one to
# its nearest known city via citipy, and keep every distinct (city, country)
# pair once, in first-seen order.
cities = []
country_codes = []

# Sample coordinates from the configured bounds rather than repeating the
# literals here, so the sampling region is defined in one place.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Track (city, country) pairs in a set: membership checks are O(1), and two
# cities sharing a name in different countries are no longer collapsed into
# one (the weather lookup later queries by "city,country").
seen_locations = set()

for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng)
    location = (city.city_name, city.country_code)

    if location not in seen_locations:
        seen_locations.add(location)
        cities.append(city.city_name)
        country_codes.append(city.country_code)

print('The above script has generated {} different cities using random numbers.'.format(len(cities)))

The above script has generated 601 different cities using random numbers.


In [9]:
# Fetch current weather for every (city, country) pair from OpenWeatherMap and
# store it in df_countries. Cities that cannot be resolved are filled with NaN
# so a later dropna() can remove them.
df_countries = pd.DataFrame({'City': cities, 'Country': country_codes})

# Weather columns start out as empty strings (object dtype) and are filled in
# row by row below.
weather_columns = ['Latitude', 'Longitude', 'Max Temperature (F)', 'Humidity (%)',
                   'Cloudiness (%)', 'Wind Speed (mph)', 'Date']
for column in weather_columns:
    df_countries[column] = ''

# Throttling: at most 50 calls per 60-second window (the API limit is 60/min).
CALLS_PER_BATCH = 50
BATCH_WINDOW_SECONDS = 60
# Upper bound on a single HTTP request so one stalled connection cannot hang
# the whole cell.
REQUEST_TIMEOUT_SECONDS = 10

# HTTPS keeps the API key out of plaintext traffic; hoisted out of the loop
# because it never changes.
target_url = "https://api.openweathermap.org/data/2.5/weather"

city_count = 0       # number of cities with a complete weather record
api_call_count = 1   # 1-based position of the current call within its batch
sets = 0             # number of completed batches
t0 = time.monotonic()

for index, row in df_countries.iterrows():
    city_name = row['City']
    country_id = row['Country']

    search_parameters = {
        "q": str(city_name) + "," + str(country_id),
        "units": "IMPERIAL",
        "mode": "json",
        "APPID": api_key
    }

    try:
        response = requests.get(target_url, params=search_parameters,
                                timeout=REQUEST_TIMEOUT_SECONDS)
        country_info = response.json()
        # Read every field before writing anything, so a missing key can never
        # leave a half-populated row behind.
        weather_values = [
            country_info['coord']['lat'],
            country_info['coord']['lon'],
            country_info['main']['temp_max'],
            country_info['main']['humidity'],
            country_info['clouds']['all'],
            country_info['wind']['speed'],
            country_info['dt'],
        ]
    except (KeyError, ValueError, requests.exceptions.RequestException):
        # KeyError: the payload lacks the weather fields (e.g. unknown city).
        # ValueError: the body was not valid JSON.
        # RequestException: network failure or timeout.
        weather_values = [np.nan] * len(weather_columns)
        print(f"Missing weather information for city named: {city_name} ...hence skipping")
    else:
        print(f'Processing Record {api_call_count} of Set {sets} | {city_name}' )
        city_count = city_count + 1

    for column, value in zip(weather_columns, weather_values):
        df_countries.loc[index, column] = value

    api_call_count = api_call_count + 1

    if api_call_count > CALLS_PER_BATCH:
        api_call_count = 1  # reset for the next batch
        sets = sets + 1
        elapsed = time.monotonic() - t0
        # Clamp at zero: a batch that already took longer than the window
        # would otherwise pass a negative value to sleep(), which raises.
        time.sleep(max(0.0, BATCH_WINDOW_SECONDS - elapsed))
        t0 = time.monotonic()
print(f"Processing successfully completed. Retrieved weather information for {city_count} cities")

Processing Record 1 of Set 0 | hambantota
Processing Record 2 of Set 0 | rumonge
Missing weather information for city named: airai ...hence skipping
Processing Record 4 of Set 0 | yellowknife
Processing Record 5 of Set 0 | punta arenas
Processing Record 6 of Set 0 | vaini
Processing Record 7 of Set 0 | butaritari
Processing Record 8 of Set 0 | new norfolk
Processing Record 9 of Set 0 | ushuaia
Processing Record 10 of Set 0 | port elizabeth
Processing Record 11 of Set 0 | quatre cocos
Processing Record 12 of Set 0 | bagan
Processing Record 13 of Set 0 | puerto madryn
Processing Record 14 of Set 0 | balikpapan
Processing Record 15 of Set 0 | chokurdakh
Processing Record 16 of Set 0 | klyuchi
Processing Record 17 of Set 0 | flinders
Processing Record 18 of Set 0 | lagoa
Processing Record 19 of Set 0 | puerto ayora
Processing Record 20 of Set 0 | santa rosa
Processing Record 21 of Set 0 | kodiak
Processing Record 22 of Set 0 | erzin
Processing Record 23 of Set 0 | wilhelmsburg
Processing R

KeyboardInterrupt: 

In [None]:
# Coerce the measured columns to numeric dtypes, one column at a time.
numeric_columns = ['Latitude', 'Longitude', 'Max Temperature (F)',
                   'Humidity (%)', 'Cloudiness (%)', 'Wind Speed (mph)']
for column_name in numeric_columns:
    df_countries[column_name] = pd.to_numeric(df_countries[column_name])

# Discard every row with a missing value, then arrange the columns in the
# order used for the export.
export_order = ["City", "Cloudiness (%)", "Country", "Date", "Humidity (%)",
                "Latitude", "Longitude", "Max Temperature (F)", "Wind Speed (mph)"]
df_countries = df_countries.dropna()[export_order]

# Show per-column counts and a preview, then write the table to CSV.
print(df_countries.count())
print(df_countries.head(20))
df_countries.dtypes  # not the cell's last expression, so nothing is displayed
df_countries.to_csv(output_data_file, encoding="utf-8", index=False)

In [None]:
# Scatter plot of maximum temperature against latitude, saved as a PNG.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_countries["Latitude"], df_countries["Max Temperature (F)"], color='b', alpha=0.5)
ax.grid()
ax.set_xlabel('Latitude')
ax.set_ylabel('Max Temperature (F)')
# "%m/%d/%y" spells out the date explicitly; the "%D" shorthand is not a
# documented Python format code and is not available on every platform.
ax.set_title(f'City Latitude vs Max Temperature ({datetime.datetime.now().strftime("%m/%d/%y")})')
fig.savefig("./output_data/LatVsMaxTemp.png")
plt.show()

In [None]:
# Scatter plot of humidity against latitude, saved as a PNG.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_countries["Latitude"], df_countries["Humidity (%)"], color='blue', alpha=0.5)
# NOTE(review): the fixed lower limit of 10 puts any reading below 10% outside
# the visible area — confirm this is intended.
ax.set_ylim(10, 120)
ax.grid()
ax.set_xlabel('Latitude')
ax.set_ylabel('Humidity (%)')
# "%m/%d/%y" spells out the date explicitly; the "%D" shorthand is not a
# documented Python format code and is not available on every platform.
ax.set_title(f'City Latitude vs Humidity ({datetime.datetime.now().strftime("%m/%d/%y")})')
fig.savefig("./output_data/LatVsHumidity.png")
plt.show()

In [None]:
# Scatter plot of cloudiness against latitude, saved as a PNG.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_countries["Latitude"], df_countries["Cloudiness (%)"], color='b', alpha=0.5)
ax.grid()
# Leave headroom above the largest observed value and a margin below zero so
# points on the extremes are not drawn on the frame.
ax.set_ylim(-20, (df_countries["Cloudiness (%)"].max() + 10))
ax.set_xlabel('Latitude')
ax.set_ylabel('Cloudiness (%)')
# "%m/%d/%y" spells out the date explicitly; the "%D" shorthand is not a
# documented Python format code and is not available on every platform.
ax.set_title(f'City Latitude vs Cloudiness ({datetime.datetime.now().strftime("%m/%d/%y")})')
fig.savefig("./output_data/LatVsCloudiness.png")
plt.show()

In [None]:
# Scatter plot of wind speed against latitude, saved as a PNG.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_countries["Latitude"], df_countries["Wind Speed (mph)"], color='blue', alpha=0.5)
ax.grid()
# Pad the y-axis by 5 on each side of the observed range.
wind_speed = df_countries["Wind Speed (mph)"]
ax.set_ylim((wind_speed.min() - 5), (wind_speed.max() + 5))
ax.set_xlabel('Latitude')
ax.set_ylabel('Wind Speed (mph)')
# "%m/%d/%y" spells out the date explicitly; the "%D" shorthand is not a
# documented Python format code and is not available on every platform.
ax.set_title(f'City Latitude vs Wind Speed ({datetime.datetime.now().strftime("%m/%d/%y")})')
fig.savefig("./output_data/LatVsWind.png")
plt.show()