In [1]:
# Dependencies and Setup
import csv
import os
import time
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress
# Import API key
from config import api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination path of the CSV export
output_data_file = "output_data/cities.csv"

# Coordinate bounds as (min, max) pairs: latitude first, then longitude
lat_range, lng_range = (-90, 90), (-180, 180)

In [2]:
# Build the list of distinct cities nearest to randomly sampled coordinates.
# NOTE: no random seed is set, so every run produces a different city list.
cities = []
seen_cities = set()  # constant-time duplicate check; `cities` keeps first-seen order

# Create a set of random lat and lng combinations inside the configured bounds
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Several sampled points can resolve to the same city; keep the first hit only
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

594

In [80]:
# Base URL of the OpenWeather current-weather endpoint; the city name is appended to it
url = "http://api.openweathermap.org/data/2.5/weather?q="
# Collects the JSON payload of every request, including "city not found" ones
responses = []


def api_call(x, y, set=1):
    """Request current weather for ``cities[x:y]`` and append each payload to ``responses``.

    The loop lives in a function so it can be invoked one batch at a time from
    separate cells, keeping each batch under the per-minute call limit.

    Parameters
    ----------
    x, y : int
        Slice bounds into the global ``cities`` list (``y`` is exclusive).
    set : int, optional
        Batch number shown in the progress log. It defaults to 1 so that
        two-argument calls work. The name shadows the builtin ``set`` inside
        this function; it is kept so existing callers are unaffected.

    Notes
    -----
    Every payload is appended to the global ``responses`` list, so running the
    same batch twice stores its cities twice.
    """
    for count, city in enumerate(cities[x:y], start=1):
        city_url = url + f"{city}&units=imperial&appid={api_key}"
        response = requests.get(city_url).json()
        responses.append(response)
        # .get() avoids a KeyError on a payload without 'cod'; str() makes the
        # comparison independent of whether the code arrives as text or number
        if str(response.get('cod')) == '404':
            print('City not found. Skipping...')
        else:
            print(f"Processing Record {count} of Set {set} | {city}")

In [81]:
# Batch 1: cities[0:60]. Batches run in separate cells so the requests are spread
# over time. The third argument is the batch number api_call's signature expects.
api_call(0, 60, 1)

In [84]:
# Batch 2: cities[60:121], i.e. up to city index 120 (third argument = batch number)
api_call(60, 121, 2)

In [86]:
# Batch 3: cities[121:182], i.e. up to city index 181 (third argument = batch number)
api_call(121, 182, 3)

In [88]:
# Batch 4: cities[182:243], i.e. up to city index 242 (third argument = batch number)
api_call(182, 243, 4)

In [90]:
# Batch 5: cities[243:303], i.e. up to city index 302 (third argument = batch number)
api_call(243, 303, 5)

In [92]:
# Batch 6: cities[303:363], i.e. up to city index 362 (third argument = batch number)
api_call(303, 363, 6)

In [94]:
# Batch 7: cities[363:423], i.e. up to city index 422 (third argument = batch number)
api_call(363, 423, 7)

In [96]:
# Batch 8: cities[423:483], i.e. up to city index 482 (third argument = batch number)
api_call(423, 483, 8)

In [98]:
# Batch 9: cities[483:543], i.e. up to city index 542 (third argument = batch number)
api_call(483, 543, 9)

In [100]:
# Batch 10 (last): cities[543:595]. A slice end past the end of the list is fine;
# slicing simply stops at the last city. Third argument = batch number.
api_call(543, 595, 10)

In [101]:
# Sanity check: number of payloads collected across all batches.
# api_call appends every response, so "city not found" payloads are counted too.
len(responses)

594

In [102]:
# Build a DataFrame from the raw API payloads (one row per response)
df = pd.DataFrame(responses)
# Keep only successful lookups (cod 200). Dropping just the '404' rows would let
# any other error payload through; those rows have no 'sys'/'main'/'coord' data
# and would break the extraction cells below.
# The code is compared as text so it matches whether stored as a number or a string.
index_drop = df.index[df['cod'].astype(str) != '200'].to_list()
# Renumber rows 0..n-1; the extraction cells below look rows up by that position
df = df.drop(index_drop).reset_index(drop=True)
df.head()

Unnamed: 0,coord,weather,base,main,visibility,wind,clouds,dt,sys,timezone,id,name,cod,snow,message,rain
0,"{'lon': -121.42, 'lat': 38.42}","[{'id': 800, 'main': 'Clear', 'description': '...",stations,"{'temp': 294.69, 'feels_like': 292.57, 'temp_m...",16093.0,"{'speed': 3.6, 'deg': 240}",{'all': 1},1583443000.0,"{'type': 1, 'id': 5769, 'country': 'US', 'sunr...",-28800.0,5364271.0,Laguna,200,,,
1,"{'lon': 127.47, 'lat': 63.92}","[{'id': 600, 'main': 'Snow', 'description': 'l...",stations,"{'temp': 258.55, 'feels_like': 251.48, 'temp_m...",,"{'speed': 5.21, 'deg': 102}",{'all': 100},1583443000.0,"{'country': 'RU', 'sunrise': 1583446814, 'suns...",32400.0,2017215.0,Sangar,200,{'3h': 0.25},,
2,"{'lon': 131.55, 'lat': 48.98}","[{'id': 800, 'main': 'Clear', 'description': '...",stations,"{'temp': 259.47, 'feels_like': 255.52, 'temp_m...",,"{'speed': 0.79, 'deg': 323}",{'all': 0},1583443000.0,"{'country': 'RU', 'sunrise': 1583444767, 'suns...",36000.0,2023360.0,Izvestkovyy,200,,,
3,"{'lon': -134.97, 'lat': -23.12}","[{'id': 801, 'main': 'Clouds', 'description': ...",stations,"{'temp': 298.56, 'feels_like': 296.4, 'temp_mi...",,"{'speed': 8.36, 'deg': 96}",{'all': 13},1583443000.0,"{'country': 'PF', 'sunrise': 1583420274, 'suns...",-32400.0,4030556.0,Rikitea,200,,,
4,"{'lon': -156.47, 'lat': 20.89}","[{'id': 803, 'main': 'Clouds', 'description': ...",stations,"{'temp': 296.77, 'feels_like': 298.7, 'temp_mi...",16093.0,"{'speed': 3.6, 'deg': 280}",{'all': 75},1583443000.0,"{'type': 1, 'id': 7875, 'country': 'US', 'sunr...",-36000.0,5847411.0,Kahului,200,,,


In [103]:
# Country code of every city, read from the nested 'sys' record of each row
countries = [sys_record['country'] for sys_record in df['sys']]
countries

['US',
 'RU',
 'RU',
 'PF',
 'US',
 'AR',
 'RU',
 'US',
 'US',
 'MV',
 'ZA',
 'US',
 'ZA',
 'KZ',
 'KI',
 'CA',
 'IS',
 'NO',
 'MM',
 'US',
 'CL',
 'PE',
 'BR',
 'JP',
 'ZA',
 'FR',
 'VE',
 'GB',
 'MU',
 'ID',
 'CK',
 'BR',
 'RE',
 'ZM',
 'MX',
 'US',
 'CV',
 'ZA',
 'CA',
 'MG',
 'NG',
 'TO',
 'RE',
 'RU',
 'NZ',
 'MU',
 'ID',
 'CL',
 'US',
 'AU',
 'CL',
 'ZA',
 'CA',
 'RU',
 'PF',
 'NO',
 'US',
 'AR',
 'MY',
 'IN',
 'KE',
 'ZA',
 'RU',
 'AU',
 'SO',
 'CA',
 'RU',
 'RU',
 'NA',
 'NA',
 'US',
 'TR',
 'UY',
 'US',
 'RU',
 'MN',
 'ES',
 'MU',
 'ID',
 'IN',
 'BR',
 'ID',
 'FI',
 'JP',
 'SO',
 'TZ',
 'BR',
 'MU',
 'US',
 'RU',
 'EC',
 'WS',
 'RU',
 'PT',
 'EG',
 'NZ',
 'BR',
 'PT',
 'FO',
 'CA',
 'BR',
 'ZA',
 'IS',
 'ZM',
 'KZ',
 'SO',
 'CD',
 'CD',
 'CL',
 'PE',
 'CN',
 'US',
 'ID',
 'RU',
 'NO',
 'GL',
 'IN',
 'CA',
 'GL',
 'NC',
 'BR',
 'AU',
 'NU',
 'CD',
 'US',
 'CO',
 'NZ',
 'BR',
 'RU',
 'AU',
 'RU',
 'RU',
 'RU',
 'US',
 'CA',
 'AU',
 'CA',
 'CL',
 'SA',
 'AU',
 'US',
 'JP',
 'TZ',

In [104]:
# City names as reported by the API, taken straight from the 'name' column
name_column = df['name']
names = name_column.values
names

array(['Laguna', 'Sangar', 'Izvestkovyy', 'Rikitea', 'Kahului', 'Ushuaia',
       'Provideniya', 'North Bend', 'Albany', 'Hithadhoo',
       'Port Elizabeth', '‘Āhuimanu', 'East London', 'Ordzhonīkīdze',
       'Butaritari', 'Yellowknife', 'Grindavik', 'Vardø', 'Shwebo',
       'Jamestown', 'Coquimbo', 'Paita', 'Itapeva', 'Shingū',
       'Port Alfred', 'Tourlaville', 'San Cristobal', 'Brae',
       'Quatre Cocos', 'Gorontalo', 'Avarua', 'Olinda', 'Saint-Philippe',
       'Kalabo', 'Coahuayana Viejo', 'Nome', 'São Filipe', 'Saldanha',
       'Norman Wells', 'Vangaindrano', 'Yenagoa', 'Vaini', 'Saint-Pierre',
       'Khatanga', 'Mataura', 'Mahébourg', 'Sibolga', 'Punta Arenas',
       'Rockport', 'Busselton', 'Coyhaique', 'Sundumbili', 'Campbellton',
       'Pevek', 'Faanui', 'Båtsfjord', 'Eureka', 'Mar del Plata',
       'George Town', 'Tura', 'Lamu', 'Hermanus', 'Nikolskoye',
       'Codrington', 'Mogadishu', 'Thompson', "Oktyabr'skoye", 'Chara',
       'Walvis Bay', 'Lüderitz', 'West

In [105]:
# Cloudiness of every city, read from the 'all' entry of each row's 'clouds' record
cloudiness = [cloud_record['all'] for cloud_record in df['clouds']]
cloudiness

[1,
 100,
 0,
 13,
 75,
 40,
 75,
 1,
 40,
 27,
 90,
 20,
 100,
 0,
 100,
 75,
 20,
 75,
 10,
 1,
 90,
 13,
 100,
 0,
 99,
 90,
 20,
 100,
 75,
 100,
 57,
 0,
 40,
 26,
 75,
 90,
 0,
 18,
 20,
 99,
 58,
 20,
 40,
 100,
 39,
 75,
 100,
 7,
 1,
 56,
 100,
 0,
 69,
 0,
 34,
 75,
 90,
 100,
 20,
 100,
 40,
 0,
 75,
 100,
 13,
 20,
 22,
 18,
 0,
 0,
 1,
 75,
 0,
 90,
 0,
 61,
 75,
 75,
 100,
 100,
 0,
 100,
 75,
 20,
 50,
 4,
 100,
 75,
 1,
 3,
 20,
 100,
 99,
 40,
 0,
 56,
 94,
 40,
 0,
 5,
 63,
 0,
 90,
 100,
 90,
 0,
 78,
 6,
 33,
 87,
 0,
 90,
 40,
 23,
 90,
 79,
 0,
 90,
 75,
 3,
 86,
 75,
 40,
 69,
 1,
 40,
 53,
 20,
 0,
 90,
 99,
 100,
 100,
 1,
 90,
 75,
 27,
 54,
 0,
 75,
 1,
 75,
 76,
 100,
 19,
 15,
 53,
 0,
 1,
 40,
 0,
 75,
 1,
 100,
 2,
 1,
 90,
 1,
 100,
 0,
 75,
 100,
 75,
 100,
 0,
 1,
 90,
 0,
 1,
 0,
 75,
 0,
 97,
 75,
 1,
 20,
 0,
 7,
 100,
 100,
 100,
 95,
 1,
 75,
 20,
 1,
 75,
 1,
 90,
 100,
 5,
 100,
 0,
 100,
 75,
 78,
 75,
 100,
 20,
 100,
 95,
 60,
 75,
 69,
 100,

In [106]:
# Observation timestamps, taken straight from the 'dt' column
dt_column = df['dt']
dates_unix = dt_column.values
dates_unix

array([1.58344308e+09, 1.58344325e+09, 1.58344325e+09, 1.58344325e+09,
       1.58344310e+09, 1.58344316e+09, 1.58344325e+09, 1.58344320e+09,
       1.58344325e+09, 1.58344325e+09, 1.58344325e+09, 1.58344325e+09,
       1.58344302e+09, 1.58344325e+09, 1.58344325e+09, 1.58344325e+09,
       1.58344325e+09, 1.58344325e+09, 1.58344325e+09, 1.58344325e+09,
       1.58344298e+09, 1.58344325e+09, 1.58344325e+09, 1.58344325e+09,
       1.58344325e+09, 1.58344325e+09, 1.58344304e+09, 1.58344325e+09,
       1.58344325e+09, 1.58344325e+09, 1.58344325e+09, 1.58344301e+09,
       1.58344325e+09, 1.58344325e+09, 1.58344325e+09, 1.58344316e+09,
       1.58344325e+09, 1.58344300e+09, 1.58344325e+09, 1.58344325e+09,
       1.58344325e+09, 1.58344325e+09, 1.58344298e+09, 1.58344325e+09,
       1.58344325e+09, 1.58344325e+09, 1.58344325e+09, 1.58344325e+09,
       1.58344314e+09, 1.58344325e+09, 1.58344325e+09, 1.58344325e+09,
       1.58344305e+09, 1.58344325e+09, 1.58344325e+09, 1.58344325e+09,
      

In [107]:
# Humidity and maximum temperature, both read from each row's nested 'main' record
humidity = [main_record['humidity'] for main_record in df['main']]
max_temps = [main_record['temp_max'] for main_record in df['main']]
print(humidity)
print(max_temps)

[52, 89, 86, 72, 88, 40, 92, 71, 39, 71, 88, 65, 79, 89, 79, 63, 79, 86, 51, 39, 82, 100, 91, 68, 91, 75, 41, 93, 94, 94, 79, 74, 88, 96, 40, 84, 59, 64, 63, 89, 82, 78, 88, 96, 84, 94, 94, 46, 36, 63, 38, 88, 80, 80, 63, 92, 81, 83, 88, 52, 81, 84, 93, 82, 81, 58, 43, 78, 88, 73, 35, 59, 62, 74, 77, 91, 77, 94, 84, 72, 83, 92, 86, 41, 69, 92, 79, 94, 31, 76, 66, 88, 95, 63, 12, 59, 74, 67, 66, 67, 94, 68, 92, 97, 85, 75, 94, 42, 42, 30, 77, 80, 88, 75, 100, 90, 92, 79, 53, 81, 80, 72, 69, 90, 24, 56, 75, 83, 94, 94, 99, 80, 98, 31, 54, 72, 77, 51, 23, 100, 39, 71, 75, 98, 90, 98, 38, 67, 27, 57, 100, 94, 38, 69, 100, 74, 100, 13, 93, 84, 70, 76, 93, 46, 71, 95, 77, 92, 14, 34, 41, 61, 76, 72, 36, 72, 68, 63, 87, 81, 81, 97, 91, 75, 64, 74, 68, 36, 65, 97, 80, 96, 16, 44, 88, 88, 94, 91, 38, 96, 86, 68, 73, 12, 71, 88, 64, 75, 6, 94, 76, 62, 80, 53, 10, 65, 18, 96, 79, 51, 93, 84, 87, 90, 86, 78, 87, 80, 77, 94, 92, 70, 90, 87, 83, 83, 5, 91, 72, 93, 90, 82, 85, 83, 7, 73, 87, 96, 12, 

In [108]:
# Latitude and longitude, both read from each row's nested 'coord' record.
# This rebinds `lats`, which earlier held the randomly sampled latitudes.
lats = [coord_record['lat'] for coord_record in df['coord']]
lons = [coord_record['lon'] for coord_record in df['coord']]
print(lats)
print(lons)

[38.42, 63.92, 48.98, -23.12, 20.89, -54.8, 64.38, 43.41, 42.6, -0.6, -33.92, 21.44, -33.02, 52.44, 3.07, 62.46, 63.84, 70.37, 22.57, 42.1, -29.95, -5.09, -23.98, 33.73, -33.59, 49.64, 7.77, 60.4, -20.21, 0.54, -21.21, -8.01, -21.36, -14.97, 18.73, 64.5, 14.9, -33.01, 65.28, -23.35, 4.92, -21.2, -21.34, 71.97, -46.19, -20.41, 1.74, -53.15, 28.02, -33.65, -45.58, -29.13, 48.01, 69.7, -16.48, 70.63, 40.8, -38, 5.41, 25.52, -2.27, -34.42, 59.7, -38.27, 2.04, 55.74, 43.06, 56.91, -22.96, -26.65, 40.74, 36.49, -33.7, 22.08, 53.05, 46.68, 29.06, -19.98, 0.13, 29.85, -7.47, 5.56, 63.56, 35.73, 8.41, -4.55, -9.62, -20.52, 47.02, 63.05, -0.74, -13.87, 68.05, 32.67, 24.09, -41.41, -21.64, 38.52, 62.23, 69.45, -3.28, -33.93, 66.04, -13.59, 49.78, 9.49, -4.98, 2.77, -37.62, -13.7, 36.62, 45.02, -8.45, 69.38, 67.67, 77.48, 19.92, 49.87, 69.22, -22.67, -24.79, -42.78, -19.06, -6.47, 39.18, 4.71, -46.28, -22.97, 57.31, -23.85, 65.58, 59.24, 71.92, 41.37, 50.23, -42.88, 52.85, -35.33, 29.97, -34.58, 4

In [109]:
# Wind speed of every city, read from the 'speed' entry of each row's 'wind' record
wind_speed = [wind_record['speed'] for wind_record in df['wind']]
wind_speed

[3.6,
 5.21,
 0.79,
 8.36,
 3.6,
 5.1,
 2,
 6.7,
 3.1,
 4.37,
 1,
 3.6,
 3.36,
 3.36,
 7.51,
 4.1,
 11.3,
 7.2,
 3.7,
 4.1,
 4.6,
 5.7,
 3.76,
 4.76,
 3.79,
 8.2,
 5.7,
 4.6,
 5.1,
 1.55,
 2.1,
 3.1,
 2.6,
 2.66,
 5.1,
 5.7,
 7.64,
 1.5,
 1.5,
 2.97,
 2.89,
 3.6,
 2.6,
 4.79,
 0.45,
 5.1,
 1.52,
 2.1,
 6.7,
 5.56,
 4.6,
 1,
 6.34,
 4.29,
 1.86,
 6.7,
 2.1,
 5.1,
 1,
 0.95,
 7.17,
 5.36,
 2,
 6.89,
 5.28,
 3.1,
 1.08,
 0.31,
 4.1,
 5.38,
 3.6,
 3.6,
 4.92,
 6.2,
 3,
 2.42,
 8.7,
 5.1,
 4.29,
 3,
 4.1,
 1.34,
 3.6,
 3.6,
 3.78,
 1.77,
 0.43,
 5.1,
 2.1,
 2.71,
 5.7,
 7.6,
 0.83,
 7.2,
 3.27,
 2.1,
 4.1,
 2.1,
 4.3,
 3.6,
 1.21,
 4.6,
 3.6,
 1.89,
 3,
 5.24,
 0.49,
 0.94,
 5.71,
 6.2,
 1.64,
 6.7,
 4.6,
 2.1,
 3.6,
 1.74,
 4.83,
 3.6,
 2.6,
 7.03,
 1.58,
 3.6,
 5.7,
 0.68,
 4.1,
 5.7,
 6.34,
 6.2,
 3.05,
 3.72,
 1.53,
 0.21,
 2.31,
 2.1,
 6.2,
 3.6,
 1.85,
 4.81,
 6.7,
 3.1,
 4.6,
 1.5,
 5.74,
 0.51,
 1.4,
 1.1,
 2.84,
 1.5,
 5.7,
 3.1,
 5.7,
 5.1,
 2.1,
 2.27,
 7.53,
 0.73,
 3.6,
 12.9,


In [110]:
# Combine the per-city columns extracted above into one table indexed by city name
weather_columns = {
    "City": names,
    "Cloudiness": cloudiness,
    "Country": countries,
    "Dates": dates_unix,
    "Humidity": humidity,
    "Lat": lats,
    "Lng": lons,
    "Max Temp": max_temps,
    "Wind Speed": wind_speed,
}
weather_df = pd.DataFrame(weather_columns).set_index('City')
weather_df.head()

Unnamed: 0_level_0,Cloudiness,Country,Dates,Humidity,Lat,Lng,Max Temp,Wind Speed
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Laguna,1,US,1583443000.0,52,38.42,-121.42,297.04,3.6
Sangar,100,RU,1583443000.0,89,63.92,127.47,258.55,5.21
Izvestkovyy,0,RU,1583443000.0,86,48.98,131.55,259.47,0.79
Rikitea,13,PF,1583443000.0,72,-23.12,-134.97,298.56,8.36
Kahului,75,US,1583443000.0,88,20.89,-156.47,297.15,3.6


In [111]:
# Write the table to CSV. to_csv does not create missing folders, so make sure
# the target directory exists first (nothing to create for a bare file name).
output_dir = os.path.dirname(output_data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
weather_df.to_csv(output_data_file, sep=',')