In [3]:
# Import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [4]:
## Generate city lists

# Create lists to hold lattitude/longitude data and cities
lat_lngs = []
cities = []

# Create a set of random latitude and longitude combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat/lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm there are at least 500
len(cities)

629

In [5]:
#Set Up using information from OpenWeatherAPI documentation to prepare for calls
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"

#Create partial query URL to use for calls 
query_url = f"{url}appid={weather_key}&units={units}&q="

In [6]:
#Test API on one city in list to see how the information is printed out
test_response = requests.get(query_url + cities[0])
test_json = test_response.json()

#pretty print test_json to get a picture of the API output so you know what to call
import pprint as pp

pp.pprint(test_json)

{'base': 'stations',
 'clouds': {'all': 50},
 'cod': 200,
 'coord': {'lat': -17.3383, 'lon': -49.9314},
 'dt': 1611594493,
 'id': 3464363,
 'main': {'feels_like': 32.68,
          'grnd_level': 948,
          'humidity': 43,
          'pressure': 1010,
          'sea_level': 1010,
          'temp': 31.85,
          'temp_max': 31.85,
          'temp_min': 31.85},
 'name': 'Edéia',
 'sys': {'country': 'BR', 'sunrise': 1611565425, 'sunset': 1611612010},
 'timezone': -10800,
 'visibility': 10000,
 'weather': [{'description': 'scattered clouds',
              'icon': '03d',
              'id': 802,
              'main': 'Clouds'}],
 'wind': {'deg': 10, 'speed': 2.62}}


In [15]:
#Create empty lists to receive temperature, humidity, cloudiness, and wind speed
temp = []
humidity = []
cloudiness = []
wind_speed = []
cities_found = []

print("Beginning Data Retrieval")
print("-------------------------")
#Loop through the list of random cities and perform a request for data on each
for city in cities:
    print(f"Processing {city}")
    response = requests.get(query_url + city).json()

    #If city found, add data to list
    try:
        temp.append(response['main']['temp_max'])
        humidity.append(response['main']['humidity'])
        cloudiness.append(response['clouds']['all'])
        wind_speed.append(response['wind']['speed'])
        cities_found.append(city)
    except:
        print(f"{city} not found. Skipping...")
    


Beginning Data Retrieval
-------------------------
Processing edeia
Processing cuamba
Processing georgetown
Processing kirakira
Processing mataura
Processing albany
Processing ushuaia
Processing tanout
Processing moussoro
Processing yellowknife
Processing marica
Processing chara
Processing upernavik
Processing sola
Processing zhoucheng
Processing samusu
samusu not found. Skipping...
Processing cape town
Processing middletown
Processing tacoronte
Processing mutata
Processing acheng
Processing victoria
Processing lavrentiya
Processing horodnya
Processing chokurdakh
Processing cap malheureux
Processing vaini
Processing avarua
Processing saleaula
saleaula not found. Skipping...
Processing port elizabeth
Processing esperance
Processing busselton
Processing dana point
Processing madaoua
Processing alice springs
Processing illoqqortoormiut
illoqqortoormiut not found. Skipping...
Processing lubana
Processing tiksi
Processing dikson
Processing cabo san lucas
Processing qaanaaq
Processing carnar

Processing kieta
Processing qaqortoq
Processing chumikan
Processing comodoro rivadavia
Processing tessalit
Processing hovd
Processing shestakovo
Processing boysun
Processing nioro
Processing ambilobe
Processing chernyakhovsk
Processing pato branco
Processing hokitika
Processing akhtanizovskaya
Processing nouadhibou
Processing yuanping
Processing otane
Processing kimbe
Processing kokoda
Processing khasan
Processing mezen
Processing buin
Processing contamana
Processing vardo
Processing manokwari
Processing ambon
Processing bulgan
Processing viedma
Processing hambantota
Processing benghazi
Processing castries
Processing nishihara
Processing puerto escondido
Processing muros
Processing arkansas city
Processing acin
acin not found. Skipping...
Processing temaraia
temaraia not found. Skipping...
Processing yanan
yanan not found. Skipping...
Processing tazovskiy
Processing kano
Processing mago
Processing marsa matruh
Processing ponta delgada
Processing ciudad bolivar
Processing scottsburgh
sc

In [16]:
#export city data into csv file


#create dataframe of city data
city_data_df = pd.DataFrame({"City": cities_found, "Lat": lat_lng[0], "Lng": lat_lng[1], "Max Temp": temp,
                            "Humidity": humidity, "Cloudiness": cloudiness, "Wind Speed": wind_speed})
city_data_df
#still need country and date columns

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,edeia,24.086336,-92.15157,31.85,43,50,2.62
1,cuamba,24.086336,-92.15157,22.35,91,94,0.64
2,georgetown,24.086336,-92.15157,26.67,94,40,2.06
3,kirakira,24.086336,-92.15157,26.65,82,100,3.62
4,mataura,24.086336,-92.15157,15.56,73,98,0.89
...,...,...,...,...,...,...,...
573,calama,24.086336,-92.15157,23.00,13,40,9.26
574,port blair,24.086336,-92.15157,24.50,74,0,3.03
575,jamame,24.086336,-92.15157,24.40,77,52,4.66
576,konevo,24.086336,-92.15157,1.87,96,100,4.15


In [20]:
#get summary statistics data frame on Lat, Lng, Max Temp, Humidity, Cloudiness, Wind Speed, Date

lat_summary = city_data_df["Lat"].describe()
lng_summary = city_data_df["Lng"].describe()
max_temp_summary = city_data_df["Max Temp"].describe()
humidity_summary = city_data_df["Humidity"].describe()
cloudiness_summary = city_data_df["Cloudiness"].describe()
wind_speed_summary = city_data_df["Wind Speed"].describe()
#date_summary = city_data_df["Date"].describe()

summary_df = pd.DataFrame({"Lat":lat_summary,"Lng":lng_summary,"Max Temp": max_temp_summary,
                          "Humidity":humidity_summary,"Cloudiness":cloudiness_summary,"Wind Speed":wind_speed_summary})

summary_df

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
count,578.0,578.0,578.0,578.0,578.0,578.0
mean,24.08634,-92.15157,9.53154,72.250865,55.754325,3.766332
std,1.315643e-13,2.986864e-13,19.455436,20.727439,38.052985,2.732542
min,24.08634,-92.15157,-51.0,8.0,0.0,0.07
25%,24.08634,-92.15157,-0.6425,62.0,20.0,1.6425
50%,24.08634,-92.15157,16.0,78.0,69.0,3.09
75%,24.08634,-92.15157,25.0,87.0,90.0,5.14
max,24.08634,-92.15157,38.17,100.0,100.0,15.3


In [28]:
#Inspect data and skip cities with humidity > 100 
big_humid_indx = city_data_df.index[city_data_df["Humidity"]>100]
big_humid_indx

#if no city has humidity > 100, skip this step
#create new data frame
#clean_city_df = city_data_df.drop(big_humid)

Int64Index([], dtype='int64')