In [217]:
pip install citipy




In [218]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import requests
import time
import json
from scipy.stats import linregress

#import api key from configs.py file
from configs import api_key

#city data based on lat. and long.
from citipy import citipy

#output file (csv)
output_data_file = "output_data/cities.csv"

#Range of latitudes and logitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [230]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

#np.random.seed(1)
# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

616

In [231]:
#set up dataframe and create empty columns for the data to be entered into
cities_df = pd.DataFrame({"City": cities})
cities_df["Lat"] = ""
cities_df["Lng"] = ""
cities_df["Max Temp"] = ""
cities_df["Humidity"] = ""
cities_df["Cloudiness"] = ""
cities_df["Wind Speed"] = ""
cities_df["Country"] = ""
cities_df["Date"] = ""

#base url ready for the city name to be added to the end
base_url = f'http://api.openweathermap.org/data/2.5/weather?appid={api_key}&q='

#starting data retrieval
print("Beginning Data Retrieval")
print("-----------------------------")
#go through cities_df by city name and start entering in the info from the API
for index, row in cities_df.iterrows():
    #get city from df 
    city_name = row["City"]
    #assemble url and make api call
    response = requests.get(base_url + city_name).json()
    
    #try to fill in the df, else print skipping
    try:
        cities_df.loc[index, 'Lat'] = response['coord']['lat']
        cities_df.loc[index, 'Lng'] = response['coord']['lon']
        cities_df.loc[index, 'Max Temp'] = response['main']['temp_max']
        cities_df.loc[index, 'Humidity'] = response['main']['humidity']
        cities_df.loc[index, 'Cloudiness'] = response['weather'][0]['main']
        cities_df.loc[index, 'Wind Speed'] = response['wind']['speed']
        cities_df.loc[index, 'Country'] = response['sys']['country']
        cities_df.loc[index, 'Date'] = response['dt']
        print(f'Processing record {index} | {city_name}')
    except (KeyError, IndexError):
        print("City not found. Skipping...")
        #delete that row from df
        cities_df = cities_df.drop([index])

print("-----------------------------")        
print("Data Retrieval Complete")
print("-----------------------------")

Beginning Data Retrieval
-----------------------------
Processing record 0 | sergeyevka
Processing record 1 | albany
Processing record 2 | ushuaia
City not found. Skipping...
Processing record 4 | khatanga
City not found. Skipping...
Processing record 6 | kapaa
Processing record 7 | ancud
Processing record 8 | cherskiy
Processing record 9 | shingu
Processing record 10 | hamilton
Processing record 11 | hobart
Processing record 12 | vestmanna
Processing record 13 | swift current
Processing record 14 | evans
Processing record 15 | mason city
Processing record 16 | mataura
Processing record 17 | vila franca do campo
Processing record 18 | sajanan
Processing record 19 | bredasdorp
Processing record 20 | carman
City not found. Skipping...
Processing record 22 | rikitea
Processing record 23 | sterling
Processing record 24 | flinders
Processing record 25 | leningradskiy
Processing record 26 | vaini
Processing record 27 | upernavik
Processing record 28 | saldanha
Processing record 29 | lukovets

Processing record 255 | bafoulabe
Processing record 256 | dafeng
Processing record 257 | codrington
Processing record 258 | fukue
Processing record 259 | liuzhou
Processing record 260 | manali
City not found. Skipping...
Processing record 262 | nevelsk
Processing record 263 | ryotsu
Processing record 264 | berbera
Processing record 265 | marsh harbour
Processing record 266 | cairns
Processing record 267 | boysun
Processing record 268 | iquique
Processing record 269 | kieta
Processing record 270 | egvekinot
Processing record 271 | srednekolymsk
Processing record 272 | colwyn bay
Processing record 273 | kutum
Processing record 274 | hasaki
Processing record 275 | trat
Processing record 276 | barcelos
Processing record 277 | vanavara
Processing record 278 | romny
City not found. Skipping...
Processing record 280 | herzberg
Processing record 281 | qui nhon
Processing record 282 | mao
Processing record 283 | mareeba
Processing record 284 | constantine
Processing record 285 | pangnirtung
Pro

Processing record 509 | malinovskiy
Processing record 510 | huangnihe
Processing record 511 | arys
Processing record 512 | talcahuano
Processing record 513 | gold coast
Processing record 514 | salaga
Processing record 515 | pozarevac
Processing record 516 | tomatlan
Processing record 517 | westport
Processing record 518 | cerqueira cesar
Processing record 519 | tornio
City not found. Skipping...
Processing record 521 | linxia
Processing record 522 | namibe
Processing record 523 | sirsilla
Processing record 524 | asau
Processing record 525 | presidencia roque saenz pena
Processing record 526 | constitucion
Processing record 527 | cabatuan
Processing record 528 | arona
Processing record 529 | yaritagua
Processing record 530 | tiarei
Processing record 531 | kolokani
Processing record 532 | letpadan
Processing record 533 | boyolangu
Processing record 534 | itarema
Processing record 535 | rio branco
Processing record 536 | berlevag
Processing record 537 | moshenskoye
Processing record 538 |

In [221]:
#export data to csv
cities_df.to_csv('../output_data/cities_df.csv')

#display number of values for each column in df
cities_df.astype(bool).sum(axis = 0)

City          598
Lat           598
Lng           598
Max Temp      598
Humidity      598
Cloudiness    598
Wind Speed    598
Country       598
Date          598
dtype: int64

In [222]:
cities_df.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,rikitea,-23.12,-134.97,294.04,61,Clouds,7.14,PF,1592843562
1,parsabad,39.65,47.92,303.15,37,Clouds,6.2,IR,1592843562
2,port alfred,-33.59,26.89,287.04,85,Clear,0.45,ZA,1592843562
3,arraial do cabo,-22.97,-42.03,305.15,51,Clear,1.91,BR,1592843562
4,mataura,-46.19,168.86,277.04,83,Clear,1.34,NZ,1592843484


In [229]:
#remove cities where the humidity is over 100
low_humidity_df = cities_df[cities_df.Humidity <= 100]
low_humidity.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,rikitea,-23.12,-134.97,294.04,61,Clouds,7.14,PF,1592843562
1,parsabad,39.65,47.92,303.15,37,Clouds,6.2,IR,1592843562
2,port alfred,-33.59,26.89,287.04,85,Clear,0.45,ZA,1592843562
3,arraial do cabo,-22.97,-42.03,305.15,51,Clear,1.91,BR,1592843562
4,mataura,-46.19,168.86,277.04,83,Clear,1.34,NZ,1592843484
