In [1]:
# Dependencies
import matplotlib.pyplot as plt
import requests
import json
from scipy import stats
import pandas as pd
import random
from citipy import citipy
from config import weather_api_key

## Generate Cities List

In [36]:
# We need 500+ cities spread across the full latitude range (-90 to 90)
# and longitude range (-180 to 180).
# To get random cities, we draw random coordinates inside those ranges and
# ask Citipy for the city nearest to each point.

# Number of random coordinates to try. Different points often resolve to the
# same nearest city, so we oversample and remove the duplicates afterwards.
num_coordinate_samples = 1500

# One "City Name, CC" label per sampled coordinate (duplicates still included)
cities = []

for sample_number in range(num_coordinate_samples):
    # Draw whole numbers in hundredths of a degree, then divide by 100 to get
    # a float with 2 decimal places. randint includes both endpoints, so the
    # extreme values (+/-90.00 and +/-180.00) can be drawn as well.
    lat = random.randint(-9000, 9000) / 100
    long = random.randint(-18000, 18000) / 100

    # Get the nearest city and store it as "City Name, COUNTRY_CODE"
    city = citipy.nearest_city(lat, long)
    cities.append(f"{city.city_name.title()}, {city.country_code.upper()}")

In [54]:
# Build a single-column frame ("City") from the generated city labels
weather_df = pd.DataFrame(cities, columns=["City"])

# Preview the first few rows
weather_df.head()

Unnamed: 0,City
0,"Rikitea, PF"
1,"Catamarca, AR"
2,"Lebu, CL"
3,"Christchurch, NZ"
4,"Benjamin Constant, BR"


In [55]:
# Remove repeated cities, keeping the first occurrence of each one.
# (drop_duplicates already compares all columns, keeps the first match and
# returns a new frame by default, so no arguments are needed.)
weather_df = weather_df.drop_duplicates()

# Show the de-duplicated frame
weather_df

Unnamed: 0,City
0,"Rikitea, PF"
1,"Catamarca, AR"
2,"Lebu, CL"
3,"Christchurch, NZ"
4,"Benjamin Constant, BR"
...,...
1491,"Papara, PF"
1492,"Muros, ES"
1493,"Moerai, PF"
1495,"Marzuq, LY"


In [56]:
# Reset the index of the data frame so we don't have missing index numbers
# (dropping duplicates left gaps in the row labels).
# drop=True discards the old index instead of keeping it as a new column.
weather_df = weather_df.reset_index(drop=True)
# Display the re-indexed frame
weather_df


Unnamed: 0,City
0,"Rikitea, PF"
1,"Catamarca, AR"
2,"Lebu, CL"
3,"Christchurch, NZ"
4,"Benjamin Constant, BR"
...,...
622,"Papara, PF"
623,"Muros, ES"
624,"Moerai, PF"
625,"Marzuq, LY"


In [59]:
# OpenWeatherMap "current weather" endpoint. HTTPS is used so the API key is
# not sent over the network in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Give up on a single request after this many seconds instead of hanging forever
request_timeout_seconds = 10

# Query parameters shared by every request. Passing them through `params`
# lets requests URL-encode the values (city names contain spaces and commas).
base_params = {"appid": weather_api_key, "units": units}

# Output column -> (section, field) location of the value in the JSON response
weather_fields = {
    "Latitude": ("coord", "lat"),
    "Longitude": ("coord", "lon"),
    "Temperature (F)": ("main", "temp_max"),
    "Humidity (%)": ("main", "humidity"),
    "Cloudiness (%)": ("clouds", "all"),
    "Wind Speed (mph)": ("wind", "speed"),
}

# Create the result columns up front so they exist (filled with NaN) even if
# every lookup fails; rows left as NaN mark cities without weather data.
for column in weather_fields:
    weather_df[column] = float("nan")

# Loop through the Data Frame for cities and perform a request for data on
# each, then store the requested data in the Data Frame
for index, row in weather_df.iterrows():
    # Get city from data frame
    city = row["City"]
    print(f"Checking index # {index}, city: {city}")

    try:
        # Call OpenWeatherMap API
        response = requests.get(
            url,
            params={**base_params, "q": city},
            timeout=request_timeout_seconds,
        ).json()

        # Extract every field before writing anything, so a response that is
        # missing a field never leaves a half-filled row behind.
        values = {
            column: float(response[section][field])
            for column, (section, field) in weather_fields.items()
        }
    except requests.RequestException as error:
        # Network problem, timeout or unreadable response: skip this city
        # but keep going with the rest.
        print(f"Request for {city} failed: {error}")
        continue
    except (KeyError, TypeError, ValueError):
        # The response did not contain the expected weather fields.
        # If city not found, let us know!
        print(f"{city} not found on Weather API")
        continue

    # Store Latitude, Longitude, Temperature (F), Humidity (%),
    # Cloudiness (%), Wind Speed (mph) data
    for column, value in values.items():
        weather_df.loc[index, column] = value

# Check to make sure data saved
weather_df.head()

Checking index # 0, city: Rikitea, PF
Checking index # 1, city: Catamarca, AR
Checking index # 2, city: Lebu, CL
Checking index # 3, city: Christchurch, NZ
Checking index # 4, city: Benjamin Constant, BR
Checking index # 5, city: Salamiyah, SY
Checking index # 6, city: Zambezi, ZM
Checking index # 7, city: Mataura, PF
Mataura, PF not found on Weather API
Checking index # 8, city: Laguna, BR
Checking index # 9, city: Karauzyak, UZ
Karauzyak, UZ not found on Weather API
Checking index # 10, city: Dzhusaly, KZ
Dzhusaly, KZ not found on Weather API
Checking index # 11, city: Komsomolskiy, RU
Checking index # 12, city: Tumannyy, RU
Tumannyy, RU not found on Weather API
Checking index # 13, city: Avarua, CK
Checking index # 14, city: Elliot Lake, CA
Checking index # 15, city: Ribeira Grande, PT
Checking index # 16, city: Karabash, RU
Checking index # 17, city: Dikson, RU
Checking index # 18, city: Punta Arenas, CL
Checking index # 19, city: Barrow, US
Checking index # 20, city: Waipawa, NZ
C

Checking index # 176, city: Aguimes, ES
Checking index # 177, city: Palmer, US
Checking index # 178, city: Dingle, IE
Checking index # 179, city: Rio Novo Do Sul, BR
Checking index # 180, city: Verkhnyaya Inta, RU
Checking index # 181, city: Hidrolandia, BR
Checking index # 182, city: Polewali, ID
Checking index # 183, city: Cockburn Harbour, TC
Cockburn Harbour, TC not found on Weather API
Checking index # 184, city: Margate, ZA
Checking index # 185, city: Fernley, US
Checking index # 186, city: Cheuskiny, RU
Cheuskiny, RU not found on Weather API
Checking index # 187, city: Kazalinsk, KZ
Kazalinsk, KZ not found on Weather API
Checking index # 188, city: Sao Joao Da Barra, BR
Checking index # 189, city: Norman Wells, CA
Checking index # 190, city: Porto Murtinho, BR
Checking index # 191, city: Bara, SD
Bara, SD not found on Weather API
Checking index # 192, city: Souillac, MU
Checking index # 193, city: Cascais, PT
Checking index # 194, city: Constitucion, MX
Checking index # 195, cit

Checking index # 356, city: Pachino, IT
Checking index # 357, city: Thinadhoo, MV
Checking index # 358, city: Hunza, PK
Hunza, PK not found on Weather API
Checking index # 359, city: Sao Filipe, CV
Checking index # 360, city: Sola, VU
Checking index # 361, city: Kerouane, GN
Checking index # 362, city: Jawa, JO
Checking index # 363, city: Kachikau, BW
Kachikau, BW not found on Weather API
Checking index # 364, city: Yangambi, CD
Checking index # 365, city: Igrim, RU
Checking index # 366, city: Aksarka, RU
Checking index # 367, city: Oksfjord, NO
Checking index # 368, city: Lakes Entrance, AU
Checking index # 369, city: Puerto Narino, CO
Checking index # 370, city: Karratha, AU
Checking index # 371, city: Surovikino, RU
Checking index # 372, city: Svetlogorsk, RU
Checking index # 373, city: Kalach-Na-Donu, RU
Checking index # 374, city: Ketchikan, US
Checking index # 375, city: Zarate, AR
Checking index # 376, city: Praia, CV
Checking index # 377, city: Pangkalanbuun, ID
Checking index 

Checking index # 539, city: Opuwo, NA
Checking index # 540, city: Lalomanu, WS
Lalomanu, WS not found on Weather API
Checking index # 541, city: Kollumerland, NL
Kollumerland, NL not found on Weather API
Checking index # 542, city: Culpeper, US
Checking index # 543, city: Bonavista, CA
Checking index # 544, city: Laredo, ES
Checking index # 545, city: Prince Rupert, CA
Checking index # 546, city: Westport, NZ
Checking index # 547, city: Torata, PE
Checking index # 548, city: Yenangyaung, MM
Checking index # 549, city: Ajaccio, FR
Checking index # 550, city: Alihe, CN
Checking index # 551, city: Norwich, GB
Checking index # 552, city: Kastamonu, TR
Checking index # 553, city: Parabel, RU
Checking index # 554, city: San Matias, BO
Checking index # 555, city: Iralaya, HN
Checking index # 556, city: Sao Paulo De Olivenca, BR
Checking index # 557, city: Katherine, AU
Checking index # 558, city: Qingdao, CN
Checking index # 559, city: Mbaiki, CF
Checking index # 560, city: Mogoytuy, RU
Check

Unnamed: 0,City,Latitude,Longitude,Temperature (F),Humidity (%),Cloudiness (%),Wind Speed (mph)
0,"Rikitea, PF",-23.12,-134.97,73.0,84.0,67.0,14.32
1,"Catamarca, AR",-28.47,-65.79,89.08,37.0,99.0,4.29
2,"Lebu, CL",-37.62,-73.65,53.89,78.0,53.0,11.41
3,"Christchurch, NZ",-43.53,172.63,48.99,100.0,64.0,4.7
4,"Benjamin Constant, BR",-4.38,-70.03,87.8,74.0,40.0,4.7


In [60]:
# Check data types: list the dtype of every column in weather_df so we can
# confirm the weather measurements were stored as numbers.
weather_df.dtypes

City                 object
Latitude            float64
Longitude           float64
Temperature (F)     float64
Humidity (%)        float64
Cloudiness (%)      float64
Wind Speed (mph)    float64
dtype: object

In [61]:
# Check how many cities are missing data.
# count() reports the number of non-null entries per column, so any column
# with a lower count than "City" has rows without weather data.
weather_df.count()

City                627
Latitude            559
Longitude           559
Temperature (F)     559
Humidity (%)        559
Cloudiness (%)      559
Wind Speed (mph)    559
dtype: int64

In [62]:
# Discard every row that has at least one missing value
# (dropna removes a row on any NaN by default).
weather_df = weather_df.dropna()

# Every column should now report the same non-null count
weather_df.count()

City                559
Latitude            559
Longitude           559
Temperature (F)     559
Humidity (%)        559
Cloudiness (%)      559
Wind Speed (mph)    559
dtype: int64

In [63]:
# The cleaned data set now holds 500+ cities with complete weather data.
# Persist it to CSV (the row index is written as the first column).
output_csv_path = "data/weather_data.csv"
weather_df.to_csv(output_csv_path)