In [1]:
# Dependencies
import json
import os
import random

import matplotlib.pyplot as plt
import pandas as pd
import requests
from citipy import citipy
from scipy import stats

from config import weather_api_key

## Generate Cities List

In [64]:
# We need 500+ cities spread across the full latitude range (-90 to 90) and
# longitude range (-180 to 180). To get random cities, draw random coordinates
# and look up the nearest city to each one using Citipy.

# Number of random coordinates to draw. Several coordinates can resolve to the
# same nearest city, so we oversample here and drop the duplicates afterwards.
NUM_COORDINATES = 1500

# Create empty lists
cities = []
countries = []

for _ in range(NUM_COORDINATES):
    # Generate random coordinates; uniform() draws floats directly from the
    # stated range, so no integer-scaling trick is needed
    lat = random.uniform(-90, 90)
    long = random.uniform(-180, 180)

    # Get the nearest city and record its title-cased name and upper-cased country code
    city = citipy.nearest_city(lat, long)
    cities.append(city.city_name.title())
    countries.append(city.country_code.upper())

In [66]:
# Assemble the generated city names and country codes into the working DataFrame
city_columns = {"City": cities, "Country": countries}
weather_df = pd.DataFrame(city_columns)

weather_df.head()

Unnamed: 0,City,Country
0,Belmonte,BR
1,Soledade,BR
2,Tuktoyaktuk,CA
3,Wanning,CN
4,Taolanaro,MG


In [67]:
# Remove repeated rows, keeping the first occurrence of each.
# All columns are compared (the drop_duplicates defaults), so a row only counts
# as a duplicate when both City and Country match.
weather_df = weather_df.drop_duplicates()
weather_df

Unnamed: 0,City,Country
0,Belmonte,BR
1,Soledade,BR
2,Tuktoyaktuk,CA
3,Wanning,CN
4,Taolanaro,MG
...,...,...
1487,Luena,AO
1495,Nanton,CA
1496,San Lorenzo,BO
1497,Petropavlovsk-Kamchatskiy,RU


In [68]:
# Dropping duplicates leaves gaps in the index labels. Renumber the rows
# consecutively from 0; drop=True discards the old labels instead of keeping
# them as a new column.
weather_df = weather_df.reset_index(drop=True)
weather_df


Unnamed: 0,City,Country
0,Belmonte,BR
1,Soledade,BR
2,Tuktoyaktuk,CA
3,Wanning,CN
4,Taolanaro,MG
...,...,...
605,Luena,AO
606,Nanton,CA
607,San Lorenzo,BO
608,Petropavlovsk-Kamchatskiy,RU


In [91]:
# Set up initial query URL (HTTPS, so the API key is not sent in clear text)
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Build partial query URL. The loop below passes the query through `params`
# instead; this name is kept in case another cell relies on it.
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Seconds to wait for the API before giving up on a single city
REQUEST_TIMEOUT = 10

# Loop through the Data Frame for cities, request the weather for each one,
# then store the returned data back in the Data Frame
for index, row in weather_df.iterrows():
    # Get search city from data frame
    city = f'{row["City"]}, {row["Country"]}'
    print(f"Checking record # {index} | city: {city}")

    # Call OpenWeatherMap API. Passing the query through `params` lets requests
    # URL-encode the city name. A network or JSON failure skips this city
    # instead of aborting the whole run.
    try:
        response = requests.get(
            url,
            params={"appid": weather_api_key, "units": units, "q": city},
            timeout=REQUEST_TIMEOUT,
        ).json()
    except (requests.RequestException, ValueError) as error:
        # Only the exception type is printed: the full message can contain the
        # request URL, which includes the API key.
        print(f"{city} request failed: {type(error).__name__}")
        continue

    # Extract every field before touching the Data Frame so a response that is
    # missing any of them never leaves a half-filled row behind
    try:
        record = {
            "Latitude": float(response["coord"]["lat"]),
            "Longitude": float(response["coord"]["lon"]),
            "Temperature (F)": float(response["main"]["temp_max"]),
            "Humidity (%)": float(response["main"]["humidity"]),
            "Cloudiness (%)": float(response["clouds"]["all"]),
            "Wind Speed (mph)": float(response["wind"]["speed"]),
            "Date Time": response["dt"],
        }
    except (KeyError, TypeError, ValueError):
        # If city not found, let us know!
        print(f"{city} not found on Weather API")
        continue

    # Store data only if info found
    for column, value in record.items():
        weather_df.loc[index, column] = value

# Check to make sure data saved
weather_df.head()

Checking record # 0 | city: Belmonte, BR
Checking record # 1 | city: Soledade, BR
Checking record # 2 | city: Tuktoyaktuk, CA
Checking record # 3 | city: Wanning, CN
Checking record # 4 | city: Taolanaro, MG
Taolanaro, MG not found on Weather API
Checking record # 5 | city: Khatanga, RU
Checking record # 6 | city: Sur, OM
Checking record # 7 | city: Mataura, PF
Mataura, PF not found on Weather API
Checking record # 8 | city: Khatassy, RU
Checking record # 9 | city: Kampong Thum, KH
Checking record # 10 | city: Hobart, AU
Checking record # 11 | city: Najran, SA
Checking record # 12 | city: Tsihombe, MG
Tsihombe, MG not found on Weather API
Checking record # 13 | city: Dikson, RU
Checking record # 14 | city: Gawler, AU
Checking record # 15 | city: Victoria, SC
Checking record # 16 | city: Cherskiy, RU
Checking record # 17 | city: Port Elizabeth, ZA
Checking record # 18 | city: Benton Harbor, US
Checking record # 19 | city: Tilichiki, RU
Checking record # 20 | city: Vila Franca Do Campo, 

Checking record # 170 | city: Rock Sound, BS
Checking record # 171 | city: Dubbo, AU
Checking record # 172 | city: Cabedelo, BR
Checking record # 173 | city: Vila Velha, BR
Checking record # 174 | city: Mayumba, GA
Checking record # 175 | city: Ukiah, US
Checking record # 176 | city: Chapais, CA
Checking record # 177 | city: La Union, GT
Checking record # 178 | city: Henties Bay, NA
Checking record # 179 | city: Halalo, WF
Halalo, WF not found on Weather API
Checking record # 180 | city: Marsh Harbour, BS
Checking record # 181 | city: Kaniama, CD
Checking record # 182 | city: Grindavik, IS
Checking record # 183 | city: Ahipara, NZ
Checking record # 184 | city: Wahran, DZ
Wahran, DZ not found on Weather API
Checking record # 185 | city: Iqaluit, CA
Checking record # 186 | city: New Norfolk, AU
Checking record # 187 | city: Davidson, CA
Checking record # 188 | city: Calvinia, ZA
Checking record # 189 | city: Kyren, RU
Checking record # 190 | city: San Patricio, MX
Checking record # 191 |

Checking record # 343 | city: Guerrero Negro, MX
Checking record # 344 | city: Gumdag, TM
Checking record # 345 | city: Acapulco, MX
Checking record # 346 | city: Vaitupu, WF
Vaitupu, WF not found on Weather API
Checking record # 347 | city: Siddipet, IN
Checking record # 348 | city: Kamaishi, JP
Checking record # 349 | city: Luderitz, NA
Checking record # 350 | city: Sobolevo, RU
Checking record # 351 | city: Sabaudia, IT
Checking record # 352 | city: Voznesenye, RU
Checking record # 353 | city: Vila, VU
Vila, VU not found on Weather API
Checking record # 354 | city: Hoquiam, US
Checking record # 355 | city: Srednekolymsk, RU
Checking record # 356 | city: Cayenne, GF
Checking record # 357 | city: Necochea, AR
Checking record # 358 | city: Karpathos, GR
Checking record # 359 | city: Flinders, AU
Checking record # 360 | city: Kabo, CF
Checking record # 361 | city: Santa Cruz, CR
Checking record # 362 | city: Coahuayana, MX
Checking record # 363 | city: Mayo, CA
Checking record # 364 | c

Checking record # 524 | city: Krutikha, RU
Checking record # 525 | city: Hervey Bay, AU
Checking record # 526 | city: Trollhattan, SE
Checking record # 527 | city: Ystad, SE
Checking record # 528 | city: Errol, GB
Checking record # 529 | city: Salamiyah, SY
Checking record # 530 | city: Warrnambool, AU
Checking record # 531 | city: Satitoa, WS
Satitoa, WS not found on Weather API
Checking record # 532 | city: Comodoro Rivadavia, AR
Checking record # 533 | city: Elverum, NO
Checking record # 534 | city: La Rioja, AR
Checking record # 535 | city: Saint-Pierre, RE
Checking record # 536 | city: Vestmannaeyjar, IS
Checking record # 537 | city: Presidencia Roque Saenz Pena, AR
Checking record # 538 | city: Kichmengskiy Gorodok, RU
Checking record # 539 | city: Kasongo-Lunda, CD
Checking record # 540 | city: Tupik, RU
Checking record # 541 | city: Ulaanbaatar, MN
Checking record # 542 | city: Zhigalovo, RU
Checking record # 543 | city: Laurel, US
Checking record # 544 | city: Bababe, MR
Babab

Unnamed: 0,City,Country,Latitude,Longitude,Temperature (F),Humidity (%),Cloudiness (%),Wind Speed (mph),Date Time
0,Belmonte,BR,-15.86,-38.88,80.6,61.0,40.0,9.17,1601056674
1,Soledade,BR,-28.82,-52.51,80.6,42.0,20.0,11.41,1601056729
2,Tuktoyaktuk,CA,69.45,-133.04,35.6,100.0,90.0,14.99,1601056437
3,Wanning,CN,18.8,110.4,79.59,80.0,99.0,1.97,1601056386
4,Taolanaro,MG,,,,,,,NaT


In [87]:
# Check data types: list the dtype pandas assigned to each column of the frame
weather_df.dtypes

City                 object
Country              object
Latitude            float64
Longitude           float64
Temperature (F)     float64
Humidity (%)        float64
Cloudiness (%)      float64
Wind Speed (mph)    float64
Date Time            object
dtype: object

In [61]:
# Check how many cities are missing data: count() reports the non-null entries
# per column, so any column with fewer entries than "City" has gaps
weather_df.count()

City                627
Latitude            559
Longitude           559
Temperature (F)     559
Humidity (%)        559
Cloudiness (%)      559
Wind Speed (mph)    559
dtype: int64

In [62]:
# Keep only the rows that have a value in every column
# (dropna's default is to discard a row when any field is missing)
weather_df = weather_df.dropna()

# Every column should now report the same number of entries
weather_df.count()

City                559
Latitude            559
Longitude           559
Temperature (F)     559
Humidity (%)        559
Cloudiness (%)      559
Wind Speed (mph)    559
dtype: int64

In [63]:
# Yay! Now we have our data set with 500+ cities
# Now let's save our data to CSV. Create the output folder first so the export
# also works on a fresh checkout where "data/" does not exist yet.
os.makedirs("data", exist_ok=True)
weather_df.to_csv("data/weather_data.csv")