In [1]:
# Dependencies
import matplotlib.pyplot as plt
import requests
import json
from scipy import stats
import pandas as pd
import random
from citipy import citipy
from config import weather_api_key

## Generate Cities List

In [36]:
# We need 500 cities spread across varying latitude ranges, between -90 to 90.
# Longitude ranges between -180 to 180.
# To get random cities, we can generate a list of random numbers between these ranges, then find the
# nearest cities using Citipy

# Create empty lists
cities = []

# Loop for 1000 cities, in case of duplicates
for x in range(1500):
    # Generate random coordinates
    # N.B. Numbers are 100x lat/long range so we can turn it into a float with 2 decimal places
    lat = random.randrange(-9000,9000)/100
    long = random.randrange(-18000,18000)/100

    # Get nearest cities
    city = citipy.nearest_city(lat,long)
    cities.append(f"{city.city_name.title()}, {city.country_code.upper()}")
    
#print(f"Cities: {cities}")

In [54]:
# Add cities to DataFrame
weather_df = pd.DataFrame({"City": cities})

weather_df.head()

Unnamed: 0,City
0,"Rikitea, PF"
1,"Catamarca, AR"
2,"Lebu, CL"
3,"Christchurch, NZ"
4,"Benjamin Constant, BR"


In [55]:
# Drop duplicate cities
weather_df = weather_df.drop_duplicates(subset=None,keep="first", inplace=False)
weather_df

Unnamed: 0,City
0,"Rikitea, PF"
1,"Catamarca, AR"
2,"Lebu, CL"
3,"Christchurch, NZ"
4,"Benjamin Constant, BR"
...,...
1491,"Papara, PF"
1492,"Muros, ES"
1493,"Moerai, PF"
1495,"Marzuq, LY"


In [56]:
# Reset the index of the data frame so we don't have missing index numbers
weather_df = weather_df.reset_index(drop=True)
weather_df


Unnamed: 0,City
0,"Rikitea, PF"
1,"Catamarca, AR"
2,"Lebu, CL"
3,"Christchurch, NZ"
4,"Benjamin Constant, BR"
...,...
622,"Papara, PF"
623,"Muros, ES"
624,"Moerai, PF"
625,"Marzuq, LY"


In [57]:
# Set up initial query URL
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Build partial query URL
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Loop through the Data Frame for cities and perform a request for data on each, then store the requested data in the Data Frame
for index, row in weather_df.iterrows():
    # Get city from data frame
    city = row["City"]

    # Call OpenWeatherMap API
    response = requests.get(query_url + city).json()

    print(f"Checking index # {index}, city: {city}")
    # Store data only if info found
    try:
        # Store Latitude, Longitude, Temperature (F), Humidity (%), Cloudiness (%), Wind Speed (mph) data
        weather_df.loc[index, "Latitude"] = float(response["coord"]["lat"])
        weather_df.loc[index, "Longitude"] = float(response["coord"]["lon"])
        weather_df.loc[index, "Temperature (F)"] = float(response["main"]["temp_max"])
        weather_df.loc[index, "Humidity (%)"] = float(response["main"]["humidity"])
        weather_df.loc[index, "Cloudiness (%)"] = float(response["clouds"]["all"])
        weather_df.loc[index, "Wind Speed (mph)"] = float(response["wind"]["speed"])
        
    except:
        # If city not found, let us know!
        print(f"{city} not found on Weather API")
        
    # Use this just while testing at the moment
    if index > 10:
        break

# Check to make sure data saved
weather_df.head()

Checking index # 0, city: Rikitea, PF
Checking index # 1, city: Catamarca, AR
Checking index # 2, city: Lebu, CL
Checking index # 3, city: Christchurch, NZ
Checking index # 4, city: Benjamin Constant, BR
Checking index # 5, city: Salamiyah, SY
Checking index # 6, city: Zambezi, ZM
Checking index # 7, city: Mataura, PF
Mataura, PF not found on Weather API
Checking index # 8, city: Laguna, BR
Checking index # 9, city: Karauzyak, UZ
Karauzyak, UZ not found on Weather API
Checking index # 10, city: Dzhusaly, KZ
Dzhusaly, KZ not found on Weather API
Checking index # 11, city: Komsomolskiy, RU


Unnamed: 0,City,Latitude,Longitude,Temperature (F),Humidity (%),Cloudiness (%),Wind Speed (mph)
0,"Rikitea, PF",-23.12,-134.97,73.0,84.0,67.0,14.32
1,"Catamarca, AR",-28.47,-65.79,89.08,37.0,99.0,4.29
2,"Lebu, CL",-37.62,-73.65,53.89,78.0,53.0,11.41
3,"Christchurch, NZ",-43.53,172.63,48.99,100.0,64.0,4.7
4,"Benjamin Constant, BR",-4.38,-70.03,87.8,74.0,40.0,4.7


In [58]:
# Check data types
weather_df.dtypes

City                 object
Latitude            float64
Longitude           float64
Temperature (F)     float64
Humidity (%)        float64
Cloudiness (%)      float64
Wind Speed (mph)    float64
dtype: object

In [52]:
# Change data types for columns that should be numbers
weather_df["Latitude"].astype("float")
weather_df.dtypes

ValueError: could not convert string to float: 

In [43]:
# Check how many cities missing data
weather_df.info()

City                object
Latitude            object
Longitude           object
Temperature (F)     object
Humidity (%)        object
Cloudiness (%)      object
Wind Speed (mph)    object
dtype: object