In [1]:
# Import the dependencies.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Draw 1,500 random latitudes and 1,500 random longitudes, then pair them up
# element by element. zip() is lazy, so only the iterator object is displayed.
lats = np.random.uniform(-90.000, 90.000, 1500)
lngs = np.random.uniform(-180.000, 180.000, 1500)
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x11e0b6fbb88>

In [3]:
# Unpack the zip iterator into a list of (latitude, longitude) tuples.
coordinates = [*lat_lngs]

In [4]:
# Use the citipy module to determine city based on latitude and longitude.
from citipy import citipy

In [5]:
# Create a list for holding the cities.
cities = []
# Identify the nearest city for each latitude and longitude combination.
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in cities:
        cities.append(city)
# Print the city count to confirm sufficient count. A bare len(cities) in the
# middle of a cell is evaluated and discarded (only the last expression of a
# cell is displayed), so the count has to be printed explicitly.
print(len(cities))
cities

['crab hill',
 'jamestown',
 'albany',
 'avarua',
 'viedma',
 'bredasdorp',
 'salalah',
 'mehamn',
 'atuona',
 'barrow',
 'mataura',
 'barentsburg',
 'kapaa',
 'vaini',
 'bathsheba',
 'goderich',
 'bagdarin',
 'auki',
 'phangnga',
 'praia da vitoria',
 'provideniya',
 'kampen',
 'punta arenas',
 'port alfred',
 'saldanha',
 'saint-philippe',
 'yellowknife',
 'hermanus',
 'tuktoyaktuk',
 'fare',
 'carutapera',
 'aban',
 'kruisfontein',
 'maceio',
 'leningradskiy',
 'severo-kurilsk',
 'khatanga',
 'hasaki',
 'brae',
 'rikitea',
 'huacho',
 'grand gaube',
 'kalmunai',
 'tsihombe',
 'vila franca do campo',
 'georgetown',
 'taolanaro',
 'lazaro cardenas',
 'vanimo',
 'shache',
 'motala',
 'rognan',
 'cape town',
 'aden',
 'waingapu',
 'marawi',
 'lorengau',
 'kahta',
 'chokurdakh',
 'lagoa',
 'mys shmidta',
 'sohag',
 'zastron',
 'karaul',
 'ketou',
 'saleaula',
 'hithadhoo',
 'otradnoye',
 'lamidan',
 'samusu',
 'jawar',
 'clifton',
 'castro',
 'port elizabeth',
 'ushuaia',
 'puerto ayora'

In [6]:
# Some of this code should look familiar, but let's break it down:

# We create a cities list to store city names.
# We iterate through the coordinates, as in our practice, and retrieve the nearest city
# using the latitude and longitude pair.
# We add a decision statement with the membership operator `not in` to determine whether the found city
# is already in the cities list. If not, then we'll use the append() function to add it.
# We are doing this because, among the 1,500 latitude and longitude pairs, many different pairs can
# share the same nearest city (for example, several points in the same stretch of ocean),
# which will retrieve duplicate cities, and we want to be sure we capture only the unique cities.

In [7]:
# import requests library
import requests

In [8]:
# Import the API key.
from config import weather_api_key

In [9]:
# Starting URL for Weather Map API Call. The API key is part of the query
# string, so use HTTPS rather than HTTP to avoid sending it in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [10]:
# Import the time library and the datetime module from the datetime library 
import time
from datetime import datetime

In [None]:
# Create an empty list to hold the weather data.
city_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1


# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes. At the start of every
    # new set the record counter restarts and the loop pauses for 60 seconds.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        time.sleep(60)

    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1

    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data. The timeout (in seconds) keeps a
        # single stalled connection from hanging the whole retrieval run.
        city_weather = requests.get(city_url, timeout=10).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the date to ISO standard.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If the response lacks the expected fields (or is not valid JSON), skip
    # the city. Only these specific errors are caught, so that interrupting
    # the kernel (KeyboardInterrupt) still stops the loop.
    except (KeyError, TypeError, ValueError):
        print("City not found. Skipping...")
        pass
    # A network-level failure (connection error, timeout, ...) is not a missing
    # city, so report it as such. Only the exception type is printed because
    # the full message can contain the request URL, which includes the API key.
    except requests.exceptions.RequestException as error:
        print(f"Request failed ({type(error).__name__}). Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | crab hill
City not found. Skipping...
Processing Record 2 of Set 1 | jamestown
Processing Record 3 of Set 1 | albany
Processing Record 4 of Set 1 | avarua
Processing Record 5 of Set 1 | viedma
Processing Record 6 of Set 1 | bredasdorp
Processing Record 7 of Set 1 | salalah
Processing Record 8 of Set 1 | mehamn
Processing Record 9 of Set 1 | atuona
Processing Record 10 of Set 1 | barrow
Processing Record 11 of Set 1 | mataura
Processing Record 12 of Set 1 | barentsburg
City not found. Skipping...
Processing Record 13 of Set 1 | kapaa
Processing Record 14 of Set 1 | vaini
Processing Record 15 of Set 1 | bathsheba
Processing Record 16 of Set 1 | goderich
Processing Record 17 of Set 1 | bagdarin
Processing Record 18 of Set 1 | auki
Processing Record 19 of Set 1 | phangnga
Processing Record 20 of Set 1 | praia da vitoria
Processing Record 21 of Set 1 | provideniya
Processing Record 22 of Set 1 | kampe

In [None]:
# Let's break down the code so we understand fully before continuing:

# We create the for loop with the enumerate() method and reference the index and the city in the list.
# In the conditional statement (which is inside the for loop), we check if the remainder of the index divided
# by 50 is equal to 0 and if
# the index is greater than or equal to 50. If the statement is true, then the set_count is incremented by 1,
# the record_count is reset to 1, and the loop pauses for 60 seconds with time.sleep(60).
# After the conditional statement (still inside the for loop), we create the URL endpoint for each city, as before.
# However, we are replacing
# the blank spaces in the city name with plus signs, using city.replace(" ","+"), before concatenating it to the URL. This will find
# the corresponding weather data for the city instead of finding the weather data for the first part of the city name.

# ^^^KNOWING THE NEED TO DO THIS SOUNDS LIKE A RESULT OF READING DOCUMENTATION^^^ (OR EXPERIENCING ERRORS ON WHITESPACE CITIES)

# Also, we add a print statement that tells us the record count and set count, and the city that is being processed.
# Then we add one to the record count before the next city is processed.
# Next, we will retrieve the data from the JSON weather response for each city.


# Let's review the code:

# We parse the JSON response for the current city.
# If there is no weather data for the city, i.e., a <Response [404]>, then there is no weather to retrieve
# and City not found. Skipping... is printed.
# If there is weather data for the city, we will retrieve the latitude, longitude, maximum temperature, humidity,
# cloudiness, wind speed, country, and date and assign those values to variables.
# We could write a try-except block for each one of these parameters to handle the KeyError if the data
# wasn't found, but since these parameters are always present in the response this won't be necessary.
# We append to the city_data list a dictionary for that city, where the key-value pairs are the values
# from our weather parameters.
# Finally, below the try block and after the except block, we add the closing print statement, which will
# let us know the data retrieval has been completed. Make sure that your except block is indented and in line
# with the try block, and that the print statements are flush with the margin.
# Under the print statement in the except block, we add the pass statement. pass does nothing by itself;
# it is the except block that catches the error, after which the loop continues with the next city.

In [None]:
# Display how many city records were collected in city_data.
len(city_data)

In [None]:
# Turn the list of per-city dictionaries into a DataFrame and preview the first ten rows.
city_data_df = pd.DataFrame(data=city_data)
city_data_df.head(n=10)

In [None]:
# Reorder the columns: identifying fields first, then coordinates, then the weather measurements.
col_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]
city_data_df = city_data_df.loc[:, col_order]
city_data_df.head(n=10)

In [None]:
# Create the output CSV file path.
output_data_file = "weather_data/cities.csv"

# Make sure the output folder exists first: to_csv() does not create missing
# directories and would otherwise fail on a fresh checkout.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)

# Export the city data into a CSV.
city_data_df.to_csv(output_data_file, index_label="City_ID")

# The line above exports the DataFrame to a CSV file, with the index label
# (or column A) header as "City_ID." If we ever need to import the CSV file into a DataFrame,
# that header will be present in the DataFrame.

In [None]:
# Pull the latitude column and each weather measurement out of the DataFrame
# so the plotting cells can refer to them by short names.
lats, max_temps, humidity, cloudiness, wind_speed = (
    city_data_df[column]
    for column in ("Lat", "Max Temp", "Humidity", "Cloudiness", "Wind Speed")
)

In [None]:
# Import time module
import time

# Scatter plot of max temperature against latitude.
plt.scatter(lats, max_temps,
            marker="o", edgecolor="black", linewidths=1,
            alpha=0.8, label="Cities")

# Title (stamped with the current date), axis labels, and grid.
plt.title("City Latitude vs. Max Temperature " + time.strftime("%x"))
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid(True)

# Write the figure to disk, then display it.
plt.savefig("weather_data/Fig1.png")
plt.show()

In [None]:
# Scatter plot of humidity against latitude.
plt.scatter(lats, humidity,
            marker="o", edgecolor="black", linewidths=1,
            alpha=0.8, label="Cities")

# Title (stamped with the current date), axis labels, and grid.
plt.title("City Latitude vs. Humidity " + time.strftime("%x"))
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)

# Write the figure to disk, then display it.
plt.savefig("weather_data/Fig2.png")
plt.show()

In [None]:
# Scatter plot of cloudiness against latitude.
plt.scatter(lats, cloudiness,
            marker="o", edgecolor="black", linewidths=1,
            alpha=0.8, label="Cities")

# Title (stamped with the current date), axis labels, and grid.
plt.title("City Latitude vs. Cloudiness (%) " + time.strftime("%x"))
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid(True)

# Write the figure to disk, then display it.
plt.savefig("weather_data/Fig3.png")
plt.show()

In [None]:
# Scatter plot of wind speed against latitude.
plt.scatter(lats, wind_speed,
            marker="o", edgecolor="black", linewidths=1,
            alpha=0.8, label="Cities")

# Title (stamped with the current date), axis labels, and grid.
plt.title("City Latitude vs. Wind Speed " + time.strftime("%x"))
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid(True)

# Write the figure to disk, then display it.
plt.savefig("weather_data/Fig4.png")
plt.show()

In [None]:
# # Perform linear regression.
# (slope, intercept, r_value, p_value, std_err) = linregress(x_values, y_values)

# # Calculate the regression line "y values" from the slope and intercept.
# regress_values = x_values * slope + intercept

# # Get the equation of the line.
# line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# # Create a scatter plot of the x and y values.
# plt.scatter(x_values,y_values)
# # Plot the regression line with the x-values and the y coordinates based on the intercept and slope.
# plt.plot(x_values,regress_values,"r")
# # Annotate the text for the line equation and add its coordinates.
# plt.annotate(line_eq, (10,40), fontsize=15, color="red")
# plt.title(title)
# plt.xlabel('Latitude')
# plt.ylabel('Temp')
# plt.show()

In [None]:
# Import linregress
from scipy.stats import linregress

# Create a function to perform linear regression on the weather data
# and plot a regression line and the equation with the data.
def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates):
    """Scatter-plot y_values against x_values with a fitted regression line.

    Parameters
    ----------
    x_values : array-like
        Values for the x-axis, which is always labelled 'Latitude'. Must
        support multiplication by a scalar (it is multiplied by the slope).
    y_values : array-like
        Values for the y-axis.
    title : str
        Title of the plot.
    y_label : str
        Label for the y-axis.
    text_coordinates : tuple
        (x, y) position passed to plt.annotate() for the line equation.

    Returns
    -------
    None
        The figure is displayed with plt.show().
    """
    # Run regression on hemisphere weather data. Only the slope and intercept
    # (the first two fields of the result) are used here.
    slope, intercept = linregress(x_values, y_values)[:2]

    # Calculate the regression line "y values" from the slope and intercept.
    regress_values = x_values * slope + intercept
    # Get the equation of the line. The sign is written separately so that a
    # negative intercept reads "x - 3.2" instead of "x + -3.2".
    sign = "-" if intercept < 0 else "+"
    line_eq = ("y = " + str(round(slope, 2)) + "x " + sign + " "
               + str(round(abs(intercept), 2)))
    # Create a scatter plot and plot the regression line in red.
    plt.scatter(x_values, y_values)
    plt.plot(x_values, regress_values, "r")
    # Annotate the text for the line equation.
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")
    plt.title(title)
    plt.xlabel('Latitude')
    plt.ylabel(y_label)
    plt.show()

In [None]:
# Split the cities by hemisphere: latitude 0 and above is Northern, below 0 is Southern.
northern_hemi_df = city_data_df[city_data_df["Lat"].ge(0)]
southern_hemi_df = city_data_df[city_data_df["Lat"].lt(0)]

In [None]:
# Linear regression on the Northern Hemisphere (latitude vs. max temperature)
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]
# Call the function. The title is built from adjacent string literals: a
# backslash continuation inside the quotes would embed the next line's
# indentation in the title text.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere '
                       'for Maximum Temperature', 'Max Temp',(10,40))

In [None]:
# Linear regression on the Southern Hemisphere (latitude vs. max temperature)
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]
# Call the function. The title is built from adjacent string literals: a
# backslash continuation inside the quotes would embed the next line's
# indentation in the title text.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere '
                       'for Maximum Temperature', 'Max Temp',(-50,90))

In [None]:
# Linear regression on the Northern Hemisphere (latitude vs. humidity)
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]
# Call the function. The title is built from adjacent string literals: a
# backslash continuation inside the quotes would embed the next line's
# indentation in the title text.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere '
                       'for % Humidity', '% Humidity',(40,10))

In [None]:
# Linear regression on the Southern Hemisphere (latitude vs. humidity)
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]
# Call the function. The title is built from adjacent string literals: a
# backslash continuation inside the quotes would embed the next line's
# indentation in the title text.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere '
                       'for % Humidity', '% Humidity',(-50,15))

In [None]:
# Linear regression on the Northern Hemisphere (latitude vs. cloudiness)
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]
# Call the function. The title is built from adjacent string literals: a
# backslash continuation inside the quotes would embed the next line's
# indentation in the title text.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere '
                       'for % Cloudiness', '% Cloudiness',(40,10))

In [None]:
# Linear regression on the Southern Hemisphere (latitude vs. cloudiness)
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]
# Call the function. The title is built from adjacent string literals: a
# backslash continuation inside the quotes would embed the next line's
# indentation in the title text.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere '
                       'for % Cloudiness', '% Cloudiness',(-50,15))

In [None]:
# Linear regression on the Northern Hemisphere (latitude vs. wind speed)
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]
# Call the function. The title is built from adjacent string literals: a
# backslash continuation inside the quotes would embed the next line's
# indentation in the title text.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere '
                       'for Wind Speed', 'Wind Speed',(40,35))

In [None]:
# Linear regression on the Southern Hemisphere (latitude vs. wind speed)
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]
# Call the function. The title is built from adjacent string literals: a
# backslash continuation inside the quotes would embed the next line's
# indentation in the title text.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere '
                       'for Wind Speed', 'Wind Speed',(-50,35))