# I. Collect the Data

## Use the NumPy module to generate 1,500 random latitude and longitude pairs

In [1]:
# Import dependencies
import os
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy
from scipy.stats import linregress

from config import weather_api_key

In [2]:
# Draw 1500 latitudes from [-90, 90) and 1500 longitudes from [-180, 180),
# then pair them lazily. The bare name on the last line displays the zip object.
lats = np.random.uniform(-90, 90, 1500)
lngs = np.random.uniform(-180, 180, 1500)
lats_lngs = zip(lats, lngs)
lats_lngs

<zip at 0x7f944055ecd0>

In [3]:
# Materialise the (latitude, longitude) tuples into a list.
# lats_lngs is a zip iterator, so it is exhausted once this line has run;
# re-running this cell without rebuilding lats_lngs yields an empty list.
coordinates = [*lats_lngs]

## Match coordinates to the nearest city using Python's citipy module

In [4]:
# Collect the nearest city name for every coordinate pair, keeping only the
# first occurrence of each name so the list preserves discovery order.
cities = []
for lat, lng in coordinates:
    city = citipy.nearest_city(lat, lng).city_name

    # Skip names that have already been recorded.
    if city in cities:
        continue
    cities.append(city)
# Display how many unique cities were found to confirm the count is sufficient.
len(cities)

625

## Use the OpenWeatherMap API to request the current weather data from each unique city in your list. Parse the JSON data from the API request, and obtain the following data for each city:
    - City, country, and date
    - Latitude and longitude
    - Maximum temperature
    - Humidity
    - Cloudiness
    - Wind speed

## Then add the data to a pandas DataFrame

In [5]:
# Starting URL for Weather Map API Call. Units = Imperial
# HTTPS is used so the API key is not sent as clear text.
base_url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID="
url = base_url + weather_api_key
# Print the endpoint with the key masked, so the secret is never stored in
# the notebook's saved output.
print(base_url + "<redacted>")

http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=<redacted>


In [None]:
# Create an empty list to hold the weather data (one dict per city).
city_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters used only for the progress log.
record_count = 1
set_count = 1

# Seconds to wait for a single API response, so one stalled connection
# cannot hang the whole run.
request_timeout = 10

# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each city; blanks in the name become "+".
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1

    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data.
        city_weather = requests.get(city_url, timeout=request_timeout).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the Unix timestamp to a UTC date string. time.gmtime is used
        # in place of the deprecated datetime.utcfromtimestamp and formats to
        # the same text.
        city_date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(city_weather["dt"]))
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # The response could not be parsed or lacks the expected fields
    # (presumably the API's error payload for an unknown city): skip the city.
    except (KeyError, TypeError, ValueError):
        print("City not found. Skipping...")
    # Network-level failure (timeout, connection error, ...): skip the city.
    # Only the exception type is printed because the exception text can
    # contain the request URL, which includes the API key.
    except requests.exceptions.RequestException as error:
        print(f"Request failed ({type(error).__name__}). Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | babanusah
City not found. Skipping...
Processing Record 2 of Set 1 | marawi
Processing Record 3 of Set 1 | teguldet
Processing Record 4 of Set 1 | klaksvik
Processing Record 5 of Set 1 | reoti
Processing Record 6 of Set 1 | davila
Processing Record 7 of Set 1 | illoqqortoormiut
City not found. Skipping...
Processing Record 8 of Set 1 | koslan
Processing Record 9 of Set 1 | cape town
Processing Record 10 of Set 1 | la ronge
Processing Record 11 of Set 1 | kijang
Processing Record 12 of Set 1 | tuktoyaktuk
Processing Record 13 of Set 1 | arraial do cabo
Processing Record 14 of Set 1 | kapaa
Processing Record 15 of Set 1 | punta arenas
Processing Record 16 of Set 1 | busselton
Processing Record 17 of Set 1 | vaini
Processing Record 18 of Set 1 | teruel
Processing Record 19 of Set 1 | bluff
Processing Record 20 of Set 1 | east london
Processing Record 21 of Set 1 | college
Processing Record 22 of Set

Processing Record 38 of Set 4 | vardo
Processing Record 39 of Set 4 | cabo san lucas
Processing Record 40 of Set 4 | chuy
Processing Record 41 of Set 4 | ambon
Processing Record 42 of Set 4 | ilulissat
Processing Record 43 of Set 4 | vao
Processing Record 44 of Set 4 | preobrazheniye
Processing Record 45 of Set 4 | sayyan
Processing Record 46 of Set 4 | avarua
Processing Record 47 of Set 4 | portland
Processing Record 48 of Set 4 | dudinka
Processing Record 49 of Set 4 | umzimvubu
City not found. Skipping...
Processing Record 50 of Set 4 | taungdwingyi
Processing Record 1 of Set 5 | bathsheba
Processing Record 2 of Set 5 | hambantota
Processing Record 3 of Set 5 | padang
Processing Record 4 of Set 5 | diban
Processing Record 5 of Set 5 | souillac
Processing Record 6 of Set 5 | avanashi
City not found. Skipping...
Processing Record 7 of Set 5 | hay river
Processing Record 8 of Set 5 | greymouth
Processing Record 9 of Set 5 | vila velha
Processing Record 10 of Set 5 | rawson
Processing R

Processing Record 29 of Set 8 | charagua
Processing Record 30 of Set 8 | shimoda
Processing Record 31 of Set 8 | kamenskoye
City not found. Skipping...
Processing Record 32 of Set 8 | ammon
Processing Record 33 of Set 8 | huarmey
Processing Record 34 of Set 8 | nalut
Processing Record 35 of Set 8 | ixtapa
Processing Record 36 of Set 8 | cartagena
Processing Record 37 of Set 8 | acaxochitlan
Processing Record 38 of Set 8 | nome
Processing Record 39 of Set 8 | troitskoye
Processing Record 40 of Set 8 | tam ky
Processing Record 41 of Set 8 | hami
Processing Record 42 of Set 8 | dzerzhinsk
Processing Record 43 of Set 8 | mount gambier
Processing Record 44 of Set 8 | port lincoln
Processing Record 45 of Set 8 | teya
Processing Record 46 of Set 8 | bacolod
Processing Record 47 of Set 8 | tuatapere
Processing Record 48 of Set 8 | bandarbeyla
Processing Record 49 of Set 8 | oranjestad
Processing Record 50 of Set 8 | makakilo city
Processing Record 1 of Set 9 | saint-pierre
Processing Record 2 

Processing Record 14 of Set 12 | faya
Processing Record 15 of Set 12 | havre-saint-pierre
Processing Record 16 of Set 12 | kaeo
Processing Record 17 of Set 12 | honningsvag
Processing Record 18 of Set 12 | cabildo
Processing Record 19 of Set 12 | sabha
Processing Record 20 of Set 12 | diffa
Processing Record 21 of Set 12 | novoye leushino
Processing Record 22 of Set 12 | el alto
Processing Record 23 of Set 12 | baghdad
Processing Record 24 of Set 12 | birjand
Processing Record 25 of Set 12 | ilo
Processing Record 26 of Set 12 | shaki
Processing Record 27 of Set 12 | suez
Processing Record 28 of Set 12 | soyo
Processing Record 29 of Set 12 | altay
Processing Record 30 of Set 12 | uray


In [None]:
# Check to ensure data from at least 500 cities has been collected.
# This only displays the number of records gathered; nothing here enforces
# the 500-city minimum, so the count has to be checked by eye.
len(city_data)

In [None]:
# Build a DataFrame from the list of per-city dicts (one row per city)
# and preview its first ten rows.
city_data_df = pd.DataFrame(data=city_data)
city_data_df.head(n=10)

In [None]:
# Put the identifying columns first, followed by position and the weather
# measurements, then preview the result.
new_column_order = [
    'City', 'Country', 'Date',
    'Lat', 'Lng',
    'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed',
]
city_data_df = city_data_df.loc[:, new_column_order]
city_data_df.head()

In [None]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"
# Make sure the target folder exists; to_csv raises if it is missing.
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)
# Export the City_Data into a CSV, labelling the index column "City_ID".
city_data_df.to_csv(output_data_file, index_label="City_ID")

# II. Plot the Data

## Create visualizations that showcase the retrieved weather parameters with changing latitude for the 500-plus cities from all over the world.

In [None]:
# Pull out the columns used by the plots below.
# Note: this rebinds `lats`, which earlier held the random latitude array.
lats, max_temps, humidity, cloudiness, wind_speed = (
    city_data_df[column]
    for column in ("Lat", "Max Temp", "Humidity", "Cloudiness", "Wind Speed")
)

### Latitude vs. Maximum Temperature

In [None]:
# Latitude on the x-axis against maximum temperature on the y-axis.
marker_style = dict(marker="o", edgecolor="black", linewidths=1, alpha=0.8)
plt.scatter(lats, max_temps, label="Cities", **marker_style)

# Title ends with today's date in the locale's format ("%x").
plt.title("City Latitude vs. Max Temperature " + time.strftime("%x"))
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid(True)

# Write the figure to disk, then render it.
plt.savefig("weather_data/Fig1.png")
plt.show()

### Latitude vs. Humidity

In [None]:
# Latitude on the x-axis against humidity on the y-axis.
marker_style = dict(marker="o", edgecolor="black", linewidths=1, alpha=0.8)
plt.scatter(lats, humidity, label="Cities", **marker_style)

# Title ends with today's date in the locale's format ("%x").
plt.title("City Latitude vs. Humidity " + time.strftime("%x"))
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)

# Write the figure to disk, then render it.
plt.savefig("weather_data/Fig2.png")
plt.show()

### Latitude vs. Cloudiness

In [None]:
# Build the scatter plots for latitude vs. cloudiness.
plt.scatter(lats,
            cloudiness,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Cloudiness (%) "+ time.strftime("%x"))
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(True)
# Save the figure.
plt.savefig("weather_data/Fig3.png")
# Show plot.
plt.show()

### Latitude vs. Wind Speed

In [None]:
# Build the scatter plots for latitude vs. wind speed.
plt.scatter(lats,
            wind_speed,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Wind Speed "+ time.strftime("%x"))
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)
# Save the figure.
plt.savefig("weather_data/Fig4.png")
# Show plot.
plt.show()

# III. Determine Correlations

## Use linear regression to find relationships between plotted variables.

In [None]:
# Create linear regression algorithm

def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates,
                           x_label="Latitude"):
    """Scatter-plot y against x and overlay the fitted regression line.

    Everything is drawn through ``plt`` onto the current matplotlib figure.
    The function does not show or save the figure and returns nothing.

    Parameters
    ----------
    x_values, y_values
        Paired observations handed to ``scipy.stats.linregress``.
        ``x_values`` must also support ``x_values * slope + intercept``
        (the callers in this notebook pass pandas Series).
    title : str
        Plot title.
    y_label : str
        Label for the y-axis.
    text_coordinates : tuple
        Position passed to ``plt.annotate`` for the equation / r-value text.
    x_label : str, optional
        Label for the x-axis. Defaults to "Latitude".
    """
    # Only slope, intercept and r-value are used; the remaining fields of the
    # regression result are discarded.
    slope, intercept, r_value, *_ = linregress(x_values, y_values)

    # Build the line equation. The sign is taken from the rounded intercept so
    # that a negative value reads "y = ax - b" rather than "y = ax + -b".
    intercept_rounded = round(intercept, 2)
    sign = "-" if intercept_rounded < 0 else "+"
    regress_eq = f"y = {round(slope, 2)}x {sign} {abs(intercept_rounded)}"

    # Predicted y-values along the fitted line.
    regress_values = x_values * slope + intercept

    # Scatter plot of the raw observations.
    plt.scatter(x_values, y_values,
                edgecolor="black",
                linewidths=1,
                marker="o", alpha=0.8)
    # Regression line in red.
    plt.plot(x_values, regress_values, "r")
    # Annotate with the line equation and r-value, then label the plot.
    plt.annotate(f"{regress_eq} \n r-value = {round(r_value, 4)}", text_coordinates, fontsize=15, color="red")
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)

In [None]:
# Split the cities at the equator: latitude 0 and above goes to the northern
# frame, anything below 0 to the southern frame.
latitudes = city_data_df['Lat']
north_hemi_df = city_data_df.loc[latitudes.ge(0)]
south_hemi_df = city_data_df.loc[latitudes.lt(0)]

### Latitude vs. Maximum Temperature

In [None]:
# Northern Hemisphere: maximum temperature as a function of latitude.
x_values, y_values = north_hemi_df['Lat'], north_hemi_df['Max Temp']
# Fit and draw the regression with the shared helper.
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on Latitude vs. Maximum Temperature (N. Hemisphere)",
    y_label="Max Temp",
    text_coordinates=(10, 40),
)

In [None]:
# Southern Hemisphere: maximum temperature as a function of latitude.
x_values, y_values = south_hemi_df['Lat'], south_hemi_df['Max Temp']
# Fit and draw the regression with the shared helper.
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on Latitude vs. Maximum Temperature (S. Hemisphere)",
    y_label="Max Temp",
    text_coordinates=(-25, 35),
)

### Latitude vs. Humidity

In [None]:
# Northern Hemisphere: humidity as a function of latitude.
x_values, y_values = north_hemi_df['Lat'], north_hemi_df['Humidity']
# Fit and draw the regression with the shared helper.
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on Latitude vs. Percent Humidity (N. Hemisphere)",
    y_label="% Humidity",
    text_coordinates=(47, 7),
)

In [None]:
# Southern Hemisphere: humidity as a function of latitude.
x_values, y_values = south_hemi_df['Lat'], south_hemi_df['Humidity']
# Fit and draw the regression with the shared helper.
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on Latitude vs. Percent Humidity (S. Hemisphere)",
    y_label="% Humidity",
    text_coordinates=(-55, 18),
)

### Latitude vs. Cloudiness

In [None]:
# Northern Hemisphere: cloudiness as a function of latitude.
x_values, y_values = north_hemi_df['Lat'], north_hemi_df['Cloudiness']
# Fit and draw the regression with the shared helper.
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on Latitude vs. Cloudiness (N. Hemisphere)",
    y_label="Cloudiness",
    text_coordinates=(70, 0),
)

In [None]:
# Southern Hemisphere: cloudiness as a function of latitude.
x_values, y_values = south_hemi_df['Lat'], south_hemi_df['Cloudiness']
# Fit and draw the regression with the shared helper.
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on Latitude vs. Cloudiness (S. Hemisphere)",
    y_label="Cloudiness",
    text_coordinates=(-57, 22),
)

### Latitude vs. Wind Speed

In [None]:
# Northern Hemisphere: wind speed as a function of latitude.
x_values, y_values = north_hemi_df['Lat'], north_hemi_df['Wind Speed']
# Fit and draw the regression with the shared helper.
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on Latitude vs. Wind Speed (N. Hemisphere)",
    y_label="Wind Speed",
    text_coordinates=(15, 25),
)

In [None]:
# Southern Hemisphere: wind speed as a function of latitude.
x_values, y_values = south_hemi_df['Lat'], south_hemi_df['Wind Speed']
# Fit and draw the regression with the shared helper.
plot_linear_regression(
    x_values,
    y_values,
    title="Linear Regression on Latitude vs. Wind Speed (S. Hemisphere)",
    y_label="Wind Speed",
    text_coordinates=(-25, 25),
)