In [None]:
# 6.1.4 cont....


# Import the dependencies.
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Draw 1,500 random latitudes and 1,500 random longitudes, then pair them up.
# zip() yields (lat, lng) tuples lazily; nothing is materialised until the
# iterator is consumed.

lat_bounds = (-90.000, 90.000)
lng_bounds = (-180.000, 180.000)

lats = np.random.uniform(*lat_bounds, size=1500)
lngs = np.random.uniform(*lng_bounds, size=1500)
lat_lngs = zip(lats, lngs)
lat_lngs

In [None]:
# Unpack the lat_lngs zip object into a list.
#  NOTE:  a zip object is a one-shot iterator -- once it has been consumed it
#       yields nothing on a second pass. Materialise it into a list here so
#       the coordinate pairs can be reused by later cells.


# Add the latitudes and longitudes to a list.
coordinates = list(lat_lngs)

In [None]:
# 6.1.5   Generate Random World Cities

In [None]:
# USE citipy

In [None]:
# Use the citipy module to determine city based on latitude and longitude.
from citipy import citipy

In [None]:
# Build the list of unique nearest cities, one lookup per coordinate pair.
# dict.fromkeys() de-duplicates with hash lookups while keeping first-seen
# order, so the result matches the original append-if-missing loop without
# rescanning the whole list for every coordinate (which was quadratic).
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in coordinates
))
# Print the city count to confirm sufficient count.
len(cities)

In [None]:
# 6.2.6  Get the City Weather Data

In [None]:
# Import the requests library.
import requests

# Import the API key.
from config import weather_api_key

In [None]:
# Starting URL for Weather Map API Call.
# units=Imperial selects imperial units (other options: standard, metric).
# HTTPS is used so the API key in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key
# NOTE: do not print `url` in a saved notebook -- the cell output would embed the API key.

In [None]:
# Import the datetime module from the datetime library.
from datetime import datetime

In [None]:
# Create an empty list to hold the weather data (one dict per city found).
city_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters. Both start at 1 so the log output is 1-based.
record_count = 1
set_count = 1

# Loop through all the cities in the list.
# enumerate() supplies the index used to group the log into sets of 50.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes: at every 50th index
    # (50, 100, ...) move to the next set and restart the record count at 1.
    if i >= 50 and i % 50 == 0:
        set_count += 1
        record_count = 1

    # Create endpoint URL with each city, replacing blanks in the city name.
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count before the next city is processed.
    record_count += 1

    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data. The timeout stops one stalled
        # connection from hanging the whole run.
        city_weather = requests.get(city_url, timeout=10).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the Unix timestamp to a 'YYYY-MM-DD HH:MM:SS' UTC string.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # Skip the city when the request fails (network error / timeout), the body
    # is not valid JSON (ValueError), or the payload lacks the expected fields
    # (KeyError / TypeError). Unlike a bare `except:`, this does not swallow
    # KeyboardInterrupt or unrelated programming errors such as NameError.
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")
    
    

In [None]:
# How many city records were successfully retrieved (cities that were
# skipped in the loop above are not counted).

len(city_data)

In [None]:
# 6.2.7 Create a DataFrame of City Wx Data

In [None]:
# Convert the list of per-city dictionaries to a Pandas DataFrame
# (one row per dictionary, one column per key).
# city_data was built by the retrieval loop above.
city_data_df = pd.DataFrame(city_data)
# Preview the first 10 rows.
city_data_df.head(10)

In [None]:
# Reorder the columns: identifying fields first, then position, then the
# weather measurements.

new_column_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]
city_data_df = city_data_df.loc[:, new_column_order]
city_data_df.head(10)

In [None]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"
# Make sure the target folder exists first: to_csv() does not create missing
# directories and raises if "weather_data/" is absent (e.g. a fresh checkout).
os.makedirs(os.path.dirname(output_data_file), exist_ok=True)
# Export the City_Data into a CSV.
# The index is written as the first column under the header City_ID.
city_data_df.to_csv(output_data_file, index_label="City_ID")

In [None]:
# 6.3.1   Plot latitude vs Temp

# need lats, max temps, humidity, cloudiness, wind speed

In [None]:
# Pull the five series used by the scatter plots out of the DataFrame.
# NOTE: `lats` is rebound here -- from this point on it holds the retrieved
# city latitudes, not the random latitudes generated at the top.
lats, max_temps, humidity, cloudiness, wind_speed = (
    city_data_df[column]
    for column in ("Lat", "Max Temp", "Humidity", "Cloudiness", "Wind Speed")
)

In [None]:
# use time module from standard Python library

In [None]:
#  SCATTER PLOT MAX TEMP

# Import time module
import time

# Latitude vs. max temperature: circle markers with a black outline,
# drawn at 80% opacity.
plt.scatter(
    lats,
    max_temps,
    marker="o",
    edgecolor="black",
    linewidths=1,
    alpha=0.8,
    label="Cities",
)

# Title carries today's date; strftime("%x") gives the locale's date format
# (time.time() alone would be raw seconds).
plt.title(f"City Latitude vs. Max Temperature {time.strftime('%x')}")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid(True)

# Save the figure before showing it.
plt.savefig("weather_data/Fig1.png")

# Show plot.
plt.show()

In [None]:
# 6.3.2  Plot Latitude vs Humidity

In [None]:
#  SCATTER PLOT HUMIDITY

# Import time module
import time

# Build the scatter plot for latitude vs. humidity.
# marker is circles, transparency 0.8
plt.scatter(lats,
            humidity,
            color = "red",
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
# strftime("%x") formats today's date in the locale's date format
# (time.time() alone would be raw seconds).
# The trailing space keeps the date from running into the word "Humidity".
plt.title("City Latitude vs. Humidity " + time.strftime("%x"))
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure.
plt.savefig("weather_data/Fig2.png")

# Show plot.
plt.show()

In [None]:
# 6.3.3 Plot Latitude vs Cloudiness

In [None]:
#  SCATTER PLOT CLOUDINESS

# Import time module
import time

# Latitude vs. cloudiness: green circle markers with a black outline,
# drawn at 80% opacity.
plt.scatter(
    lats,
    cloudiness,
    color="green",
    marker="o",
    edgecolor="black",
    linewidths=1,
    alpha=0.8,
    label="Cities",
)

# Title carries today's date; strftime("%x") gives the locale's date format
# (time.time() alone would be raw seconds).
plt.title(f"City Latitude vs. Cloudiness {time.strftime('%x')}")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid(True)

# Save the figure before showing it.
plt.savefig("weather_data/Fig3.png")

# Show plot.
plt.show()

In [None]:
# 6.3.4 Plot Latitude vs Wind Speed

In [None]:
#  SCATTER PLOT WIND SPEED

# Import time module
import time

# Build the scatter plot for latitude vs. wind speed.
# marker is circles, transparency 0.8
plt.scatter(lats,
            wind_speed,
            color="yellow",
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
# strftime("%x") formats today's date in the locale's date format
# (time.time() alone would be raw seconds).
# The trailing space keeps the date from running into the words "Wind Speed".
plt.title("City Latitude vs. Wind Speed " + time.strftime("%x"))
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure.
plt.savefig("weather_data/Fig4.png")

# Show plot.
plt.show()

In [None]:
# 6.4.2 find correlation between latitude and Max temp

# use the linear Regression function (practiced in random_numbers 6.4.1)

In [None]:
# will want to run this for Nothern and Southern Hemispheres and all 
# four weather parameters: max temp, humidity, cloudiness, wind speed

# reuse the code for minor changes for each weather parameter in each hemisphere

# x values - latitude
# y values - 4 wx parameters
# y label - wx parameter
# x and y values given as a tuple (10,40) for regression line eq to be on plot

# Import linregress
from scipy.stats import linregress

# Create a function to perform linear regression on the weather data
# and plot a regression line and the equation with the data.
# This makes it easy to switch between different values without rewriting
# the code each time.  Just change the arguments.

def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates):
    """Scatter-plot the data with its least-squares line and the line's equation.

    Args:
        x_values: Independent variable, plotted on the x-axis (always
            labelled 'Latitude').
        y_values: Dependent variable, plotted on the y-axis.
        title: Title shown above the plot.
        y_label: Label for the y-axis.
        text_coordinates: (x, y) position passed to plt.annotate for the
            equation text.

    Returns:
        None. The figure is displayed with plt.show().
    """
    # linregress returns slope, intercept and further fit statistics; only
    # the first two are used here.
    slope, intercept, *_ = linregress(x_values, y_values)

    # Predicted y for every x on the fitted line.
    fitted = x_values * slope + intercept

    # Equation text with both coefficients rounded to two decimals.
    equation = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

    # Raw data first, then the regression line in red on top.
    plt.scatter(x_values, y_values)
    plt.plot(x_values, fitted, "r")

    # Equation label, axis labels and title.
    plt.annotate(equation, text_coordinates, fontsize=15, color="red")
    plt.xlabel('Latitude')
    plt.ylabel(y_label)
    plt.title(title)
    plt.show()

In [None]:
# Create Hemisphere DataFrames

# use city_data_df and loc method

In [None]:
# Row 13 example: .loc[13] selects the row whose index label is 13.
# NOTE(review): this raises KeyError if the frame has no label 13
# (e.g. fewer than 14 cities were retrieved).

index13 = city_data_df.loc[13]
index13

In [None]:
# Filtering example: build a boolean mask on the value of a column.
# Lat greater than or equal to 0 marks the Northern Hemisphere rows.
# The result holds True where Lat >= 0 and False otherwise.

city_data_df["Lat"] >= 0

In [None]:
# Passing that boolean mask to .loc returns a DataFrame containing only the
# rows where Lat >= 0 (northern); head() previews the first five.

city_data_df.loc[(city_data_df["Lat"] >= 0)].head()

In [None]:
# Split the cities by hemisphere with boolean masks on latitude.
# Rows with Lat == 0 fall in the Northern frame.
in_north = city_data_df["Lat"] >= 0
in_south = city_data_df["Lat"] < 0
northern_hemi_df = city_data_df.loc[in_north]
southern_hemi_df = city_data_df.loc[in_south]

In [None]:
# northern_hemi_df.head()

In [None]:
 # southern_hemi_df.head()

In [None]:
# NOTE:  had to add plt.title(title) to the function above
# NOTE: the title below wasn't printing right - had to add \n and rearrange
# the title to make it work

In [None]:
# Linear regression: Max Temp vs. latitude, Northern Hemisphere

# Latitude is the x variable; max temperature is the y variable.
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]

# text_coordinates is where the equation text is placed on the plot --
# change it if the equation is not showing.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere \n for Maximum Temperature',
    y_label='Max Temp',
    text_coordinates=(10,40),
)

In [None]:
# Linear regression: Max Temp vs. latitude, Southern Hemisphere

# Latitude is the x variable; max temperature is the y variable.
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]

# text_coordinates is where the equation text is placed on the plot.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere \n for Maximum Temperature',
    y_label='Max Temp',
    text_coordinates=(-50,90),
)

In [None]:
# FINDING:  correlation between lat and max temp is very strong
# r-value (the correlation coefficient, not the slope) is less than -0.7 for north
# r-value is greater than 0.7 for south
# this means as we approach the equator the temp becomes warmer
# further from the equator the temp is cooler

In [None]:
# 6.4.3 Correlation between Lat and Humidity

In [None]:
# Linear regression: Humidity vs. latitude, Northern Hemisphere

# Latitude is the x variable; humidity is the y variable.
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]

# text_coordinates is where the equation text is placed on the plot --
# change it if the equation is not showing.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere \n for % Humidity',
    y_label='% Humidity',
    text_coordinates=(40,10),
)

In [None]:
# Linear regression: Humidity vs. latitude, Southern Hemisphere

# Latitude is the x variable; humidity is the y variable.
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]

# text_coordinates is where the equation text is placed on the plot.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere \n for % Humidity',
    y_label='% Humidity',
    text_coordinates=(-50,15),
)

In [None]:
# FINDING: correlation between lat and humidity is very low
# because the r-value is less than 0.4 for north and south
# means humidity is unpredictable due to changing weather patterns

In [None]:
# 6.4.4 Correlation between lat and Cloudiness

In [None]:
# Linear regression: Cloudiness vs. latitude, Northern Hemisphere

# Latitude is the x variable; cloudiness is the y variable.
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]

# text_coordinates is where the equation text is placed on the plot --
# change it if the equation is not showing.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere \n for % Cloudiness',
    y_label='% Cloudiness',
    text_coordinates=(0,10),
)

In [None]:
# Linear regression: Cloudiness vs. latitude, Southern Hemisphere

# Latitude is the x variable; cloudiness is the y variable.
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]

# text_coordinates is where the equation text is placed on the plot.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere \n for % Cloudiness',
    y_label='% Cloudiness',
    text_coordinates=(-50,60),
)

In [None]:
# FINDING: a little different than the guide - the south regression line is pretty steep

In [None]:
# 6.4.5 Correlation between lat and wind Speed

In [None]:
# Linear regression: Wind Speed vs. latitude, Northern Hemisphere

# Latitude is the x variable; wind speed is the y variable.
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]

# text_coordinates is where the equation text is placed on the plot --
# change it if the equation is not showing.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Northern Hemisphere \n for Wind Speed',
    y_label='Wind Speed',
    text_coordinates=(40,25),
)

In [None]:
# Linear regression: Wind Speed vs. latitude, Southern Hemisphere

# Latitude is the x variable; wind speed is the y variable.
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]

# text_coordinates is where the equation text is placed on the plot.
plot_linear_regression(
    x_values,
    y_values,
    title='Linear Regression on the Southern Hemisphere \n for Wind Speed',
    y_label='Wind Speed',
    text_coordinates=(-50,20),
)

In [None]:
# FINDING: correlation between lat and wind speed is very low
# r-values less than -0.07 for north and -0.3 for south
# means wind speed is unpredictable due to changing weather patterns