### Conclusions:
* We can conclude that the max temperature is higher the closer you get to the equator and lower the farther you get from it, as expected. Additionally, we see a good correlation for this in the Northern Hemisphere.
* Humidity shows a fairly weak correlation with latitude. It seems as though, in the northern hemisphere, the farther you get from the equator, the fewer locations there are with low humidity. In the southern hemisphere this correlation is even weaker.
* If we examine cloudiness, there does not seem to be any meaningful correlation with latitude. The data points are evenly distributed and can almost be characterized as "random."

In [None]:
 # Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from config import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV output file
output_data_file = "output_data/cities.csv"

# (min, max) bounds for latitude and for longitude
lat_range, lng_range = (-90, 90), (-180, 180)

In [None]:
# Build the list of distinct cities nearest to randomly sampled coordinates.
# `cities` keeps first-seen order; `seen_cities` mirrors it as a set so the
# duplicate check is O(1) instead of scanning the list every time.
cities = []
seen_cities = set()

# Create a set of random lat and lng combinations, bounded by the configured
# ranges rather than repeating the limits as literals
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Only the first occurrence of each city name is kept
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

In [None]:
# Base request URL for current-weather lookups (imperial units). The API key is
# part of the query string, so HTTPS is used to avoid sending it in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [None]:
# Parallel lists of per-city observations: position i in every list describes
# the same city, so they must always grow together.
temp_f = []
humidity = []
cloudiness = []
wind_speed = []
name_of_city = []
country = []
date = []
latitude = []
longitude = []

# 1-based number of the next successfully retrieved record
counter = 1

# Upper bound (seconds) on a single request so a stalled connection cannot
# hang the whole retrieval loop
request_timeout_s = 10

# Copy of the request URL with the API key masked, so the progress log does
# not leak the key into saved or shared output
loggable_url = url.replace(weather_api_key, "<hidden>") if weather_api_key else url

print("Beginning Data Retrieval")
print("---------------------------")

for city in cities:
    # A city that cannot be retrieved is skipped instead of ending the run.
    # Only request/decoding/lookup errors are caught, so Ctrl-C still works.
    try:
        # Passing the city via `params` lets requests URL-encode the name
        response = requests.get(
            url, params={"q": city}, timeout=request_timeout_s
        ).json()

        # Read every field before storing any of them: a response missing
        # one key must not leave the lists with different lengths
        max_temp = response["main"]["temp_max"]
        humidity_pct = response["main"]["humidity"]
        cloud_cover = response["clouds"]["all"]
        wind = response["wind"]["speed"]
        pick_city = response["name"]
        country_code = response["sys"]["country"]
        timestamp = response["dt"]
        lat = response["coord"]["lat"]
        lon = response["coord"]["lon"]
    except (KeyError, TypeError, ValueError, requests.exceptions.RequestException):
        print("City not found. Skipping...")
        continue

    temp_f.append(max_temp)
    humidity.append(humidity_pct)
    cloudiness.append(cloud_cover)
    wind_speed.append(wind)
    name_of_city.append(pick_city)
    country.append(country_code)
    date.append(timestamp)
    latitude.append(lat)
    longitude.append(lon)

    print(f"Processing Record {counter} of Set | {pick_city}")
    print(f"{loggable_url}&q={city}")
    counter = counter + 1

print("---------------------------")
print("Data Retrieval Complete")
print("---------------------------")

In [None]:
# Column headers paired, in order, with the lists filled during retrieval
column_names = [
    "City",
    "Cloudiness",
    "Country",
    "Date",
    "Humidity",
    "Latitude",
    "Longitude",
    "Max Temperature",
    "Wind Speed",
]
column_values = [
    name_of_city,
    cloudiness,
    country,
    date,
    humidity,
    latitude,
    longitude,
    temp_f,
    wind_speed,
]
weather_cats = dict(zip(column_names, column_values))

# Turn the column mapping into a data frame
weather_df = pd.DataFrame(data=weather_cats)

# Write the data frame out as CSV
weather_df.to_csv(path_or_buf="weather_df.csv")

# Number of non-missing values in each column
weather_df.count()

In [None]:
# Preview the first five rows of the weather data
weather_df.head(n=5)

In [None]:
# Latitude vs. max temperature for every retrieved city
point_style = {"marker": "o", "color": "blue", "edgecolor": "black", "s": 25}
plt.scatter(x=weather_df["Latitude"], y=weather_df["Max Temperature"], **point_style)

# Title and axis labels
plt.title("City Latitude vs. Max Temperature")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")

# Grid lines make the values easier to read off
plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("temp_v_latitude.png")
plt.show()

In [None]:
# Latitude vs. humidity for every retrieved city
point_style = {"marker": "o", "color": "blue", "edgecolor": "black", "s": 25}
plt.scatter(x=weather_df["Latitude"], y=weather_df["Humidity"], **point_style)

# Title and axis labels
plt.title("City Latitude vs. Humidity")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("humidity_v_latitude.png")
plt.show()

In [None]:
# Latitude vs. cloudiness for every retrieved city
point_style = {"marker": "o", "color": "blue", "edgecolor": "black", "s": 25}
plt.scatter(x=weather_df["Latitude"], y=weather_df["Cloudiness"], **point_style)

# Title and axis labels
plt.title("City Latitude vs. Cloudiness")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("cloudiness_v_latitude.png")
plt.show()

In [None]:
# Latitude vs. wind speed for every retrieved city
point_style = {"marker": "o", "color": "blue", "edgecolor": "black", "s": 25}
plt.scatter(x=weather_df["Latitude"], y=weather_df["Wind Speed"], **point_style)

# Title and axis labels
plt.title("City Latitude vs. Wind Speed")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("windspeed_v_latitude.png")
plt.show()

In [None]:
# Northern-hemisphere subset: rows with latitude at or above zero
df_north_hem = weather_df.loc[weather_df["Latitude"] >= 0]
north_lat = df_north_hem["Latitude"]
north_temp = df_north_hem["Max Temperature"]

# Scatter of the raw observations
plt.scatter(north_lat, north_temp, marker="o", color="blue", edgecolor="black", s=25)

plt.title("City Latitude (Northern Hemisphere) vs. Max Temperature")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")

# Linear regression of max temperature on latitude, drawn as a red line
a_slope, a_int, a_r, a_p, a_std_err = linregress(north_lat, north_temp)
a_fit = a_slope * north_lat + a_int
plt.plot(north_lat, a_fit, "-", color="red")

# Annotate the plot with the fitted equation and report R squared
line_eq = "y = {}x + {}".format(str(round(a_slope, 2)), str(round(a_int, 2)))
plt.annotate(line_eq, (5, -20), fontsize=15, color="red")
print(f"R squared: {a_r ** 2}")

plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("north_temp_v_latitude.png")
plt.show()

In [None]:
# Southern-hemisphere subset: rows with latitude below zero
df_south_hem = weather_df.loc[weather_df["Latitude"] < 0]
south_lat = df_south_hem["Latitude"]
south_temp = df_south_hem["Max Temperature"]

# Scatter of the raw observations
plt.scatter(south_lat, south_temp, marker="o", color="blue", edgecolor="black", s=25)

plt.title("City Latitude (Southern Hemisphere) vs. Max Temperature")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")

# Linear regression of max temperature on latitude, drawn as a red line
b_slope, b_int, b_r, b_p, b_std_err = linregress(south_lat, south_temp)
b_fit = b_slope * south_lat + b_int
plt.plot(south_lat, b_fit, "-", color="red")

# Annotate the plot with the fitted equation and report R squared
line_eq_2 = "y = {}x + {}".format(str(round(b_slope, 2)), str(round(b_int, 2)))
plt.annotate(line_eq_2, (-30, 50), fontsize=15, color="red")
print(f"R squared: {b_r ** 2}")

plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("south_temp_v_latitude.png")
plt.show()

In [None]:
# Northern hemisphere: humidity against latitude, with a linear fit
north_lat = df_north_hem["Latitude"]
north_humidity = df_north_hem["Humidity"]

# Scatter of the raw observations
plt.scatter(north_lat, north_humidity, marker="o", color="blue", edgecolor="black", s=25)

plt.title("City Latitude (Northern Hemisphere) vs. Humidity")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

# Linear regression of humidity on latitude, drawn as a red line
c_slope, c_int, c_r, c_p, c_std_err = linregress(north_lat, north_humidity)
c_fit = c_slope * north_lat + c_int
plt.plot(north_lat, c_fit, "-", color="red")

# Annotate the plot with the fitted equation and report R squared
line_eq_3 = "y = {}x + {}".format(str(round(c_slope, 2)), str(round(c_int, 2)))
plt.annotate(line_eq_3, (25, 0), fontsize=15, color="red")
print(f"R squared: {c_r ** 2}")

plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("north_humidity_v_latitude.png")
plt.show()

In [None]:
# Southern hemisphere: humidity against latitude, with a linear fit.
# Reads `df_south_hem` (rows of weather_df with negative latitude).

# Scatter of the raw observations
plt.scatter(df_south_hem["Latitude"], df_south_hem["Humidity"], marker = "o", color = "blue", 
            edgecolor = "black", s = 25)

plt.title("City Latitude (Southern Hemisphere) vs. Humidity")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")

# Linear regression of humidity on latitude
d_slope, d_int, d_r, d_p, d_std_err = linregress(df_south_hem["Latitude"], df_south_hem["Humidity"])
d_fit = d_slope * df_south_hem["Latitude"] + d_int

# Draw this plot's own fit (d_fit); b_fit belongs to the max-temperature
# regression and would put the wrong line on the humidity chart
plt.plot(df_south_hem["Latitude"], d_fit, "-", color = "red")

# Annotate the plot with the fitted equation and report R squared
line_eq_4 = "y = " + str(round(d_slope, 2)) + "x + " + str(round(d_int, 2))
plt.annotate(line_eq_4, (-30, 30), fontsize = 15, color = "red")
print(f"R squared: {d_r ** 2}")

plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("south_humidity_v_latitude.png")
plt.show()

In [None]:
# Northern hemisphere: cloudiness against latitude, with a linear fit
north_lat = df_north_hem["Latitude"]
north_clouds = df_north_hem["Cloudiness"]

# Scatter of the raw observations
plt.scatter(north_lat, north_clouds, marker="o", color="blue", edgecolor="black", s=25)

plt.title("City Latitude (Northern Hemisphere) vs. Cloudiness")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")

# Linear regression of cloudiness on latitude, drawn as a red line
e_slope, e_int, e_r, e_p, e_std_err = linregress(north_lat, north_clouds)
e_fit = e_slope * north_lat + e_int
plt.plot(north_lat, e_fit, "-", color="red")

# Annotate the plot with the fitted equation and report R squared
line_eq_5 = "y = {}x + {}".format(str(round(e_slope, 2)), str(round(e_int, 2)))
plt.annotate(line_eq_5, (40, 25), fontsize=15, color="red")
print(f"R squared: {e_r ** 2}")

plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("north_cloudiness_v_latitude.png")
plt.show()

In [None]:
# Southern hemisphere: cloudiness against latitude, with a linear fit
south_lat = df_south_hem["Latitude"]
south_clouds = df_south_hem["Cloudiness"]

# Scatter of the raw observations
plt.scatter(south_lat, south_clouds, marker="o", color="blue", edgecolor="black", s=25)

plt.title("City Latitude (Southern Hemisphere) vs. Cloudiness")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")

# Linear regression of cloudiness on latitude, drawn as a red line
f_slope, f_int, f_r, f_p, f_std_err = linregress(south_lat, south_clouds)
f_fit = f_slope * south_lat + f_int
plt.plot(south_lat, f_fit, "-", color="red")

# Annotate the plot with the fitted equation and report R squared
line_eq_6 = "y = {}x + {}".format(str(round(f_slope, 2)), str(round(f_int, 2)))
plt.annotate(line_eq_6, (-30, 25), fontsize=15, color="red")
print(f"R squared: {f_r ** 2}")

plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("south_cloudiness_v_latitude.png")
plt.show()

In [None]:
# Northern hemisphere: wind speed against latitude, with a linear fit
north_lat = df_north_hem["Latitude"]
north_wind = df_north_hem["Wind Speed"]

# Scatter of the raw observations
plt.scatter(north_lat, north_wind, marker="o", color="blue", edgecolor="black", s=25)

plt.title("City Latitude (Northern Hemisphere) vs. Wind Speed")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

# Linear regression of wind speed on latitude, drawn as a red line
g_slope, g_int, g_r, g_p, g_std_err = linregress(north_lat, north_wind)
g_fit = g_slope * north_lat + g_int
plt.plot(north_lat, g_fit, "-", color="red")

# Annotate the plot with the fitted equation and report R squared
line_eq_7 = "y = {}x + {}".format(str(round(g_slope, 2)), str(round(g_int, 2)))
plt.annotate(line_eq_7, (10, 40), fontsize=15, color="red")
print(f"R squared: {g_r ** 2}")

plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("north_windspeed_v_latitude.png")
plt.show()

In [None]:
# Southern hemisphere: wind speed against latitude, with a linear fit.
# Reads `df_south_hem` (rows of weather_df with negative latitude).

# Scatter of the raw observations
plt.scatter(df_south_hem["Latitude"], df_south_hem["Wind Speed"], marker = "o", color = "blue", 
            edgecolor = "black", s = 25)

plt.title("City Latitude (Southern Hemisphere) vs. Wind Speed")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")

# Linear regression of wind speed on latitude, drawn as a red line
h_slope, h_int, h_r, h_p, h_std_err = linregress(df_south_hem["Latitude"], df_south_hem["Wind Speed"])
h_fit = h_slope * df_south_hem["Latitude"] + h_int
plt.plot(df_south_hem["Latitude"], h_fit, "-", color = "red")

# The annotated equation must use this regression's coefficients (h_*);
# b_slope / b_int belong to the max-temperature fit and would label the
# chart with an equation that does not match the drawn line
line_eq_8 = "y = " + str(round(h_slope, 2)) + "x + " + str(round(h_int, 2))
plt.annotate(line_eq_8, (-50, 25), fontsize = 15, color = "red")
print(f"R squared: {h_r ** 2}")

plt.grid(True)

# Save the figure to disk, then display it
plt.savefig("south_windspeed_v_latitude.png")
plt.show()