In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy import stats

# Import API key
from api_weather_key import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV file that the collected weather data frame is written to
output_data_file = "../Output_Data/cities.csv"

# (min, max) bounds used when drawing random latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)



Generate Cities List

In [None]:
# Draw 1500 random latitudes and 1500 random longitudes and pair them up positionally.
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair. dict.fromkeys drops repeated
# names while keeping each city at the position where it was first seen.
nearest_names = (
    citipy.nearest_city(point_lat, point_lng).city_name
    for point_lat, point_lng in lat_lngs
)
cities = list(dict.fromkeys(nearest_names))

# Number of 50-city groups in the list (rounded), plus the starting values of the
# record/set counters used by the retrieval log.
total_calls = round(len(cities) / 50, 0)
num_record = 0
num_set = 1

# Print the city count to confirm there are enough cities to work with.
print(len(cities))
print(total_calls)

API Calls

-Perform a weather check on each city using a series of successive API calls.

-Print log of each city as it's being processed reflecting the number of the city in the list and its name.

In [None]:
# Lists that collect one value per successfully retrieved city; together they supply
# the columns of the data frame built from the API results.
lat = []
long = []
maxtemp = []
humidity = []
cloudiness = []
windspeed = []
country = []
date = []
cities_data = []

# Restart the log counters so that re-running this cell numbers the records from the
# beginning again instead of continuing from the previous run.
num_record = 0
num_set = 1

# OpenWeatherMap current-weather endpoint; the city, API key and units are sent as
# query parameters.
weather_url = "http://api.openweathermap.org/data/2.5/weather"

for city in cities:
    # Passing the query through `params` lets requests URL-encode the city name.
    # Units are imperial (temperatures in Fahrenheit, wind speed in mph).
    query = {"q": city, "appid": weather_api_key, "units": "imperial"}
    try:
        response = requests.get(weather_url, params=query, timeout=10).json()
    except (requests.RequestException, ValueError) as error:
        # Only the exception type is reported: the full message can contain the
        # request URL and therefore the API key.
        print(f"Request failed ({type(error).__name__}). Skipping.")
        response = None

    # Wait 1 second after every call to stay within the revolving 60 calls/minute
    # limit on the free tier of the OWM subscription.
    time.sleep(1)
    if response is None:
        continue

    # Read every field before touching the result lists. If any key is missing the
    # city is skipped as a whole, so all lists always keep the same length.
    try:
        city_lat = response['coord']['lat']
        city_long = response['coord']['lon']
        city_maxtemp = response['main']['temp_max']
        city_humidity = response['main']['humidity']
        city_cloudiness = response['clouds']['all']
        city_windspeed = response['wind']['speed']
        city_country = response['sys']['country']
        city_date = response['dt']
    except (KeyError, IndexError):
        print("City not found. Skipping.")
        continue

    lat.append(city_lat)
    long.append(city_long)
    maxtemp.append(city_maxtemp)
    humidity.append(city_humidity)
    cloudiness.append(city_cloudiness)
    windspeed.append(city_windspeed)
    country.append(city_country)
    date.append(city_date)
    cities_data.append(city)

    # Records are numbered 0 through 50 within a set; once record 50 has been
    # logged, the next city starts a new set at record 0.
    if num_record > 50:
        num_record = 0
        num_set += 1
    print(f"Processing Record {num_record} of Set {num_set} | {city}")
    num_record += 1

print("-------------------------")
print("Data retrieval completed.")


Building Data Frame

In [None]:
# Pair every column label with the list collected for it during the API calls, in the
# order the columns should appear in the data frame.
column_sources = [
    ("City Name", cities_data),
    ("Lat", lat),
    ("Long", long),
    ("Max Temp", maxtemp),
    ("Humidity", humidity),
    ("Cloudiness", cloudiness),
    ("Wind Speed", windspeed),
    ("Country", country),
    ("Date", date),
]
weather_data = dict(column_sources)

# Build the data frame and display it (up to 10000 rows).
weather_df = pd.DataFrame(weather_data)
weather_df.head(10000)

Writing to .CSV and displaying summary statistics

In [None]:
# Save the full data frame to the configured CSV path, then show summary statistics
# of its numeric columns as the cell output.
weather_df.to_csv(path_or_buf=output_data_file)
summary_stats = weather_df.describe()
summary_stats

Adjusting the data frame to remove any cities where the humidity is greater than 100%.

In [None]:
# Rows whose reported humidity exceeds 100%, displayed for inspection.
over_100_mask = weather_df["Humidity"].gt(100)
humidity_df = weather_df.loc[over_100_mask]
humidity_df.head(10000)

In [None]:
# Keep only the rows whose humidity is at most 100%; this cleaned frame feeds the plots.
valid_humidity_mask = weather_df["Humidity"].le(100)
weather_clean_df = weather_df.loc[valid_humidity_mask]
weather_clean_df.head(10000)

Scatter Plots

Latitude vs Temperature:

-Code is creating a scatter plot of the data frame (excluding cities where humidity is > 100%) plotting the latitude along the x-axis and max temperature on the y.

-Findings indicate that temperatures are warmest between -20 and 20 Latitude - closest to the equator.

In [None]:
# Scatter of max temperature against latitude: label the axes, save as .png, display.
temp_ax = weather_clean_df.plot.scatter(
    x="Lat",
    y="Max Temp",
    grid=True,
    figsize=(4, 4),
    title="City Latitude vs. Max Temperature",
)
temp_ax.set_xlabel("Latitude")
temp_ax.set_ylabel("Max Temperature (F)")
plt.savefig("../Output_Data/Lat_Temp.png")
plt.show()


Latitude vs Humidity:

-Code is creating a scatter plot of the data frame (excluding cities where humidity is > 100%) plotting the latitude along the x-axis and humidity on the y.

-Findings indicate that the number of cities with high humidity increases as latitude increases.

In [None]:
# Scatter of humidity against latitude: label the axes, save as .png, display.
humidity_ax = weather_clean_df.plot.scatter(
    x="Lat",
    y="Humidity",
    grid=True,
    figsize=(4, 4),
    title="City Latitude vs. Humidity",
)
humidity_ax.set_xlabel("Latitude")
humidity_ax.set_ylabel("Humidity")
plt.savefig("../Output_Data/Lat_Humidity.png")
plt.show()

Latitude vs Cloudiness:
    
-Code is creating a scatter plot of the data frame (excluding cities where humidity is > 100%) plotting the latitude along the x-axis and cloudiness on the y.

-Latitude does not appear to be a determining factor of cloudiness as a roughly even number of cities appear to be either very cloudy (100) or not cloudy (0).    

In [None]:
# Scatter of cloudiness against latitude: label the axes, save as .png, display.
cloud_ax = weather_clean_df.plot.scatter(
    x="Lat",
    y="Cloudiness",
    grid=True,
    figsize=(4, 4),
    title="City Latitude vs. Cloudiness",
)
cloud_ax.set_xlabel("Latitude")
cloud_ax.set_ylabel("Cloudiness")
plt.savefig("../Output_Data/Lat_Cloudiness.png")
plt.show()

Latitude vs. Wind Speed:
    
-Code is creating a scatter plot of the data frame (excluding cities where humidity is > 100%) plotting the latitude along the x-axis and wind speed on the y.

-Findings indicate wind speeds tend to be lower on average.

In [None]:
# Scatter of wind speed against latitude: label the axes, save as .png, display.
wind_ax = weather_clean_df.plot.scatter(
    x="Lat",
    y="Wind Speed",
    grid=True,
    figsize=(4, 4),
    title="City Latitude vs. Wind Speed",
)
wind_ax.set_xlabel("Latitude")
wind_ax.set_ylabel("Wind Speed")
plt.savefig("../Output_Data/Lat_Wind.png")
plt.show()

Linear Regression - Northern and Southern Hemispheres

In [None]:
# Split the cleaned data into northern and southern hemisphere frames for the scatter
# plots and regressions. Latitude 0 is included in both, as it cannot really be
# claimed by either hemisphere.
in_north = weather_clean_df["Lat"] >= 0
in_south = weather_clean_df["Lat"] <= 0
north_df = weather_clean_df[in_north]
south_df = weather_clean_df[in_south]

# Series used by each linear regression. Columns are selected by label rather than by
# position so the selections stay correct if the column order of the frame changes.
n_lat = north_df["Lat"]
n_temp = north_df["Max Temp"]
n_hum = north_df["Humidity"]
n_cloud = north_df["Cloudiness"]
n_wind = north_df["Wind Speed"]

s_lat = south_df["Lat"]
s_temp = south_df["Max Temp"]
s_hum = south_df["Humidity"]
s_cloud = south_df["Cloudiness"]
s_wind = south_df["Wind Speed"]

Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Fit a least-squares line of max temperature against latitude (northern hemisphere).
nt_slope, nt_int, nt_r, nt_p, nt_std_err = stats.linregress(n_lat, n_temp)

# Predicted max temperature at each latitude, taken from the fitted line.
nt_fit = nt_slope * n_lat + nt_int

# Plot the observations with the fitted line drawn over them as a dashed red line.
plt.scatter(n_lat,n_temp)
plt.plot(n_lat,nt_fit,"r--")
plt.xlabel('Lat')
plt.ylabel('Max Temp')

# Show the fitted equation on the graph; (6,10) is a fixed position in data coordinates.
nt_line = "y = " + str(round(nt_slope,2)) + "x + " + str(round(nt_int,2))
plt.annotate(nt_line,(6,10),fontsize=15,color="red")

# linregress returns the correlation coefficient r; the value printed is its square
# (the coefficient of determination), so it is labelled r-squared rather than r-value.
print(f"The r-squared is: {nt_r**2}")

# Save the figure as a .png and display it.
plt.savefig("../Output_Data/NorthernLat_Temp.png")
plt.show()


Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Fit a least-squares line of max temperature against latitude (southern hemisphere).
st_slope, st_int, st_r, st_p, st_std_err = stats.linregress(s_lat, s_temp)

# Predicted max temperature at each latitude, taken from the fitted line.
st_fit = st_slope * s_lat + st_int

# Plot the observations with the fitted line drawn over them as a dashed red line.
plt.scatter(s_lat,s_temp)
plt.plot(s_lat,st_fit,"r--")
plt.xlabel('Lat')
plt.ylabel('Max Temp')

# Show the fitted equation on the graph; (-50,95) is a fixed position in data coordinates.
st_line = "y = " + str(round(st_slope,2)) + "x + " + str(round(st_int,2))
plt.annotate(st_line,(-50,95),fontsize=15,color="red")

# linregress returns the correlation coefficient r; the value printed is its square
# (the coefficient of determination), so it is labelled r-squared rather than r-value.
print(f"The r-squared is: {st_r**2}")

# Save the figure as a .png and display it.
plt.savefig("../Output_Data/SouthernLat_Temp.png")
plt.show()

Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Fit a least-squares line of humidity against latitude (northern hemisphere).
nh_slope, nh_int, nh_r, nh_p, nh_std_err = stats.linregress(n_lat, n_hum)

# Predicted humidity at each latitude, taken from the fitted line.
nh_fit = nh_slope * n_lat + nh_int

# Plot the observations with the fitted line drawn over them as a dashed red line.
plt.scatter(n_lat,n_hum)
plt.plot(n_lat,nh_fit,"r--")
plt.xlabel('Lat')
plt.ylabel('Humidity')

# Show the fitted equation on the graph; (45,25) is a fixed position in data coordinates.
nh_line = "y = " + str(round(nh_slope,2)) + "x + " + str(round(nh_int,2))
plt.annotate(nh_line,(45,25),fontsize=15,color="red")

# linregress returns the correlation coefficient r; the value printed is its square
# (the coefficient of determination), so it is labelled r-squared rather than r-value.
print(f"The r-squared is: {nh_r**2}")

# Save the figure as a .png and display it.
plt.savefig("../Output_Data/NorthernHum_Temp.png")
plt.show()

Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Fit a least-squares line of humidity against latitude (southern hemisphere).
sh_slope, sh_int, sh_r, sh_p, sh_std_err = stats.linregress(s_lat, s_hum)

# Predicted humidity at each latitude, taken from the fitted line.
sh_fit = sh_slope * s_lat + sh_int

# Plot the observations with the fitted line drawn over them as a dashed red line.
plt.scatter(s_lat,s_hum)
plt.plot(s_lat,sh_fit,"r--")
plt.xlabel('Lat')
plt.ylabel('Humidity')

# Show the fitted equation on the graph; (-50,20) is a fixed position in data coordinates.
sh_line = "y = " + str(round(sh_slope,2)) + "x + " + str(round(sh_int,2))
plt.annotate(sh_line,(-50,20),fontsize=15,color="red")

# linregress returns the correlation coefficient r; the value printed is its square
# (the coefficient of determination), so it is labelled r-squared rather than r-value.
print(f"The r-squared is: {sh_r**2}")

# Save the figure as a .png and display it.
plt.savefig("../Output_Data/SouthernHum_Temp.png")
plt.show()

Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Fit a least-squares line of cloudiness against latitude (northern hemisphere).
nc_slope, nc_int, nc_r, nc_p, nc_std_err = stats.linregress(n_lat, n_cloud)

# Predicted cloudiness at each latitude, taken from the fitted line.
nc_fit = nc_slope * n_lat + nc_int

# Plot the observations with the fitted line drawn over them as a dashed red line.
plt.scatter(n_lat,n_cloud)
plt.plot(n_lat,nc_fit,"r--")
plt.xlabel('Lat')
plt.ylabel('Cloudiness')

# Show the fitted equation on the graph; (45,25) is a fixed position in data coordinates.
nc_line = "y = " + str(round(nc_slope,2)) + "x + " + str(round(nc_int,2))
plt.annotate(nc_line,(45,25),fontsize=15,color="red")

# linregress returns the correlation coefficient r; the value printed is its square
# (the coefficient of determination), so it is labelled r-squared rather than r-value.
print(f"The r-squared is: {nc_r**2}")

# Save the figure as a .png and display it.
plt.savefig("../Output_Data/NorthernCloud_Temp.png")
plt.show()

Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Fit a least-squares line of cloudiness against latitude (southern hemisphere).
sc_slope, sc_int, sc_r, sc_p, sc_std_err = stats.linregress(s_lat, s_cloud)

# Predicted cloudiness at each latitude, taken from the fitted line.
sc_fit = sc_slope * s_lat + sc_int

# Plot the observations with the fitted line drawn over them as a dashed red line.
plt.scatter(s_lat,s_cloud)
plt.plot(s_lat,sc_fit,"r--")
plt.xlabel('Lat')
plt.ylabel('Cloudiness')

# Show the fitted equation on the graph; (-50,40) is a fixed position in data coordinates.
sc_line = "y = " + str(round(sc_slope,2)) + "x + " + str(round(sc_int,2))
plt.annotate(sc_line,(-50,40),fontsize=15,color="red")

# linregress returns the correlation coefficient r; the value printed is its square
# (the coefficient of determination), so it is labelled r-squared rather than r-value.
print(f"The r-squared is: {sc_r**2}")

# Save the figure as a .png and display it.
plt.savefig("../Output_Data/SouthernCloud_Temp.png")
plt.show()

Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Fit a least-squares line of wind speed against latitude (northern hemisphere).
nw_slope, nw_int, nw_r, nw_p, nw_std_err = stats.linregress(n_lat, n_wind)

# Predicted wind speed at each latitude, taken from the fitted line.
nw_fit = nw_slope * n_lat + nw_int

# Plot the observations with the fitted line drawn over them as a dashed red line.
plt.scatter(n_lat,n_wind)
plt.plot(n_lat,nw_fit,"r--")
plt.xlabel('Lat')
plt.ylabel('Wind Speed')

# Show the fitted equation on the graph; (45,25) is a fixed position in data coordinates.
nw_line = "y = " + str(round(nw_slope,2)) + "x + " + str(round(nw_int,2))
plt.annotate(nw_line,(45,25),fontsize=15,color="red")

# linregress returns the correlation coefficient r; the value printed is its square
# (the coefficient of determination), so it is labelled r-squared rather than r-value.
print(f"The r-squared is: {nw_r**2}")

# Save the figure as a .png and display it.
plt.savefig("../Output_Data/NorthernWind_Temp.png")
plt.show()

Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Fit a least-squares line of wind speed against latitude (southern hemisphere).
sw_slope, sw_int, sw_r, sw_p, sw_std_err = stats.linregress(s_lat, s_wind)

# Predicted wind speed at each latitude, taken from the fitted line.
sw_fit = sw_slope * s_lat + sw_int

# Plot the observations with the fitted line drawn over them as a dashed red line.
plt.scatter(s_lat,s_wind)
plt.plot(s_lat,sw_fit,"r--")
plt.xlabel('Lat')
plt.ylabel('Wind Speed')

# Show the fitted equation on the graph; (-50,20) is a fixed position in data coordinates.
sw_line = "y = " + str(round(sw_slope,2)) + "x + " + str(round(sw_int,2))
plt.annotate(sw_line,(-50,20),fontsize=15,color="red")

# linregress returns the correlation coefficient r; the value printed is its square
# (the coefficient of determination), so it is labelled r-squared rather than r-value.
print(f"The r-squared is: {sw_r**2}")

# Save the figure as a .png and display it.
plt.savefig("../Output_Data/SouthernWind_Temp.png")
plt.show()