In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
import json


# Import API key
from api_keys import api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy


# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Generate Cities List¶

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

630

# Perform API Calls¶
#Perform a weather check on each city using a series of successive API calls.
#Include a print log of each city as it'sbeing processed (with the city number and city name).

In [None]:
#Get the Weather data
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"
query_url = f"{url}appid={api_key}&units={units}&q="

weather_response = requests.get(query_url + city)
weather_json = weather_response.json()

#print(json.dumps(weather_json, indent=4))
#print(requests.get(query_url+city))

# Set Up Lists to Hold Reponse Info
city_name = []
latitude= []
longitude = []
max_temperature = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

# Processing Record Counter Starting a 1
processing_record = 1

# Print Starting Log Statement
print(f"Beginning Data Retrieval")
print(f"-------------------------------")

# Loop Through List of Cities & Perform a Request for Data on Each
for city in cities:
   
     
# Exception Handling
    try:
        response = requests.get(query_url+ city).json()
        city_name.append(response["name"])
        latitude.append(response["coord"]["lat"])
        longitude.append(response["coord"]["lon"])
        max_temperature.append(response["main"]["temp_max"])
        humidity.append(response["main"]["humidity"])
        cloudiness.append(response["clouds"]["all"])
        wind_speed.append(response["wind"]["speed"])
        country.append(response["sys"]["country"])
        date.append(pd.to_datetime(response["dt"],unit='s'))
        current_city = response["name"]
        print(f"Processing Record {processing_record}| {current_city}")
    
        # Increase Processing Record Counter by 1 For Each Loop
        processing_record += 1
          
    except:
        print("City not found next ...") 
    continue

# Print Ending Log Statement
print(f"-------------------------------")
print(f"Data Retrieval Complete")
print(f"-------------------------------")

Beginning Data Retrieval
-------------------------------
Processing Record 1| Saskylakh
Processing Record 2| Casa Nova
Processing Record 3| Tuktoyaktuk
Processing Record 4| Bud
Processing Record 5| Mbabane
Processing Record 6| Nishihara
Processing Record 7| Bonavista
Processing Record 8| Ungaran
Processing Record 9| Rikitea
Processing Record 10| Kapaa
Processing Record 11| Kaitangata
Processing Record 12| Dongli
Processing Record 13| Tasiilaq
Processing Record 14| Port Alfred
Processing Record 15| Albany
Processing Record 16| San Patricio
Processing Record 17| Cape Town
Processing Record 18| Laguna
Processing Record 19| Busselton
Processing Record 20| Lavrentiya
Processing Record 21| Buta
Processing Record 22| Caucaia
Processing Record 23| Iqaluit
Processing Record 24| Saint Paul Harbor
Processing Record 25| Kadaň
Processing Record 26| Nizhnyaya Tavda
Processing Record 27| Lianzhou
Processing Record 28| Bunia
Processing Record 29| New Norfolk
Processing Record 30| Hermanus
Processing R

Processing Record 238| Ayan
Processing Record 239| Enid
Processing Record 240| Mossamedes
Processing Record 241| Bima
Processing Record 242| Kruisfontein
Processing Record 243| Batemans Bay
Processing Record 244| Mīzan Teferī
Processing Record 245| Ponta Delgada
Processing Record 246| Cururupu
Processing Record 247| Owatonna
Processing Record 248| Longyearbyen
Processing Record 249| Taseyevo
Processing Record 250| Nuuk
Processing Record 251| Vila do Maio
City not found next ...
Processing Record 252| Aasiaat
Processing Record 253| Evensk
Processing Record 254| Muli
City not found next ...
Processing Record 255| Henties Bay
Processing Record 256| Geraldton
City not found next ...
Processing Record 257| Davila
Processing Record 258| Vanimo
Processing Record 259| Tadine
Processing Record 260| Olinda
Processing Record 261| Talnakh
City not found next ...
Processing Record 262| Hearst
City not found next ...
Processing Record 263| Desaguadero
Processing Record 264| Puerto Ayora
Processing R

# Convert Raw Data to DataFrame¶
#Export the city data into a .csv.
#Display the DataFrame

In [7]:
weather_df = pd.DataFrame({"City":city_name, "Latitude":latitude, "Longitude":longitude,
    "Maximum Temperature": max_temperature,"Humidity %":humidity, "Cloudiness %": cloudiness
   ,"Wind Speed(mph)": wind_speed,"Country":country,"Date":date})

weather_df

Unnamed: 0,City,Latitude,Longitude,Maximum Temperature,Humidity %,Cloudiness %,Wind Speed(mph),Country,Date
0,Ancud,-41.87,-73.82,13.00,62,5,5.70,CL,2020-11-08 00:47:51
1,Ushuaia,-54.80,-68.30,14.00,47,40,9.30,AR,2020-11-08 00:47:38
2,Hermanus,-34.42,19.23,13.89,86,91,2.24,ZA,2020-11-08 00:47:59
3,Ribeira Grande,38.52,-28.70,16.36,72,8,5.96,PT,2020-11-08 00:43:56
4,Forestville,38.85,-76.88,17.22,81,1,1.50,US,2020-11-08 00:47:59
...,...,...,...,...,...,...,...,...,...
564,Wendo,6.60,38.42,14.37,87,81,1.61,ET,2020-11-08 00:51:06
565,Pundaguitan,6.37,126.17,27.90,76,56,2.13,PH,2020-11-08 00:51:06
566,Aquiraz,-3.90,-38.39,27.00,78,20,3.60,BR,2020-11-08 00:51:05
567,Makakilo City,21.35,-158.09,31.00,55,40,3.10,US,2020-11-08 00:51:06


In [10]:
# Output File (CSV)
output_data_file = "output_data/cities.csv"
weather_df.to_csv(output_data_file)

FileNotFoundError: [Errno 2] No such file or directory: '../../output_data/cities.csv'

# Inspect the data and remove the cities where the humidity > 100%.¶
# Skip this step if there are no cities that have humidity > 100%.

In [None]:
weather_humidity_inspected = weather_df.loc[weather_df["Humidity %"] <= 100 , :]
weather_humidity_inspected

In [None]:
#  Get the indices of cities that have humidity over 100%.
weather_high_humidity = weather_df.loc[weather_df["Humidity %"] > 100 , :]
indices_high_humidity = weather_high_humidity.index.values
print(indices_high_humidity)

In [None]:

# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
weather_df.drop (indices_high_humidity, inplace=True)
weather_df

# Plotting the Data¶
#Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
#Save the plotted figures as .pngs

# Latitude vs. Temperature Plot

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties
from datetime import datetime
plt.scatter(weather_df["Latitude"],weather_df["Maximum Temperature"], facecolors="red", marker="o", edgecolor="black")
plt.title(f"Latitude vs. Temperature on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Maximum Temperature")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/City_Latitude_vs_Max_Temperature.png")

plt.show()

# Latitude vs. Humidity Plot

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties

plt.scatter(weather_df["Latitude"],weather_df["Humidity %"],facecolors="blue", marker="o", edgecolor="black")
plt.title(f"Latitude vs. Humidity on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Humididty")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/City_Latitude_vs_Humidity.png")


plt.show()

# Latitude vs. Cloudiness Plot

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties

plt.scatter(weather_df["Latitude"],weather_df["Cloudiness %"],facecolors="green", marker="o", edgecolor="black")
plt.title(f"Latitude vs. Cloudiness on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.grid(True)
# Save Figure
plt.savefig("../output_data/City_Latitude_vs_Cloudiness.png")


plt.show()

# Latitude vs. Wind Speed Plot

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties

plt.scatter(weather_df["Latitude"],weather_df["Wind Speed(mph)"],facecolors="purple", marker="o", edgecolor="black")
plt.title(f"Latitude vs. Wind Speed on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/City_Latitude_vs_Wind_Speed.png")


plt.show()

# Linear Regression

# Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties

#Extraction of Northern Hemisphere Data
northern_hemisphere = weather_df.loc[weather_df["Latitude"]>0]
x_values = northern_hemisphere["Latitude"]
y_values = northern_hemisphere["Maximum Temperature"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(x_values.median(),y_values.median()),fontsize=15,color="red")
plt.title(f"Northern Hemisphere Max Temp vs. Latitude Linear Regression on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Maximum Temperaure (C)")
plt.grid(True)


# Save Figure
plt.savefig("../output_data/Northern Hemisphere - Max Temp vs. Latitude Linear Regression.png")

print(line_eq)
plt.show()

# Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties

#Extraction of Northern Hemisphere Data
southern_hemisphere = weather_df.loc[weather_df["Latitude"]<0]
x_values = southern_hemisphere["Latitude"]
y_values = southern_hemisphere["Maximum Temperature"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(x_values.median(),y_values.median()),fontsize=15,color="red")
plt.title(f"Southern Hemisphere - Max Temp vs. Latitude Linear Regression on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Maximum Temperaure (C)")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/southern Hemisphere - Max Temp vs. Latitude Linear Regression.png")


print(line_eq)
plt.show()

# Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties

y_values = northern_hemisphere["Humidity %"]
x_values =  northern_hemisphere["Latitude"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(x_values.median(),y_values.median()),fontsize=15,color="red")
plt.title(f"Northern Hemisphere Humidity (%) vs. Latitude Linear Regression on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Humidity(%)")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression.png")

print(line_eq)
plt.show()

# Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties

y_values = southern_hemisphere["Humidity %"]
x_values =  southern_hemisphere["Latitude"] 

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(x_values.median(),y_values.median()),fontsize=15,color="red")
plt.title(f"Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Humidity(%)")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression.png")

print(line_eq)
plt.show()

# Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties

y_values = northern_hemisphere["Cloudiness %"]
x_values =  northern_hemisphere["Latitude"] 

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(x_values.median(),y_values.median()),fontsize=15,color="red")
plt.title(f"Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness(%)")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression.png")

print(line_eq)
plt.show()

# Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#Build Scatter Plot for Each Data Type
# Incorporate Other Graph Properties

y_values = southern_hemisphere["Cloudiness %"]
x_values = southern_hemisphere["Latitude"] 

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(x_values.median(),y_values.median()),fontsize=15,color="red")


plt.title(f"Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness(%)")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression.png")

print(line_eq)
plt.show()

# Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
y_values = northern_hemisphere["Wind Speed(mph)"]
x_values = northern_hemisphere["Latitude"] 

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(x_values.median(),y_values.median()),fontsize=15,color="red")


plt.title(f"Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed(mph)")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/Northern Hemisphere - WindSpeed(mph)) vs. Latitude Linear Regression.png")

print(line_eq)
plt.show()

# Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
y_values = southern_hemisphere["Wind Speed(mph)"]
x_values = southern_hemisphere["Latitude"] 

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(x_values.median(),y_values.median()),fontsize=15,color="red")


plt.title(f"Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression  on {datetime.today().date()}")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed(mph)")
plt.grid(True)

# Save Figure
plt.savefig("../output_data/Southern Hemisphere - WindSpeed(mph)) vs. Latitude Linear Regression.png")

print(line_eq)
plt.show()