In [72]:
# Dependencies
import datetime as dt
import json
from pathlib import Path
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy

from api_keys import api_key

# Presumably the destination CSV for the collected city data — confirm
# against the cell that exports the DataFrame.
csv_save = "output_data/cities.csv"
# (low, high) bounds handed to np.random.uniform when sampling coordinates.
lat_range = (-90, 90)
lng_range = (-180, 180)

In [73]:
# OpenWeatherMap current-weather endpoint. HTTPS is used because the API key
# travels in the query string and would otherwise be sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
# "imperial" requests Fahrenheit temperatures and wind speed in miles/hour.
units = "imperial"

# Build partial query URL; a city name is appended after "q=" for each request.
query_url = f"{url}appid={api_key}&units={units}&q="

In [74]:
# Connectivity check against the bare endpoint (no API key or city is sent).
# The timeout keeps the cell from blocking forever if the server never answers.
response = requests.get(url, timeout=10)

print(response.url)


http://api.openweathermap.org/data/2.5/weather


In [75]:
# Draw 1500 random coordinate pairs inside the configured bounds.
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Resolve every coordinate to its nearest city and keep each name once.
# dict.fromkeys de-duplicates with constant-time lookups while preserving
# first-seen order, replacing the quadratic "if city not in list" scan.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(latitude, longitude).city_name
        for latitude, longitude in lat_lngs
    )
)

len(cities)


589

In [None]:
# One list per output column; position i in every list describes the same city.
city_name = []
cloudiness = []
country = []
date = []
humidity = []
lat = []
lng = []
max_temp = []
wind_speed = []

# Same order as the fields gathered into `record` inside the loop below.
columns = (city_name, cloudiness, country, date, humidity, lat, lng, max_temp, wind_speed)

# 1-based counter of successfully retrieved cities (used for the log line).
count = 1

print(f"Beginning Data Retrieval")
print("-"*60)

for city in cities:
    try:
        response = requests.get(query_url + city, timeout=10).json()
        # Read every field BEFORE appending anything: if a key is missing
        # half-way through, no list has been touched, so all columns keep
        # the same length and the DataFrame can still be built.
        record = (
            response["name"],
            response["clouds"]["all"],
            response["sys"]["country"],
            response["dt"],
            response["main"]["humidity"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["wind"]["speed"],
        )
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        # Network failure/timeout, a body that is not JSON, or a payload
        # without the expected fields. Unlike a bare `except:`, this still
        # lets KeyboardInterrupt stop a long-running cell.
        print(f"City not found. Skipping...")
        continue

    for column, value in zip(columns, record):
        column.append(value)

    print(f"Processing Record {count} | {city}")
    count += 1

print("-"*60)
print(f"Data Retrieval Complete")
print("-"*60)


Beginning Data Retrieval
------------------------------------------------------------
Processing Record 1 | albany
Processing Record 2 | kapaa
Processing Record 3 | roald
Processing Record 4 | iqaluit
Processing Record 5 | zhigansk
Processing Record 6 | huangnihe
Processing Record 7 | jalu
Processing Record 8 | vicuna
Processing Record 9 | haines junction
Processing Record 10 | hithadhoo
Processing Record 11 | vaini
Processing Record 12 | hermanus
Processing Record 13 | provideniya
City not found. Skipping...
Processing Record 14 | torbay
Processing Record 15 | busselton
Processing Record 16 | sechura
Processing Record 17 | mangrol
Processing Record 18 | taltal
Processing Record 19 | bud
Processing Record 20 | victoria
Processing Record 21 | petropavlovsk-kamchatskiy
Processing Record 22 | helong
Processing Record 23 | cidreira
Processing Record 24 | huarmey
Processing Record 25 | jurado
Processing Record 26 | faanui
Processing Record 27 | sahaspur
Processing Record 28 | hilo
Processin

In [None]:
# Pair each output column name with the list filled during data retrieval;
# the order below is the column order of the resulting DataFrame.
weather_dict = dict(
    zip(
        ["City", "Cloudiness", "Country", "Date", "Humidity",
         "Lat", "Lng", "Max Temp", "Wind Speed"],
        [city_name, cloudiness, country, date, humidity,
         lat, lng, max_temp, wind_speed],
    )
)

# Assemble the table from the column mapping
weather_data = pd.DataFrame(weather_dict)

# Non-null entries per column
weather_data.count()


In [None]:
# Export to the path configured in `csv_save` instead of a second, hard-coded
# file name, creating the target directory first so a fresh checkout works.
output_path = Path(csv_save)
output_path.parent.mkdir(parents=True, exist_ok=True)
weather_data.to_csv(output_path)

weather_data.head()

In [None]:
# Today's date (MM/DD/YYYY) for the plot titles.
date = f"{dt.date.today():%m/%d/%Y}"

fig, ax = plt.subplots()
ax.scatter(
    weather_data["Lat"],
    weather_data["Max Temp"],
    marker="o",
    s=20,
    alpha=.75,
    edgecolors="k",
)
ax.set_title(f"City Latitude vs. Max Temperature {date}")
ax.set_xlabel("Latitude")
ax.set_ylabel("Max Temperature (F)")
ax.grid(True)

# Save before showing so the written image contains the plot.
fig.savefig("City Latitude vs Max Temperature.png")
plt.show()

In [None]:
fig, ax = plt.subplots()
ax.scatter(
    weather_data["Lat"],
    weather_data["Humidity"],
    marker="o",
    s=20,
    alpha=.75,
    edgecolors="k",
)
ax.set_title(f"City Latitude vs. Humidity {date}")
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity (%)")
ax.grid(True)

# Save before showing so the written image contains the plot.
fig.savefig("City Latitude vs Humidity.png")
plt.show()

In [None]:
fig, ax = plt.subplots()
ax.scatter(
    weather_data["Lat"],
    weather_data["Cloudiness"],
    marker="o",
    s=20,
    alpha=.75,
    edgecolors="k",
)
ax.set_title(f"City Latitude vs. Cloudiness {date}")
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")
ax.grid(True)

# Save before showing so the written image contains the plot.
fig.savefig("City Latitude vs Cloudiness.png")
plt.show()

In [None]:
fig, ax = plt.subplots()
ax.scatter(
    weather_data["Lat"],
    weather_data["Wind Speed"],
    marker="o",
    s=20,
    alpha=.75,
    edgecolors="k",
)
ax.set_title(f"City Latitude vs. Wind Speed {date}")
ax.set_xlabel("Latitude")
ax.set_ylabel("Wind Speed (mph)")
ax.grid(True)

# Save before showing so the written image contains the plot.
fig.savefig("City Latitude vs Wind Speed.png")
plt.show()

## Inspect the data and remove the cities where the humidity is above 100%.
----
Skip this step if no city has a humidity above 100%.

In [None]:
# Actually test for impossible humidity readings instead of asserting that
# none exist; the message printed now reflects the data.
humidity_over_100 = weather_data["Humidity"] > 100

if humidity_over_100.any():
    print(f"Removing {humidity_over_100.sum()} cities with humidity above 100%")
    # Re-running this cell is harmless: after filtering, no row matches again.
    weather_data = weather_data.loc[~humidity_over_100]
else:
    print( f"There are no cities with humidity above 100%")

In [None]:
# Split the cities by hemisphere. Latitude 0 is assigned to the northern
# frame so that a city exactly on the equator is not dropped from both subsets.
north_hemi_df = weather_data.loc[(weather_data['Lat']>=0)]
south_hemi_df = weather_data.loc[(weather_data['Lat']<0)]

In [None]:
# Non-null entries per column in the southern-hemisphere subset.
south_hemi_df.count()

In [None]:
# Non-null entries per column in the northern-hemisphere subset.
north_hemi_df.count()

In [None]:
from scipy.stats import linregress
def line_regr(x, y, yaxis):
    """Scatter ``y`` against ``x``, overlay the least-squares line and show it.

    Args:
        x: Values for the horizontal axis (labelled "Latitude"). Must support
            scalar arithmetic such as ``slope * x`` (e.g. a pandas Series or
            NumPy array).
        y: Values for the vertical axis, paired element-wise with ``x``.
        yaxis: Label for the vertical axis.

    Returns:
        None. The function prints the coefficient of determination
        (r-squared) of the fit, draws through the pyplot interface and ends
        with ``plt.show()``.
    """
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
    y_pred = intercept + slope*x
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

    # Plot
    plt.scatter(x,y)
    # The fitted equation is exposed through the legend entry of the line.
    plt.plot(x,y_pred,"r-", label=line_eq)
    plt.legend()
    plt.xlabel('Latitude')
    plt.ylabel(yaxis)
    # linregress reports the correlation coefficient r; it has to be squared
    # to obtain the r-squared value named in the message.
    print(f"r-squared: {rvalue**2}")
    plt.show()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
x = north_hemi_df['Lat']
y = north_hemi_df['Max Temp']

# line_regr() calls plt.show() before returning, so a plt.savefig() issued
# afterwards would write a blank image. Hold on to the figure it draws on
# and save through that handle instead.
fig = plt.figure()
line_regr(x,y,'Max Temp')

fig.savefig("reg_north_temp_vs_lat.png")

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
x = south_hemi_df['Lat']
y = south_hemi_df['Max Temp']

# line_regr() calls plt.show() before returning, so a plt.savefig() issued
# afterwards would write a blank image. Hold on to the figure it draws on
# and save through that handle instead.
fig = plt.figure()
line_regr(x,y,'Max Temp')

fig.savefig("reg_south_temp_vs_lat.png")

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x = north_hemi_df['Lat']
y = north_hemi_df['Humidity']

# line_regr() calls plt.show() before returning, so a plt.savefig() issued
# afterwards would write a blank image. Hold on to the figure it draws on
# and save through that handle instead.
fig = plt.figure()
line_regr(x,y,'Humidity')

fig.savefig("reg_north_hum_vs_lat.png")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x = south_hemi_df['Lat']
y = south_hemi_df['Humidity']

# line_regr() calls plt.show() before returning, so a plt.savefig() issued
# afterwards would write a blank image. Hold on to the figure it draws on
# and save through that handle instead.
fig = plt.figure()
line_regr(x,y,'Humidity')

fig.savefig("reg_south_hum_vs_lat.png")

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x = north_hemi_df['Lat']
y = north_hemi_df['Cloudiness']

# line_regr() calls plt.show() before returning, so a plt.savefig() issued
# afterwards would write a blank image. Hold on to the figure it draws on
# and save through that handle instead.
fig = plt.figure()
line_regr(x,y,'Cloudiness')

fig.savefig("reg_north_cloud_vs_lat.png")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x = south_hemi_df['Lat']
y = south_hemi_df['Cloudiness']

# line_regr() calls plt.show() before returning, so a plt.savefig() issued
# afterwards would write a blank image. Hold on to the figure it draws on
# and save through that handle instead.
fig = plt.figure()
line_regr(x,y,'Cloudiness')

fig.savefig("reg_south_cloud_vs_lat.png")

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x = north_hemi_df['Lat']
y = north_hemi_df['Wind Speed']

# line_regr() calls plt.show() before returning, so a plt.savefig() issued
# afterwards would write a blank image. Hold on to the figure it draws on
# and save through that handle instead.
fig = plt.figure()
line_regr(x,y,'Wind Speed (mph)')

fig.savefig("reg_north_wind_vs_lat.png")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x = south_hemi_df['Lat']
y = south_hemi_df['Wind Speed']

# line_regr() calls plt.show() before returning, so a plt.savefig() issued
# afterwards would write a blank image. Hold on to the figure it draws on
# and save through that handle instead.
fig = plt.figure()
line_regr(x,y,'Wind Speed (mph)')

fig.savefig("reg_south_wind_vs_lat.png")