# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [48]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from datetime import date
from scipy.stats import linregress
from requests.exceptions import MissingSchema
import json

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
# need to install citipy [pip install citipy via terminal]
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [49]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)
cities

['atuona',
 'tarudant',
 'barrow',
 'paamiut',
 'mar del plata',
 'taolanaro',
 'port elizabeth',
 'novorudnyy',
 'lossiemouth',
 'chuy',
 'chilliwack',
 'hasaki',
 'puerto ayora',
 'venice',
 'adrar',
 'longyearbyen',
 'tilichiki',
 'rikitea',
 'ambilobe',
 'tahta',
 'avarua',
 'tieling',
 'tombouctou',
 'east london',
 'bukama',
 'north branch',
 'provideniya',
 'pevek',
 'saint-pierre',
 'bredasdorp',
 'itoman',
 'hermanus',
 'ushuaia',
 'tank',
 'hofn',
 'avera',
 'jiddah',
 'georgetown',
 'kavieng',
 'butaritari',
 'khatanga',
 'mys shmidta',
 'lebu',
 'necochea',
 'hobart',
 'belushya guba',
 'yellowknife',
 'boca do acre',
 'komsomolskiy',
 'cagayan de tawi-tawi',
 'qaanaaq',
 'punta arenas',
 'port alfred',
 'xining',
 'busselton',
 'albany',
 'hinton',
 'saint-philippe',
 'attawapiskat',
 'hornepayne',
 'muros',
 'paradwip',
 'kahului',
 'palabuhanratu',
 'nanzhang',
 'tasiilaq',
 'cape town',
 'cap malheureux',
 'samusu',
 'vardo',
 'ancud',
 'bluff',
 'lensk',
 'vaini',
 'ch

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [50]:
# base url:
units = 'imperial'
base_url = f"api.openweathermap.org/data/2.5/weather?appid={weather_api_key}&units={units}&q="

# query_url = f"base_url {city} &appid= {weather_api_key}"

# create lists for dataFrame
city_name = []
city_lat = []
city_lang = []
city_cloud = []
city_temp = []
city_maxtemp = []
city_humidity = []
city_wind_speed = []
city_country = []
date = []
# set couter for city and set
item = 1
batch = 1

# begin to display data
print('Beginning data retrival...')
# loop thourgh list of citys to request data for each city
for city in cities:
    query_url = base_url + city
    response = requests.get(query_url).json()
    try:
        print(f"Processing record {item} of set {batch} | {city}")
        city_name.append(response['name'])
        city_lat.append(response['coord']['lat'])
        city_lang.append(response['coord']['lon'])
        city_cloud.append(response['clouds']['all'])
        city_temp.append(response['main']['temp'])
        city_maxtemp.append(response['main']['temp_max'])
        city_humidity.append(response['main']['humidity'])
        city_wind_speed.append(response['wind']['speed'])
        date.append(response['dt'])
        
        item += 1
        # limit # of api calls
        if item > 50:
              batch += 1
              # reset item
              item = 1
              time.sleep(2)
    except:
        print(f"City {city} not found. Skipping...")


Beginning data retrival...


MissingSchema: Invalid URL 'api.openweathermap.org/data/2.5/weather?appid=6cdacdddfba572011f9a399c3e340102&units=imperial&q=atuona': No schema supplied. Perhaps you meant http://api.openweathermap.org/data/2.5/weather?appid=6cdacdddfba572011f9a399c3e340102&units=imperial&q=atuona?

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [51]:
# create df from retriaved data
city_df = pd.DataFrame({'City':city_name,
                        'Country':city_country,
                        'Date':date,
                        'Latitude':city_lats,
                        'Longitude':city_lngs,
                        'Cloudiness':city_clouds,
                        'Max Temp (f)':city_maxtemp,
                        'Humidity (%)':city_humidity,
                        'Wind speed (mph)':city_wind_speed
                        })
city_df
# convert times using pd.to_datetime


NameError: name 'city_lats' is not defined

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [52]:
# create lat vs temp scatter plot
plt.scatter(city_df['Lat'],city_df['Max Temp'], facecolor = 'blue', edgecolor = 'black')
# set title and labels:
today = date.today()
plt.title('Citi Latitude vs. max temperature {today}')
plt.xlabel('Latitude')
plt.ylabel('Max Temp (F)')
plt.grid(linestyle='-', linewidth=1, alpha = 0.75)
# export fig 
plt.savfig('../output_data/city_lat_vs_temp.jpg')

KeyError: 'Lat'

## Latitude vs. Humidity Plot

In [53]:
# create lat vs humidity scatter plot
plt.scatter(city_df['Lat'],city_df['Humidity'], facecolor = 'blue', edgecolor = 'black')
# set title and labels:
today = date.today()
plt.title('Citi Latitude vs. Humidity {today}')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.grid(linestyle='-', linewidth=1, alpha = 0.75)
# export fig 
plt.savfig('../output_data/city_lat_vs_Humidity.jpg')

KeyError: 'Lat'

## Latitude vs. Cloudiness Plot

In [None]:
# create lat vs humidity scatter plot
plt.scatter(city_df['Lat'],city_df['cloudiness'], facecolor = 'blue', edgecolor = 'black')
# set title and labels:
today = date.today()
plt.title('Citi Latitude vs. cloudiness {today}')
plt.xlabel('Latitude')
plt.ylabel('cloudiness (%)')
plt.grid(linestyle='-', linewidth=1, alpha = 0.75)
# export fig 
plt.savfig('../output_data/city_lat_vs_cloudiness.jpg')

## Latitude vs. Wind Speed Plot

In [None]:
# create lat vs humidity scatter plot
plt.scatter(city_df['Lat'],city_df['Wind Speed'], facecolor = 'blue', edgecolor = 'black')
# set title and labels:
today = date.today()
plt.title('Citi Latitude vs. wind speed (mph) {today}')
plt.xlabel('Latitude')
plt.ylabel('wind speed (mph)')
plt.grid(linestyle='-', linewidth=1, alpha = 0.75)
# export fig 
plt.savfig('../output_data/city_lat_vs_wind.jpg')

## Linear Regression

In [None]:
# create northern and southern hemisphere df
x_values = city_df.loc[city_df['Lat'] >= 0, 'Lat']
y_values = city_df.loc[city_df['Lat'] < 0, 'Max Temp']

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# calc line_eq for northern hemisphere max temp vs lat
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {str(round(slope, 2))} x + {str(round(intercept,2))}"
# plot chart
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, color='red')
plt.annotate(line_eq, (0,-20), color='red')
# set title and lables
plt.title(f"Norther Hemisphere: City Latitude vs Max Temp {today}")
plt.xlabel('Latitude')
plt.ylabel('Max Temp (f)')
plt.show()
print(f"The r-value is {rvalue}")


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# calc line_eq for souther hemisphere max temp vs lat
x_values_s = city_df.loc[city_df['Lat'] >= 0, 'Lat']
y_values_s = city_df.loc[city_df['Lat'] < 0, 'Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values_s, y_values_s)
regress_values_s = x_values * slope + intercept
line_eq_s = f"y = {str(round(slope, 2))} x + {str(round(intercept,2))}"
# plot chart
plt.scatter(x_values_s, y_values_s)
plt.plot(x_values, regress_values, color='red')
plt.annotate(line_eq_s, (0,-20), color='red')
# set title and lables
plt.title(f"Southern Hemisphere: City Latitude vs Max Temp {today}")
plt.xlabel('Latitude')
plt.ylabel('Max Temp (f)')
plt.show()
print(f"The r-value is {rvalue}")


####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# calc line_eq for norther hemisphere; humidity vs latitude -- hn = huminidy northern
x_values_hn = city_df.loc[city_df['Lat'] >= 0, 'Lat']
y_values_hn = city_df.loc[city_df['Lat'] < 0, 'Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values_hn, y_values_hn)
regress_values_hn = x_values * slope + intercept
line_eq_hn = f"y = {str(round(slope, 2))} x + {str(round(intercept,2))}"
# plot chart
plt.scatter(x_values_hn, y_values_hn)
plt.plot(x_values, regress_values_hn, color='red')
plt.annotate(line_eq_hn, (0,-20), color='red')
# set title and lables
plt.title(f"Nother Hemisphere: City Latitude vs Humidity {today}")
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.show()
print(f"The r-value is {rvalue}")


####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# calc line_eq for souther hemisphere; humidity vs latitude -- hs = huminidy southern hemisphere
x_values_hs = city_df.loc[city_df['Lat'] >= 0, 'Lat']
y_values_hs = city_df.loc[city_df['Lat'] < 0, 'Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values_hs, y_values_hs)
regress_values_hs = x_values * slope + intercept
line_eq_hn = f"y = {str(round(slope, 2))} x + {str(round(intercept,2))}"
# plot chart
plt.scatter(x_values_hs, y_values_hs)
plt.plot(x_values_hs, regress_values_hs, color='red')
plt.annotate(line_eq_hs, (0,-20), color='red')
# set title and lables
plt.title(f"Nother Hemisphere: City Latitude vs Humidity {today}")
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.show()
print(f"The r-value is {rvalue}")

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# calc line_eq for norther hemisphere; cloudiness vs latitude -- cn = cloudiness northern
x_values_cn = city_df.loc[city_df['Lat'] >= 0, 'Lat']
y_values_cn = city_df.loc[city_df['Lat'] < 0, 'Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values_cn, y_values_cn)
regress_values_cn = x_values * slope + intercept
line_eq_cn = f"y = {str(round(slope, 2))} x + {str(round(intercept,2))}"
# plot chart
plt.scatter(x_values_cn, y_values_cn)
plt.plot(x_values, regress_values_cn, color='red')
plt.annotate(line_eq_cn, (0,-20), color='red')
# set title and lables
plt.title(f"Nother Hemisphere: City Latitude vs Cloudiness {today}")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.show()
print(f"The r-value is {rvalue}")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# calc line_eq for norther hemisphere; cloudiness vs latitude -- cs = cloudiness southern
x_values_cs = city_df.loc[city_df['Lat'] >= 0, 'Lat']
y_values_cs = city_df.loc[city_df['Lat'] < 0, 'Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values_cs, y_values_cs)
regress_values_cs = x_values * slope + intercept
line_eq_cs = f"y = {str(round(slope, 2))} x + {str(round(intercept,2))}"
# plot chart
plt.scatter(x_values_cs, y_values_cs)
plt.plot(x_values, regress_values_cs, color='red')
plt.annotate(line_eq_cs, (0,-20), color='red')
# set title and lables
plt.title(f"Nother Hemisphere: City Latitude vs Cloudiness {today}")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.show()
print(f"The r-value is {rvalue}")

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# calc line_eq for norther hemisphere; wind speed vs latitude -- wn = wind speed  northern
x_values_wn = city_df.loc[city_df['Lat'] >= 0, 'Lat']
y_values_wn = city_df.loc[city_df['Lat'] < 0, 'Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values_wn, y_values_wn)
regress_values_wn = x_values * slope + intercept
line_eq_wn = f"y = {str(round(slope, 2))} x + {str(round(intercept,2))}"
# plot chart
plt.scatter(x_values_wn, y_values_wn)
plt.plot(x_values, regress_values_wn, color='red')
plt.annotate(line_eq_wn, (0,-20), color='red')
# set title and lables
plt.title(f"Nother Hemisphere: City Latitude vs Wind speed {today}")
plt.xlabel('Latitude')
plt.ylabel('Wind speed (mph)')
plt.show()
print(f"The r-value is {rvalue}")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# calc line_eq for norther hemisphere; wind speed vs latitude -- ws = wind speed  southern
x_values_ws = city_df.loc[city_df['Lat'] >= 0, 'Lat']
y_values_ws = city_df.loc[city_df['Lat'] < 0, 'Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values_ws, y_values_ws)
regress_values_ws = x_values * slope + intercept
line_eq_ws = f"y = {str(round(slope, 2))} x + {str(round(intercept,2))}"
# plot chart
plt.scatter(x_values_ws, y_values_ws)
plt.plot(x_values, regress_values_ws, color='red')
plt.annotate(line_eq_ws, (0,-20), color='red')
# set title and lables
plt.title(f"Nother Hemisphere: City Latitude vs Wind speed {today}")
plt.xlabel('Latitude')
plt.ylabel('Wind speed (mph)')
plt.show()
print(f"The r-value is {rvalue}")