# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = list(zip(lats, lngs))

In [3]:
# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

631

In [4]:
cities

['ushuaia',
 'sonderso',
 'punta arenas',
 'belushya guba',
 'mataura',
 'bluff',
 'naze',
 'severo-kurilsk',
 'skibbereen',
 'los lunas',
 'albany',
 'dikson',
 'yellowknife',
 'asyut',
 'nantucket',
 'ponta do sol',
 'hermanus',
 'lorengau',
 'kharhial',
 'bikaner',
 'atuona',
 'hasaki',
 'nikolskoye',
 'talnakh',
 'mount gambier',
 'sataua',
 'boissevain',
 'hithadhoo',
 'hilo',
 'rikitea',
 'barrow',
 'vaini',
 'tommot',
 'fort saint john',
 'port alfred',
 'khatanga',
 'saldanha',
 'taolanaro',
 'klaksvik',
 'beohari',
 'thunder bay',
 'tuktoyaktuk',
 'mount isa',
 'yining',
 'richards bay',
 'sentyabrskiy',
 'sterling',
 'saint-philippe',
 'saskylakh',
 'kruisfontein',
 'carnarvon',
 'chimore',
 'bredasdorp',
 'novyye lyady',
 'kommunisticheskiy',
 'qaanaaq',
 'taburao',
 'imeni poliny osipenko',
 'georgetown',
 'puerto ayora',
 'butaritari',
 'ossora',
 'piacabucu',
 'illoqqortoormiut',
 'avarua',
 'busselton',
 'kerouane',
 'lancut',
 'hualmay',
 'tecoanapa',
 'hurup',
 'turayf

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [5]:
city_looking_for = 'los angeles'
url = f"https://api.openweathermap.org/data/2.5/weather?q={city_looking_for}&appid={weather_api_key}"

In [6]:
response = requests.get(url).json()
response

{'coord': {'lon': -118.2437, 'lat': 34.0522},
 'weather': [{'id': 800,
   'main': 'Clear',
   'description': 'clear sky',
   'icon': '01n'}],
 'base': 'stations',
 'main': {'temp': 298.26,
  'feels_like': 297.98,
  'temp_min': 293.13,
  'temp_max': 305.48,
  'pressure': 1013,
  'humidity': 44},
 'visibility': 10000,
 'wind': {'speed': 4.63, 'deg': 120},
 'clouds': {'all': 0},
 'dt': 1660449053,
 'sys': {'type': 2,
  'id': 2034962,
  'country': 'US',
  'sunrise': 1660396391,
  'sunset': 1660444958},
 'timezone': -25200,
 'id': 5368361,
 'name': 'Los Angeles',
 'cod': 200}

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [7]:
city = response["name"]
lat = response["coord"]["lat"]
lng = response["coord"]["lon"]
max_temp = response["main"]["temp_max"]
humid = response["main"]["humidity"]
cloudiness = response["clouds"]["all"]
wind = response["wind"]["speed"]
country = response["sys"]["country"]
date = response["dt"]

city_weather = {"City":[city],"Lat":[lat],"Lng":[lng],"Max Temp":[max_temp],
                 "Humidity":[humid], "Cloudiness": [cloudiness], "Wind Speed":[wind],
                "Country":[country], "Date":[date]}
pd.DataFrame(city_weather)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Los Angeles,34.0522,-118.2437,305.48,44,0,4.63,US,1660449053


In [None]:
all_cities = []
url = f"https://api.openweathermap.org/data/2.5/weather?units=imperial&appid={weather_api_key}&q="

for c in cities:
    response = requests.get(url + c).json()
    try:
        city = response["name"]
        lat = response["coord"]["lat"]
        lng = response["coord"]["lon"]
        max_temp = response["main"]["temp_max"]
        humid = response["main"]["humidity"]
        cloudiness = response["clouds"]["all"]
        wind = response["wind"]["speed"]
        country = response["sys"]["country"]
        date = response["dt"]

        city_dict = {"City":city,"Lat":lat,"Lng":lng,"Max Temp":max_temp,
                     "Humidity":humid, "Cloudiness": cloudiness, "Wind Speed":wind,
                    "Country":country, "Date":date}
        all_cities.append(city_dict)
        print("Found " + c)
    except Exception as e:
        print("Could not find " + c)

waeather_df = pd.DataFrame(all_cities)
weather_df
    

Found ushuaia
Found sonderso
Found punta arenas
Could not find belushya guba
Found mataura
Found bluff
Found naze
Found severo-kurilsk
Found skibbereen
Found los lunas
Found albany
Found dikson
Found yellowknife
Found asyut
Found nantucket
Found ponta do sol
Found hermanus
Found lorengau
Found kharhial
Found bikaner
Found atuona
Found hasaki
Found nikolskoye
Found talnakh
Found mount gambier
Could not find sataua
Found boissevain
Found hithadhoo
Found hilo
Found rikitea
Found barrow
Found vaini
Found tommot
Could not find fort saint john
Found port alfred
Found khatanga
Found saldanha
Could not find taolanaro
Found klaksvik
Found beohari
Found thunder bay
Found tuktoyaktuk
Found mount isa
Found yining
Found richards bay
Could not find sentyabrskiy
Found sterling
Found saint-philippe
Found saskylakh
Found kruisfontein
Found carnarvon
Found chimore
Found bredasdorp
Found novyye lyady
Found kommunisticheskiy
Found qaanaaq
Could not find taburao
Found imeni poliny osipenko
Found georgetown

In [9]:
waeather_df.to_csv(output_data_file)

NameError: name 'waeather_df' is not defined

In [None]:
output_data = pd.read_csv(output_data_file)
output_data.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.
humidity_df = output_data.loc[(output_data["Humidity"] >= 100)]
humidity_df.head(6)

In [None]:
humidity_df.count()

In [None]:
reduced_humidity_df = output_data.loc[(output_data["Humidity"] < 100)]
reduced_humidity_df.head()

In [None]:
reduced_humidity_df.describe()

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".

del reduced_humidity_df['Unnamed: 0']
reduced_humidity_df.head()

In [None]:
clean_city_data = reduced_humidity_df

In [None]:
clean_city_data.to_csv("output_data/clean_city_data.csv")

In [None]:
reduced_humidity_df["Humidity"].plot.hist()

Skip this step if there are no cities that have humidity > 100%.

In [None]:
reduced_humidity_df["Humidity"].plot.box(figsize=(10,10))

In [None]:
reduced_humidity_df["Humidity"].describe()

In [None]:
reduced_humidity_df["Humidity"].quantile([.25,.5,.75])

In [None]:
quartiles = reduced_humidity_df["Humidity"].quantile([.25,.5,.75])
lowerq = quartiles[.25]
upperq = quartiles[.75]
iqr = upperq-lowerq
print(lowerq, upperq)
print("INNER QUARTILE RANGE", iqr)

In [None]:
quartiles = reduced_humidity_df["Humidity"].quantile([.25,.5,.75])
lowerq = quartiles[.25]
upperq = quartiles[.75]
iqr = upperq-lowerq

print(f"The lower quartile of reduced_humidity_df.head() is: {lowerq}")
print(f"The upper quartile of reduced_humidity_df.head() is: {upperq}")
print(f"The interquartile range of reduced_humidity_df.head() is: {iqr}")
print(f"The the median of reduced_humidity_df.head() is: {quartiles[.5]} ")

lower_bound = lowerq - (1.5*iqr)
upper_bound = upperq + (1.5*iqr)
print(f"Values below {lower_bound} could be outliers.")
print(f"Values above {upper_bound} could be outliers.")

In [None]:
reduced_humidity_df.loc[reduced_humidity_df["Humidity"] < lower_bound]

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
clean_city_data.plot(kind="scatter", x="Lat", y="Max Temp", grid=True, figsize=(5,5),
              title="Latitute Vs. Max Temperature")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
clean_city_data.plot(kind="scatter", x="Lat", y="Cloudiness", grid=True, figsize=(5,5),
              title="Latitute Vs. Cloudiness")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
clean_city_data.plot(kind="scatter", x="Lat", y="Humidity", grid=True, figsize=(5,5),
              title="Latitute Vs. Cloudiness")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
clean_city_data.plot(kind="scatter", x="Lat", y="Wind Speed", grid=True, figsize=(5,5),
              title="Latitute Vs. Wind Speed")
plt.show()

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
noth_hamisphere = clean_city_data.loc[(clean_city_data["Lat"] >= 0)]
noth_hamisphere.head()

In [None]:
x_values = noth_hamisphere['Lat']
y_values = noth_hamisphere['Max Temp']
plt.scatter(x_values,y_values)
plt.xlabel('Cities')
plt.ylabel('Max Temperature')
plt.show()

In [None]:
x_values = noth_hamisphere['Lat']
y_values = noth_hamisphere['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5.8,0.8),fontsize=15,color="red")
plt.xlabel('Cities')
plt.ylabel('Max Temperature')
plt.title("Max Temp vs. Latitude")
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
south_hamisphere = clean_city_data.loc[(clean_city_data["Lat"] < 0)]
south_hamisphere.head()

In [None]:
x_values = south_hamisphere['Lat']
y_values = south_hamisphere['Max Temp']
plt.scatter(x_values,y_values)
plt.xlabel('Cities')
plt.ylabel('Max Temperature')
plt.show()

In [None]:
x_values = south_hamisphere['Lat']
y_values = south_hamisphere['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5.8,0.8),fontsize=15,color="red")
plt.xlabel('Cities')
plt.ylabel('Max Temperature')
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = noth_hamisphere['Lat']
y_values = noth_hamisphere['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5.8,0.8),fontsize=15,color="red")
plt.xlabel('Cities')
plt.ylabel('Humidity (%)')
plt.title("Humidity (%) vs. Latitude")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = south_hamisphere['Lat']
y_values = south_hamisphere['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5.8,0.8),fontsize=15,color="red")
plt.xlabel('Cities')
plt.ylabel('Humidity (%)')
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = noth_hamisphere['Lat']
y_values = noth_hamisphere['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5.8,0.8),fontsize=15,color="red")
plt.xlabel('Cities')
plt.ylabel('Humidity (%)')
plt.title("Cloudiness (%) vs. Latitude")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = south_hamisphere['Lat']
y_values = south_hamisphere['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5.8,0.8),fontsize=15,color="red")
plt.xlabel('Cities')
plt.ylabel('Cloudiness (%)')
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = noth_hamisphere['Lat']
y_values = noth_hamisphere['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5.8,0.8),fontsize=15,color="red")
plt.xlabel('Cities')
plt.ylabel('Wind Speed (mph)')
plt.title("Wind Speed (mph) vs. Latitude")
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = south_hamisphere['Lat']
y_values = south_hamisphere['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5.8,0.8),fontsize=15,color="red")
plt.xlabel('Cities')
plt.ylabel('Wind Speed (mph)')
plt.show()