# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

# Observations
-----

### 1. There is a relatively strong negative correlation (r² = 0.71) between maximum temperature and latitude for the northern hemisphere. In other words, the further a city is from the equator in the northern hemisphere, the lower its maximum temperature is.

### 2. There is a weaker positive correlation (r² = 0.49) between maximum temperature and latitude for the southern hemisphere.

### 3. There does not appear to be much of a relationship between latitude and cloudiness. In other words, city latitude does not necessarily predict cloudiness. A similar observation can be made about latitude and wind speed.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output file path (CSV)
output_data_file = "output_data/cities.csv"

# Coordinate bounds as (min, max) pairs: latitudes first, then longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Draw 1500 random latitude/longitude pairs uniformly over the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Map every coordinate pair to its nearest city and keep each city name once.
# dict.fromkeys de-duplicates with constant-time lookups while preserving
# first-seen order, replacing a linear `city not in cities` scan per point.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
    )
)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Base endpoint of the OpenWeatherMap current-weather API. HTTPS is used so
# that the API key appended to each query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"

# Sample city used for the preview request
city = "Chicago"

# Units system requested from the API (the plots in this notebook label
# temperature in F and wind speed in mph)
units = "Imperial"

In [None]:
# Build query url. Every parameter must be a "name=value" pair; without the
# "=" after "units" the requested units system is never sent as a value.
query_url = f"{url}appid={weather_api_key}&q={city}&units={units}"

# Preview data pull
weather_response = requests.get(query_url).json()
weather_response

In [None]:
# Create lists to hold response data (one entry per successfully fetched city)
city_names = []
latitudes = []
longitudes = []
max_temps = []
humidity_levels = []
cloudiness = []
wind_speeds = []
countries = []
dates = []

# Set the record counter
record = 1

# Opening print statements
print("Beginning Data Retrieval")
print("----------------------------")

# Loop through the list of cities and perform a data request for each
for city in cities:
    try:
        # Let requests build and URL-encode the query string so city names
        # containing spaces or reserved characters are sent intact. The
        # timeout keeps a single stalled connection from hanging the run.
        weather_response = requests.get(
            url,
            params={"appid": weather_api_key, "q": city, "units": units},
            timeout=10,
        ).json()

        # Read every field before appending anything, so a response that is
        # missing a key cannot leave the lists with unequal lengths.
        city_name = weather_response["name"]
        latitude = weather_response["coord"]["lat"]
        longitude = weather_response["coord"]["lon"]
        max_temp = weather_response["main"]["temp_max"]
        humidity = weather_response["main"]["humidity"]
        clouds = weather_response["clouds"]["all"]
        wind_speed = weather_response["wind"]["speed"]
        country = weather_response["sys"]["country"]
        date = weather_response["dt"]
    except KeyError:
        # The payload lacks the expected weather fields for this city
        print("City not found. Skipping...")
    except (ValueError, requests.exceptions.RequestException) as error:
        # Network failure, timeout, or a body that is not valid JSON
        print(f"Request failed for {city}: {error}. Skipping...")
    else:
        city_names.append(city_name)
        latitudes.append(latitude)
        longitudes.append(longitude)
        max_temps.append(max_temp)
        humidity_levels.append(humidity)
        cloudiness.append(clouds)
        wind_speeds.append(wind_speed)
        countries.append(country)
        dates.append(date)

        print(f"Processing Record {record} | {city_name}")

        # Increase record counter by 1
        record += 1
    finally:
        # Pause after every request, failed ones included, to throttle the
        # call rate (presumably to stay within the API's rate limit)
        time.sleep(1)

# Closing print statements
print("---------------------------------")
print("Data Retrieval Complete")
print("---------------------------------")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Pair each column label with the list that was filled during data retrieval
column_labels = [
    "City",
    "Lat",
    "Lon",
    "Max Temp",
    "Humidity",
    "Cloudiness",
    "Wind Speed",
    "Country",
    "Dates",
]
column_values = [
    city_names,
    latitudes,
    longitudes,
    max_temps,
    humidity_levels,
    cloudiness,
    wind_speeds,
    countries,
    dates,
]
weather_dict = dict(zip(column_labels, column_values))

# Build the dataframe and show per-column counts to confirm every column
# holds the same number of records
weather_df = pd.DataFrame(data=weather_dict)
weather_df.count()

In [None]:
# Show descriptive summary statistics for the weather dataframe
# (describe() reports on the numeric columns by default)
weather_df.describe()

In [None]:
# Write the city weather table to disk as CSV, leaving out the row index
csv_path = "../output_data/cities_weather_output_final.csv"
weather_df.to_csv(csv_path, index=False)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Collect the row labels of any cities reporting humidity above 100%
too_humid = weather_df["Humidity"] > 100
humidity_outlier_indices = weather_df.index[too_humid]

# Show the labels as a plain list
humidity_outlier_indices_list = humidity_outlier_indices.tolist()
print(humidity_outlier_indices_list)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
import datetime

# Capture today's date as "(M/D/YYYY)" so plot titles can carry the date of
# analysis. The leftover debug listing of the datetime module was removed.
date = datetime.datetime.now()
current_date = f"({date.month}/{date.day}/{date.year})"
current_date

In [None]:
# Scatter of maximum temperature against latitude for every city
plt.scatter(x=weather_df["Lat"], y=weather_df["Max Temp"], marker="o")

# Axis labels, plus a title stamped with the date of analysis
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.title(f"City Latitude vs. Max Temperature (F) {current_date}")

# Write the figure to disk, then display it
plt.savefig("../output_images/Latitude vs. Temperature.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude for every city
city_latitudes = weather_df["Lat"]
city_humidity = weather_df["Humidity"]
plt.scatter(city_latitudes, city_humidity)

# Axis labels, plus a title stamped with the date of analysis
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title(f"City Latitude vs. Humidity {current_date}")

# Write the figure to disk, then display it
plt.savefig("../output_images/Latitude vs. Humidity.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude for every city
city_latitudes = weather_df["Lat"]
city_cloudiness = weather_df["Cloudiness"]
plt.scatter(city_latitudes, city_cloudiness)

# Axis labels, plus a title stamped with the date of analysis
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title(f"City Latitude vs. Cloudiness {current_date}")

# Write the figure to disk, then display it
plt.savefig("../output_images/Latitude vs. Cloudiness.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude for every city
city_latitudes = weather_df["Lat"]
city_wind_speeds = weather_df["Wind Speed"]
plt.scatter(city_latitudes, city_wind_speeds)

# Axis labels, plus a title stamped with the date of analysis
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title(f"City Latitude vs. Wind Speed {current_date}")

# Write the figure to disk, then display it
plt.savefig("../output_images/Latitude vs. Wind Speed.png")
plt.show()

## Linear Regression

In [None]:
# Create conditional regression variables dataframe
northern_hemisphere = weather_df.loc[weather_df["Lat"] >= 0]

southern_hemisphere = weather_df[weather_df["Lat"] < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Regress maximum temperature on latitude for northern-hemisphere cities
x_values = northern_hemisphere["Lat"]
y_values = northern_hemisphere["Max Temp"]

# Fit a least-squares line and evaluate it at every observed latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter the observations, overlay the fitted line and annotate its equation
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line, (6, 10), fontsize=15, color="r")

# Create plot title and axis labels
plt.title("Northern Hemisphere - Max Temp vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")

# Report r and r-squared under their own labels: printing the squared value
# as "r-value" hides the sign (direction) of the correlation
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")

plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Regress maximum temperature on latitude for southern-hemisphere cities
x_values = southern_hemisphere["Lat"]
y_values = southern_hemisphere["Max Temp"]

# Fit a least-squares line and evaluate it at every observed latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter the observations, overlay the fitted line and annotate its equation
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line, (-30, 40), fontsize=15, color="r")

# Create plot title and axis labels
plt.title("Southern Hemisphere - Max Temp vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")

# Report r and r-squared under their own labels: printing the squared value
# as "r-value" hides the sign (direction) of the correlation
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")

plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress humidity on latitude for northern-hemisphere cities
x_values = northern_hemisphere["Lat"]
y_values = northern_hemisphere["Humidity"]

# Fit a least-squares line and evaluate it at every observed latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter the observations, overlay the fitted line and annotate its equation
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line, (40, 10), fontsize=15, color="r")

# Create plot title and axis labels
plt.title("Northern Hemisphere - Humidity vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

# Report r and r-squared under their own labels: printing the squared value
# as "r-value" hides the sign (direction) of the correlation
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")

plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress humidity on latitude for southern-hemisphere cities
x_values = southern_hemisphere["Lat"]
y_values = southern_hemisphere["Humidity"]

# Fit a least-squares line and evaluate it at every observed latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter the observations, overlay the fitted line and annotate its equation
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line, (-55, 18), fontsize=15, color="r")

# Create plot title and axis labels
plt.title("Southern Hemisphere - Humidity vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

# Report r and r-squared under their own labels: printing the squared value
# as "r-value" hides the sign (direction) of the correlation
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")

plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress cloudiness on latitude for northern-hemisphere cities
x_values = northern_hemisphere["Lat"]
y_values = northern_hemisphere["Cloudiness"]

# Fit a least-squares line and evaluate it at every observed latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter the observations, overlay the fitted line and annotate its equation
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line, (30, 50), fontsize=15, color="r")

# Create plot title and axis labels
plt.title("Northern Hemisphere - Cloudiness vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

# Report r and r-squared under their own labels: printing the squared value
# as "r-value" hides the sign (direction) of the correlation
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")

plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress cloudiness on latitude for southern-hemisphere cities
x_values = southern_hemisphere["Lat"]
y_values = southern_hemisphere["Cloudiness"]

# Fit a least-squares line and evaluate it at every observed latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter the observations, overlay the fitted line and annotate its equation
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line, (-55, 12), fontsize=15, color="r")

# Create plot title and axis labels
plt.title("Southern Hemisphere - Cloudiness vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

# Report r and r-squared under their own labels: printing the squared value
# as "r-value" hides the sign (direction) of the correlation
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")

plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Regress wind speed on latitude for northern-hemisphere cities
x_values = northern_hemisphere["Lat"]
y_values = northern_hemisphere["Wind Speed"]

# Fit a least-squares line and evaluate it at every observed latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter the observations, overlay the fitted line and annotate its equation
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line, (0, 26), fontsize=15, color="r")

# Create plot title and axis labels
plt.title("Northern Hemisphere - Wind Speed vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

# Report r and r-squared under their own labels: printing the squared value
# as "r-value" hides the sign (direction) of the correlation
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")

plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Regress wind speed on latitude for southern-hemisphere cities
x_values = southern_hemisphere["Lat"]
y_values = southern_hemisphere["Wind Speed"]

# Fit a least-squares line and evaluate it at every observed latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

# Scatter the observations, overlay the fitted line and annotate its equation
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line, (-55, 23), fontsize=15, color="r")

# Create plot title and axis labels
plt.title("Southern Hemisphere - Wind Speed vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

# Report r and r-squared under their own labels: printing the squared value
# as "r-value" hides the sign (direction) of the correlation
print(f"The r-value is: {rvalue}")
print(f"The r-squared is: {rvalue**2}")

plt.show()