# WeatherPy
----

#### Observations and Insights
* The scatter plot of Latitude Vs Temperature shows that as you move away from the equator in either direction, the Max Temperature of the place decreases
* Humidity tends to be higher as we move away from Equator
* There is a positive correlation between Cloudiness and Latitude
* There is negligible correlation between Windspeed and Latitude

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from datetime import datetime

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output directory that receives the exported chart images
output_dir = "../output_data/"

# CSV file (inside the output directory) that receives the weather data
output_data_file = output_dir + "cities.csv"


# (min, max) bounds, in degrees, for the randomly sampled coordinates
lat_range, lng_range = (-90, 90), (-180, 180)

# Date on which this notebook was executed, as MM/DD/YYYY (used in chart titles)
today_date = f"{datetime.today():%m/%d/%Y}"

## Generate Cities List

In [None]:
# Creating a set of 1500 random lat and lng combinations within the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# dict.fromkeys keeps only the first occurrence of every city name while
# preserving first-seen order, so duplicates are dropped without scanning
# the whole list for each candidate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
print(len(cities))

# Creating subset of cities in count of 50
n = 50
def divide_cities(cities, n):
    """Yield consecutive slices of ``cities`` holding at most ``n`` items each.

    The slices are produced lazily, in order; the last one may be shorter
    than ``n`` when ``len(cities)`` is not a multiple of ``n``.
    """
    chunk_starts = range(0, len(cities), n)
    yield from (cities[start:start + n] for start in chunk_starts)
cities = list(divide_cities(cities, n))

### Openweathermap API Calls

In [None]:
# Configuring OpenWeatherMap API information.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Building partial query URL to be used for data pull request
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Lists that collect one value per successfully retrieved city; they must all
# stay the same length because they become the columns of one DataFrame
citynames = []
lat = []
lon = []
temp_max = []
humidity = []
cloudiness = []
windspeed = []
country = []
date = []

print("Begining Data Retrieval")
print("------------------------------")

# Looping through every set of cities and requesting the weather of each city.
# Both counters start at 1 so the record number matches the set number style.
for idx1, city_list in enumerate(cities, start=1):
    for idx2, city in enumerate(city_list, start=1):
        print(f"Processing Record {idx2} of set {idx1} | {city}")
        try:
            # The timeout keeps a single stalled connection from hanging the run
            response = requests.get(query_url + city, timeout=10).json()

            # Read every field BEFORE appending anything: if one key is missing
            # nothing has been stored yet, so the lists cannot get out of step
            record = (
                response['name'],
                response['coord']['lat'],
                response['coord']['lon'],
                response['main']['temp_max'],
                response['main']['humidity'],
                response['clouds']['all'],
                response['wind']['speed'],
                response['sys']['country'],
                response['dt'],
            )
        except (requests.RequestException, ValueError) as err:
            # Network failure, timeout, or a body that is not valid JSON
            print(f"Request failed ({err}). Skipping...")
            continue
        except KeyError:
            # The response lacks an expected field; treated as an unknown city
            print("City not found. Skipping...")
            continue

        citynames.append(record[0])
        lat.append(record[1])
        lon.append(record[2])
        temp_max.append(record[3])
        humidity.append(record[4])
        cloudiness.append(record[5])
        windspeed.append(record[6])
        country.append(record[7])
        date.append(record[8])


print("------------------------------")
print("Data Retrieval Complete")
print("------------------------------")

### Convert Raw Data to DataFrame

In [None]:
# Assemble the lists filled during the API calls into a Pandas DataFrame,
# one column per list
weather_data = pd.DataFrame({
    "City": citynames,
    "Lat": lat,
    "Lon": lon,
    "Max Temp": temp_max,
    "Humidity": humidity,
    "Cloudiness": cloudiness,
    "Windspeed": windspeed,
    "Country": country,
    "Date": date,
})

# Show the first rows of the DataFrame
weather_data.head()

In [None]:
# Describing the dataframe to understand the limits in the data
# (the following cells filter on the Humidity column)
weather_data.describe()

## Inspecting the data and removing the cities where the humidity > 100%.

In [None]:
# Keep only the rows whose Humidity value is at most 100 (%)
clean_weather_data = weather_data.loc[weather_data["Humidity"] <= 100.00]

# Summary statistics again, to check the limits of the filtered data
clean_weather_data.describe()

In [None]:
# Previewing the first rows of the cleaned data (rows with Humidity <= 100)
clean_weather_data.head()

In [None]:
# Label the index so it is written to the CSV under the header "City_ID"
clean_weather_data.index.name = "City_ID"

# Write the cleaned data to the output CSV file
clean_weather_data.to_csv(output_data_file)

## Plotting the Data

## Latitude vs. Temperature Plot

In [None]:
# Plotting scatter plot for Latitude Vs Temperature from the cleaned data.
# The columns of clean_weather_data are used (not the raw response lists) so
# the chart shows exactly the rows that were exported to the CSV file.
plot_chart = plt.scatter(clean_weather_data["Lat"], clean_weather_data["Max Temp"],
                         marker="o", facecolors="green", alpha=0.75)

# Setting up chart display parameters
plt.title(f"City Latitude vs. Max Temperature ({today_date})")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.show()

# Exporting chart to output folder in png format
export_chart = plot_chart.get_figure()
file_name = "Lat Vs Temp Plot.png"
export_chart.savefig(output_dir + file_name)


#### Observations
* The above scatter plot of Latitude Vs Temperature shows that as you move away from the equator in either direction, the Max Temperature of the place decreases

## Latitude vs. Humidity Plot

In [None]:
# Plotting scatter plot for Latitude Vs Humidity from the cleaned data.
# The columns of clean_weather_data are used (not the raw response lists) so
# rows removed by the humidity filter do not appear on the chart.
plot_chart = plt.scatter(clean_weather_data["Lat"], clean_weather_data["Humidity"],
                         marker="o", facecolors="green", alpha=0.75)

# Setting up chart display parameters
plt.title(f"City Latitude vs. Humidity ({today_date})")
plt.xlabel("Latitude")
plt.ylabel("humidity (%)")
plt.show()

# Exporting chart to output folder in png format
export_chart = plot_chart.get_figure()
file_name = "Lat Vs Humidity Plot.png"
export_chart.savefig(output_dir + file_name)

#### Observations
* Above scatter plot of Latitude Vs Humidity shows that Humidity tends to be higher as we move away from Equator towards Northern Hemisphere

## Latitude vs. Cloudiness Plot

In [None]:
# Plotting scatter plot for Latitude Vs Cloudiness from the cleaned data.
# The columns of clean_weather_data are used (not the raw response lists) so
# the chart shows exactly the rows that were exported to the CSV file.
plot_chart = plt.scatter(clean_weather_data["Lat"], clean_weather_data["Cloudiness"],
                         marker="o", facecolors="green", alpha=0.75)

# Setting up chart display parameters
plt.title(f"City Latitude vs. Cloudiness ({today_date})")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.show()

# Exporting chart to output folder in png format
export_chart = plot_chart.get_figure()
file_name = "Lat Vs Cloudiness(%) Plot.png"
export_chart.savefig(output_dir + file_name)


#### Observations
* It is difficult to interpret any relationship between Latitude and Cloudiness.

## Latitude vs. Wind Speed Plot

In [None]:
# Plotting scatter plot for Latitude Vs Windspeed from the cleaned data.
# The columns of clean_weather_data are used (not the raw response lists) so
# the chart shows exactly the rows that were exported to the CSV file.
plot_chart = plt.scatter(clean_weather_data["Lat"], clean_weather_data["Windspeed"],
                         marker="o", facecolors="green", alpha=0.75)

# Setting up chart display parameters
plt.title(f"City Latitude vs. Wind Speed ({today_date})")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.show()

# Exporting chart to output folder in png format
export_chart = plot_chart.get_figure()
file_name = "Lat Vs Wind Speed Plot.png"
export_chart.savefig(output_dir + file_name)


#### Observations
* There are very few cities with windspeed > 25 mph and those are in Northern Hemisphere

## Linear Regression

In [None]:
# Split the cleaned data by hemisphere: latitude 0 and above is treated as
# Northern, anything below 0 as Southern
weather_data_n = clean_weather_data.loc[clean_weather_data["Lat"] >= 0.00]
weather_data_s = clean_weather_data.loc[clean_weather_data["Lat"] < 0.00]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Latitude (x) and max temperature (y) of the Northern Hemisphere cities
x_axis = weather_data_n.loc[:, "Lat"]
y_axis = weather_data_n.loc[:, "Max Temp"]

# Linear regression of y on x
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)

# Values predicted by the regression line, and its equation as chart text
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter of the observations with the regression line and equation in red
plot_chart = plt.scatter(x_axis, y_axis)
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (5, 15), color="red", fontsize=15)

# Chart labels
plt.xlabel("Latitude")
plt.ylabel("Max Temprature")
plt.title(f"Northern Hemisphere - Max Temp vs. Latitude Linear Regression ({today_date})")
plt.show()

# Save the chart to the output folder as a png file
file_name = "NH - Lat Vs Temp Plot.png"
export_chart = plot_chart.get_figure()
export_chart.savefig(f"{output_dir}{file_name}")

#### Observations
* In the Northern Hemisphere, as we move away from the equator, the temperature decreases. This is supported by the regression equation, which shows a slope value near 1.

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Latitude (x) and max temperature (y) of the Southern Hemisphere cities
x_axis = weather_data_s.loc[:, "Lat"]
y_axis = weather_data_s.loc[:, "Max Temp"]

# Linear regression of y on x
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)

# Values predicted by the regression line, and its equation as chart text
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter of the observations with the regression line and equation in red
plot_chart = plt.scatter(x_axis, y_axis)
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (-55, 95), color="red", fontsize=15)

# Chart labels
plt.xlabel("Latitude")
plt.ylabel("Max Temprature")
plt.title(f"Southern Hemisphere - Max Temp vs. Latitude Linear Regression ({today_date})")
plt.show()

# Save the chart to the output folder as a png file
file_name = "SH - Lat Vs Temp Plot.png"
export_chart = plot_chart.get_figure()
export_chart.savefig(f"{output_dir}{file_name}")

#### Observations
* In the Southern Hemisphere, as we move towards the Equator, the temperature increases. The plot is a mirror image of the one for the Northern Hemisphere, due to the x axis values. The relationship is strong, but not as strong as for the Northern Hemisphere.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Latitude (x) and humidity (y) of the Northern Hemisphere cities
x_axis = weather_data_n.loc[:, "Lat"]
y_axis = weather_data_n.loc[:, "Humidity"]

# Linear regression of y on x
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)

# Values predicted by the regression line, and its equation as chart text
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter of the observations with the regression line and equation in red
plot_chart = plt.scatter(x_axis, y_axis)
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (50, 15), color="red", fontsize=15)

# Chart labels
plt.xlabel("Latitude")
plt.ylabel("Humidity(%)")
plt.title(f"Northern Hemisphere - Humidity(%) vs. Latitude Linear Regression ({today_date})")
plt.show()

# Save the chart to the output folder as a png file
file_name = "NH - Lat Vs Humidity(%) Plot.png"
export_chart = plot_chart.get_figure()
export_chart.savefig(f"{output_dir}{file_name}")

#### Observations
* There is a slightly positive relationship between Humidity and Latitude. Lower Humidity levels almost disappear at higher latitude values.

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Latitude (x) and humidity (y) of the Southern Hemisphere cities
x_axis = weather_data_s.loc[:, "Lat"]
y_axis = weather_data_s.loc[:, "Humidity"]

# Linear regression of y on x
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)

# Values predicted by the regression line, and its equation as chart text
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter of the observations with the regression line and equation in red
plot_chart = plt.scatter(x_axis, y_axis)
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (-55, 15), color="red", fontsize=15)

# Chart labels
plt.xlabel("Latitude")
plt.ylabel("Humidity(%)")
plt.title(f"Southern Hemisphere - Humidity(%) vs. Latitude Linear Regression ({today_date})")
plt.show()

# Save the chart to the output folder as a png file
file_name = "SH - Lat Vs Humidity(%) Plot.png"
export_chart = plot_chart.get_figure()
export_chart.savefig(f"{output_dir}{file_name}")

#### Observations
* This trend is similar to the Northern Hemisphere; however, the regression line is almost flat, which suggests that Humidity can hardly be predicted from Latitude.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Latitude (x) and cloudiness (y) of the Northern Hemisphere cities
x_axis = weather_data_n.loc[:, "Lat"]
y_axis = weather_data_n.loc[:, "Cloudiness"]

# Linear regression of y on x
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)

# Values predicted by the regression line, and its equation as chart text
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter of the observations with the regression line and equation in red
plot_chart = plt.scatter(x_axis, y_axis)
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (10, 25), color="red", fontsize=15)

# Chart labels
plt.xlabel("Latitude")
plt.ylabel("Cloudiness(%)")
plt.title(f"Northern Hemisphere - Cloudiness(%) vs. Latitude Linear Regression ({today_date})")
plt.show()

# Save the chart to the output folder as a png file
file_name = "NH - Lat Vs Cloudiness(%) Plot.png"
export_chart = plot_chart.get_figure()
export_chart.savefig(f"{output_dir}{file_name}")

#### Observations
* This shows a positive relationship between Latitude and Cloudiness in Northern Hemisphere, that is as we move away from equator, Cloudiness increases.

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Latitude (x) and cloudiness (y) of the Southern Hemisphere cities
x_axis = weather_data_s.loc[:, "Lat"]
y_axis = weather_data_s.loc[:, "Cloudiness"]

# Linear regression of y on x
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)

# Values predicted by the regression line, and its equation as chart text
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter of the observations with the regression line and equation in red
plot_chart = plt.scatter(x_axis, y_axis)
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (-55, 30), color="red", fontsize=15)

# Chart labels
plt.xlabel("Latitude")
plt.ylabel("Cloudiness(%)")
plt.title(f"Southern Hemisphere - Cloudiness(%) vs. Latitude Linear Regression ({today_date})")
plt.show()

# Save the chart to the output folder as a png file
file_name = "SH - Lat Vs Cloudiness(%) Plot.png"
export_chart = plot_chart.get_figure()
export_chart.savefig(f"{output_dir}{file_name}")

#### Observations
* The Southern Hemisphere follows the same trend as the Northern Hemisphere: with an increase in Latitude, there is an increase in Cloudiness.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Latitude (x) and wind speed (y) of the Northern Hemisphere cities
x_axis = weather_data_n.loc[:, "Lat"]
y_axis = weather_data_n.loc[:, "Windspeed"]

# Linear regression of y on x
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)

# Values predicted by the regression line, and its equation as chart text
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter of the observations with the regression line and equation in red
plot_chart = plt.scatter(x_axis, y_axis)
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (5, 25), color="red", fontsize=15)

# Chart labels
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title(f"Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression ({today_date})")
plt.show()

# Save the chart to the output folder as a png file
file_name = "NH - Lat Vs Wind Speed (mph) Plot.png"
export_chart = plot_chart.get_figure()
export_chart.savefig(f"{output_dir}{file_name}")

#### Observations
* There is no correlation between Windspeed and Latitude. Slope of line is near 0.

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Latitude (x) and wind speed (y) of the Southern Hemisphere cities
x_axis = weather_data_s.loc[:, "Lat"]
y_axis = weather_data_s.loc[:, "Windspeed"]

# Linear regression of y on x
slope, intercept, rvalue, pvalue, stderr = linregress(x_axis, y_axis)

# Values predicted by the regression line, and its equation as chart text
regress_values = x_axis.mul(slope).add(intercept)
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter of the observations with the regression line and equation in red
plot_chart = plt.scatter(x_axis, y_axis)
plt.plot(x_axis, regress_values, "r-")
plt.annotate(line_eq, (-50, 20), color="red", fontsize=15)

# Chart labels
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title(f"Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression ({today_date})")
plt.show()

# Save the chart to the output folder as a png file
file_name = "SH - Lat Vs Wind Speed (mph) Plot.png"
export_chart = plot_chart.get_figure()
export_chart.savefig(f"{output_dir}{file_name}")

#### Observations
* Results are similar to the Northern Hemisphere. There is no correlation between Windspeed and Latitude.