# WeatherPy
----



In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json

# Import API key
from config import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV file that the collected city weather data is exported to
output_data_file = "output_data/cities.csv"

# (min, max) bounds of valid latitudes and longitudes, in degrees
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Draw random coordinates and collect the distinct cities nearest to them.

# Number of random coordinate pairs to sample; duplicate cities are dropped below
sample_size = 1500

# Sample uniformly inside the latitude/longitude bounds defined in the setup cell
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=sample_size)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=sample_size)
lat_lngs = zip(lats, lngs)

# `cities` keeps first-seen order; `seen_cities` makes the duplicate check O(1)
cities = []
seen_cities = set()

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Only the first occurrence of each city name is kept
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Query the OpenWeatherMap weather endpoint once per city and keep the fields
# used in the analysis. Cities that cannot be fetched or parsed are skipped.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Parallel lists: position i in every list describes the same city
city_name = []
cloudiness = []
country = []
date = []
humidity = []
latitude = []
longitude = []
max_temp = []
wind_speed = []

# Target lists, in the same order as the fields extracted inside the loop
result_lists = (city_name, cloudiness, country, date, humidity,
                latitude, longitude, max_temp, wind_speed)

for city in cities:

    try:
        # The timeout keeps one unresponsive request from stalling the whole run
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field before appending anything, so that a missing key
        # cannot leave the lists with different lengths
        record = (
            response["name"],
            response["clouds"]["all"],
            response["sys"]["country"],
            response["dt"],
            response["main"]["humidity"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["wind"]["speed"],
        )
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, or a response without the expected
        # fields: skip this city only. Unlike a bare `except`, this still lets
        # KeyboardInterrupt stop the loop.
        print(f"not able to process {city}... skipping")
        continue

    for values, value in zip(result_lists, record):
        values.append(value)
    print(f"Able to process {city}")
    
        
        

In [None]:
# Assemble the per-city lists into one table, one column per collected field
weather_columns = {
    "City Name": city_name,
    "Cloudiness": cloudiness,
    "Country": country,
    "Date": date,
    "Humidity": humidity,
    "latitude": latitude,
    "longitude": longitude,
    "Max Temp (fahrenheit)": max_temp,
    "Wind Speed": wind_speed,
}
dataframe_results = pd.DataFrame(data=weather_columns)


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Export the city weather table to the CSV path held in output_data_file
dataframe_results.to_csv(path_or_buf=output_data_file)

In [None]:
# Display the first five rows of the city weather table
dataframe_results.head(n=5)

### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

#### Latitude vs. Temperature Plot

In [None]:
# Scatter plot of each city's maximum temperature against its latitude
x_plots = dataframe_results["latitude"]
y_plots = dataframe_results["Max Temp (fahrenheit)"]

# Date the analysis is run, shown in the plot title
analysis_date = time.strftime("%m/%d/%Y")

plt.scatter(x_plots, y_plots, color="darkblue")
plt.title(f"City Latitude vs Max Temp ({analysis_date})")
plt.ylabel("Max Temp (Fahrenheit)")
plt.xlabel("Latitude")
plt.grid()

# Write the figure to disk, then display it
plt.savefig("output_data/Latitude vs Max_Temp")
plt.show()


The plot above appears to show that temperatures are highest around the equator (0 latitude) and trend lower the further you go from the equator, both north and south.

#### Latitude vs. Humidity Plot

In [None]:
# Scatter plot of each city's humidity against its latitude
x_plots = dataframe_results["latitude"]
y_plots = dataframe_results["Humidity"]

# Date the analysis is run, shown in the plot title
analysis_date = time.strftime("%m/%d/%Y")

plt.scatter(x_plots, y_plots, color="darkblue")
plt.title(f"City Latitude vs Humidity ({analysis_date})")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid()

# Write the figure to disk, then display it
plt.savefig("output_data/Latitude vs Humidity")
plt.show()

There doesn't appear to be much of a pattern to humidity levels. You might say there are more low humidity levels at the latitudes closer to the equator, but it is hard to tell from this scatter, because there are also a lot of very high humidity cities both near and far from the equator.

#### Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of each city's cloudiness against its latitude
x_plots = dataframe_results["latitude"]
y_plots = dataframe_results["Cloudiness"]

# Date the analysis is run, shown in the plot title
analysis_date = time.strftime("%m/%d/%Y")

plt.scatter(x_plots, y_plots, color="darkblue")
plt.title(f"City Latitude vs Cloudiness ({analysis_date})")
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid()

# Write the figure to disk, then display it
plt.savefig("output_data/Latitude vs Cloudiness")
plt.show()

This isn't telling a whole lot either. It looks like there are particular numbers that are more frequently chosen to represent "cloudiness" in any given city. Would not be able to infer from this scatter that it is more or less cloudy near the equator. 

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of each city's wind speed against its latitude
x_plots = dataframe_results["latitude"]
y_plots = dataframe_results["Wind Speed"]

# Date the analysis is run, shown in the plot title
analysis_date = time.strftime("%m/%d/%Y")

plt.scatter(x_plots, y_plots, color="darkblue")
plt.title(f"City Latitude vs Wind Speed ({analysis_date})")
# Requests are made with units="imperial", so wind speed is in miles per hour
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid()

# Write the figure to disk, then display it
plt.savefig("output_data/Latitude vs Wind Speed")
plt.show()

Wind speeds this particular day were pretty consistent no matter what city. 0-20 would pretty much describe the wind speed both near or far from the equator - a couple outliers up well north of the equator with very high winds. 

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots

# Module-level label used in the saved figure's file name when lin_reg is
# called without an explicit `label`; cells set it before each call.
fig_label = ""


def lin_reg(x, y, label=None):
    """Plot y against x with a fitted regression line, save it, and print r.

    Args:
        x: Series-like values for the horizontal axis; ``x.name`` is used for
            the axis label, the title and the file name.
        y: Series-like values for the vertical axis; ``y.name`` is used for
            the axis label, the title and the file name.
        label: Optional suffix for the saved file name. When omitted, the
            module-level ``fig_label`` is read at call time, so existing
            ``lin_reg(x, y)`` calls keep working.

    Returns:
        None. The figure is saved under ``output_data/``, shown, and the r
        value of the fit is printed.
    """
    if label is None:
        label = fig_label

    (slope, intercept, rvalue, pvalue, stderr) = linregress(x, y)
    regress_values = x * slope + intercept
    line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

    plt.scatter(x, y)
    plt.plot(x, regress_values, "r-")
    plt.xlabel(x.name)
    plt.ylabel(y.name)
    plt.title(f"{x.name} vs {y.name}")
    # Anchor the equation text at the lower-left corner of the data
    plt.annotate(line_eq, (min(x), min(y)), fontsize=15, color="red")
    # "_" separates the label: ":" is a reserved character in Windows file names
    plt.savefig(f"output_data/{x.name}vs{y.name}_{label}")
    plt.show()
    print(f"The r value is: {rvalue}")


In [None]:
# Split the cities by hemisphere; latitude 0 is grouped with the north
city_latitudes = dataframe_results["latitude"]
north = dataframe_results.loc[city_latitudes >= 0]
south = dataframe_results.loc[city_latitudes < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress max temperature on latitude
fig_label = "north"  # read by lin_reg when naming the saved figure
x, y = north["latitude"], north["Max Temp (fahrenheit)"]
lin_reg(x, y)


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress max temperature on latitude
fig_label = "south"  # read by lin_reg when naming the saved figure
x, y = south["latitude"], south["Max Temp (fahrenheit)"]
lin_reg(x, y)


Although the southern hemisphere looks less predictable than the northern hemisphere, both models indicate that it is generally warmer the closer one gets to the equator. The southern hemisphere may look less predictable because right now it is the "summer" season in the southern hemisphere and the "winter" season in the northern hemisphere; I would therefore predict that the relationship in the southern hemisphere would be stronger in June or July.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress humidity on latitude
fig_label = "north"  # read by lin_reg when naming the saved figure
x, y = north["latitude"], north["Humidity"]
lin_reg(x, y)

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress humidity on latitude
fig_label = "south"  # read by lin_reg when naming the saved figure
x, y = south["latitude"], south["Humidity"]
lin_reg(x, y)

There is no real relationship between humidity and proximity to the equator. In both the northern and southern hemispheres the linear regression models slope upward, but the r values are weak in each case, indicating no strong correlation.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress cloudiness on latitude
fig_label = "north"  # read by lin_reg when naming the saved figure
x, y = north["latitude"], north["Cloudiness"]
lin_reg(x, y)

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress cloudiness on latitude
fig_label = "south"  # read by lin_reg when naming the saved figure
x, y = south["latitude"], south["Cloudiness"]
lin_reg(x, y)

The analysis here is similar to that for humidity and proximity to the equator. There is a very weak correlation between the variables, indicating it could be cloudy anywhere you go!

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress wind speed on latitude
fig_label = "north"  # read by lin_reg when naming the saved figure
x, y = north["latitude"], north["Wind Speed"]
lin_reg(x, y)

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress wind speed on latitude
fig_label = "south"  # read by lin_reg when naming the saved figure
x, y = south["latitude"], south["Wind Speed"]
lin_reg(x, y)

It appears there is a slight relationship trending toward lower wind speed as one gets closer to the equator, however the r value indicates not a strong correlation. 