# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress, pearsonr
import csv

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV file intended to hold the exported city data
output_data_file = "output_data/cities.csv"

# Coordinate bounds used when sampling random points, as (min, max) pairs
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Build a de-duplicated list of city names from randomly sampled coordinates.
# `cities` keeps first-seen order; `seen_cities` mirrors it as a set so the
# duplicate check is O(1) instead of a scan of the whole list on every point.
cities = []
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)

# zip() yields a one-shot iterator: it is exhausted by the loop below
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

In [None]:
# print(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Query the OpenWeatherMap current-weather endpoint once per city and collect
# the fields needed for the analysis. HTTPS is used so the API key is not
# sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Base query string (API key + units). The loop below passes the same values
# through `params`; this name is kept for any code that still appends a city
# name to it.
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# One list per output column; position i in every list describes the same city.
City = []
Cloudiness = []
Country = []
Date = []
Humidity = []
Lat = []
Lng = []
Max_Temp = []
Wind_Speed = []

print("Retrieving Data")
print("-----------------")

# NOTE(review): requests are issued back-to-back with no throttling; confirm
# the API plan's rate limit covers len(cities) calls.
x = 0  # stays 0 when `cities` is empty; otherwise ends as the last record number
for x, city in enumerate(cities, start=1):
    print(f"Retrieving Record {x} | {city}")
    try:
        # `params` lets requests URL-encode city names containing spaces or
        # punctuation; the timeout stops one stalled connection from hanging
        # the whole run.
        response = requests.get(
            url,
            params={"appid": weather_api_key, "units": units, "q": city},
            timeout=10,
        )
        data = response.json()

        # Read every field into a temporary first, so a missing key leaves
        # all result lists untouched (and therefore equal in length).
        cn = data["name"]
        cloud = data["clouds"]["all"]
        ctry = data["sys"]["country"]
        dt = data["dt"]
        hm = data["main"]["humidity"]
        lt = data["coord"]["lat"]
        ln = data["coord"]["lon"]
        mt = data["main"]["temp_max"]
        ws = data["wind"]["speed"]
    except KeyError:
        # The response lacks an expected field (presumably an error payload,
        # e.g. for a city the service does not know).
        print("City not available... skip!")
        continue
    except (requests.exceptions.RequestException, ValueError) as err:
        # Network failure, timeout, or a body that is not valid JSON. Only the
        # exception type is printed: the full message can include the request
        # URL, which contains the API key.
        print(f"Request failed ({type(err).__name__})... skip!")
        continue

    City.append(cn)
    Cloudiness.append(cloud)
    Country.append(ctry)
    Date.append(dt)
    Humidity.append(hm)
    Lat.append(lt)
    Lng.append(ln)
    Max_Temp.append(mt)
    Wind_Speed.append(ws)

print("-----------------")
print("Retrieval Complete")

In [None]:
#   name = data["name"]
#   print(name)
    

# query_params = {
#     "appid": weather_api_key,
#     "q": city,
#     "units": units
# }
# weather_response = requests.get(url, params=query_params)
# weather_json = weather_response.json() 
# print(Cloudiness)

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the per-city lists collected from the API into one table,
# one column per list.
weather_columns = {
    "City": City,
    "Cloudiness": Cloudiness,
    "Country": Country,
    "Date": Date,
    "Humidity": Humidity,
    "Latitude": Lat,
    "Longitude": Lng,
    "Max Temp": Max_Temp,
    "Wind Speed": Wind_Speed,
}
weather_df = pd.DataFrame(weather_columns)

# Interpret the raw "Date" values as seconds since the Unix epoch.
weather_df["Date"] = pd.to_datetime(weather_df["Date"], unit="s")

# Export the table (row index included) next to the notebook.
weather_df.to_csv("city_output.csv")

# Display the DataFrame
weather_df

In [None]:
# Summary statistics of the collected weather data (pandas default settings).
weather_df.describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Select the rows whose humidity is strictly above 100%, as this step's
# heading specifies. A reading of exactly 100% is valid and must not be flagged.
hum_100 = weather_df.loc[weather_df["Humidity"] > 100]
hum_100

In [None]:
# Remove the rows flagged in `hum_100` by their index labels. `drop` returns
# a new DataFrame, so `weather_df` itself is left unchanged.
dropped = weather_df.drop(index=hum_100.index)
dropped

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of maximum temperature against latitude for every retrieved city.
x1 = weather_df["Latitude"]
y1 = weather_df["Max Temp"]
plt.scatter(x1, y1)

# The plotting instructions ask for the date of analysis in the title;
# stamp it with the day this cell is run.
plt.title(f"Latitude vs. Max Temp. ({time.strftime('%Y-%m-%d')})")
plt.xlabel("Latitude")
plt.ylabel("Max Temp (F)")

# No extension given: matplotlib falls back to its default output format.
plt.savefig("lat_vs_maxtemp")

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against latitude for every retrieved city.
x2 = weather_df["Latitude"]
y2 = weather_df["Humidity"]
plt.scatter(x2, y2)

# The plotting instructions ask for the date of analysis in the title;
# stamp it with the day this cell is run.
plt.title(f"Latitude vs. Humidity ({time.strftime('%Y-%m-%d')})")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

# No extension given: matplotlib falls back to its default output format.
plt.savefig("lat_vs_humidity")

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against latitude for every retrieved city.
x3 = weather_df["Latitude"]
y3 = weather_df["Cloudiness"]
plt.scatter(x3, y3)

# The plotting instructions ask for the date of analysis in the title;
# stamp it with the day this cell is run.
plt.title(f"Latitude vs. Cloudiness ({time.strftime('%Y-%m-%d')})")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

# No extension given: matplotlib falls back to its default output format.
plt.savefig("lat_vs_cloudiness")

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude for every retrieved city.
x4 = weather_df["Latitude"]
y4 = weather_df["Wind Speed"]
plt.scatter(x4, y4)

# The plotting instructions ask for the date of analysis in the title;
# stamp it with the day this cell is run.
plt.title(f"Latitude vs. Wind Speed ({time.strftime('%Y-%m-%d')})")
plt.xlabel("Latitude")
# Unit label matches the wind-speed regression plots further down.
plt.ylabel("Wind Speed (mph)")

# No extension given: matplotlib falls back to its default output format.
plt.savefig("lat_vs_windspeed")

## Linear Regression

In [None]:
# Cities strictly north of the equator (a latitude of exactly 0 is excluded).
north_df = weather_df[weather_df["Latitude"].gt(0)]
north_df

In [None]:
# Cities strictly south of the equator (a latitude of exactly 0 is excluded).
south_df = weather_df[weather_df["Latitude"].lt(0)]
south_df

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: max temperature against latitude, with a fitted line.
x5 = north_df["Latitude"]
y5 = north_df["Max Temp"]
plt.scatter(x5, y5)

plt.title("Latitude vs. Max Temp. (Northern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Max Temp (F)")

# Least-squares fit of y5 on x5. `rvalue` is the Pearson correlation
# coefficient, so no separate correlation call is needed.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x5, y5)

# Fitted temperature at each observed latitude
predict = x5 * slope + intercept

# Keep two decimals: rounding to whole numbers would show a slope such as
# 0.3 as 0 and misstate the fitted line.
line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
r_val = rvalue
r_sq = r_val ** 2

plt.plot(x5, predict, color="red")
# Annotation positions are in data coordinates (latitude, y value)
plt.annotate(line_eq, (60, 70), color="red")
plt.annotate(f"R-squared: {round(r_sq, 4)}", (60, 60), color="red")

plt.savefig("N_lat_vs_maxtemp")

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: max temperature against latitude, with a fitted line.
x6 = south_df["Latitude"]
y6 = south_df["Max Temp"]
plt.scatter(x6, y6)

plt.title("Latitude vs. Max Temp. (Southern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Max Temp (F)")

# Least-squares fit of y6 on x6. `rvalue` is the Pearson correlation
# coefficient, so no separate correlation call is needed.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x6, y6)

# Fitted temperature at each observed latitude
s_mt_lat_predict = x6 * slope + intercept

# Keep two decimals: rounding to whole numbers would show a slope such as
# 0.3 as 0 and misstate the fitted line.
line_eq6 = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
r_val6 = rvalue
r_sq6 = r_val6 ** 2

plt.plot(x6, s_mt_lat_predict, color="red")
# Annotation positions are in data coordinates (latitude, y value)
plt.annotate(line_eq6, (-50, 90), color="red")
plt.annotate(f"R-squared: {round(r_sq6, 4)}", (-50, 85), color="red")

plt.savefig("S_lat_vs_maxtemp")

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: humidity against latitude, with a fitted line.
x7 = north_df["Latitude"]
y7 = north_df["Humidity"]
plt.scatter(x7, y7)

plt.title("Latitude vs. Humidity (Northern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

# Least-squares fit of y7 on x7. `rvalue` is the Pearson correlation
# coefficient, so no separate correlation call is needed.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x7, y7)

# Fitted humidity at each observed latitude
n_hum_lat_predict = x7 * slope + intercept

# Keep two decimals: rounding to whole numbers would show a slope such as
# 0.3 as 0 and misstate the fitted line.
line_eq7 = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
r_val7 = rvalue
r_sq7 = r_val7 ** 2

plt.plot(x7, n_hum_lat_predict, color="red")
# Annotation positions are in data coordinates (latitude, y value)
plt.annotate(line_eq7, (60, 20), color="red")
plt.annotate(f"R-squared: {round(r_sq7, 4)}", (60, 10), color="red")

plt.savefig("N_lat_vs_humidity")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: humidity against latitude, with a fitted line.
x8 = south_df["Latitude"]
y8 = south_df["Humidity"]
plt.scatter(x8, y8)

plt.title("Latitude vs. Humidity (Southern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

# Least-squares fit of y8 on x8. `rvalue` is the Pearson correlation
# coefficient, so no separate correlation call is needed.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x8, y8)

# Fitted humidity at each observed latitude
s_hum_lat_predict = x8 * slope + intercept

# Keep two decimals: rounding to whole numbers would show a slope such as
# 0.3 as 0 and misstate the fitted line.
line_eq8 = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
r_val8 = rvalue
r_sq8 = r_val8 ** 2

plt.plot(x8, s_hum_lat_predict, color="red")
# Annotation positions are in data coordinates (latitude, y value)
plt.annotate(line_eq8, (-50, 100), color="red")
plt.annotate(f"R-squared: {round(r_sq8, 4)}", (-50, 95), color="red")

plt.savefig("S_lat_vs_humidity")

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: cloudiness against latitude, with a fitted line.
x9 = north_df["Latitude"]
y9 = north_df["Cloudiness"]
plt.scatter(x9, y9)

plt.title("Latitude vs. Cloudiness (Northern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

# Least-squares fit of y9 on x9. `rvalue` is the Pearson correlation
# coefficient, so no separate correlation call is needed.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x9, y9)

# Fitted cloudiness at each observed latitude
n_cloud_lat_predict = x9 * slope + intercept

# Keep two decimals: rounding to whole numbers would show a slope such as
# 0.3 as 0 and misstate the fitted line.
line_eq9 = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
r_val9 = rvalue
r_sq9 = r_val9 ** 2

plt.plot(x9, n_cloud_lat_predict, color="red")
# Annotation positions are in data coordinates (latitude, y value)
plt.annotate(line_eq9, (60, 20), color="red")
plt.annotate(f"R-squared: {round(r_sq9, 4)}", (60, 10), color="red")

plt.savefig("N_lat_vs_cloud")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: cloudiness against latitude, with a fitted line.
x10 = south_df["Latitude"]
y10 = south_df["Cloudiness"]
plt.scatter(x10, y10)

plt.title("Latitude vs. Cloudiness (Southern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

# Least-squares fit of y10 on x10. `rvalue` is the Pearson correlation
# coefficient, so no separate correlation call is needed.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x10, y10)

# Fitted cloudiness at each observed latitude
s_cloud_lat_predict = x10 * slope + intercept

# Keep two decimals: rounding to whole numbers would show a slope such as
# 0.3 as 0 and misstate the fitted line.
line_eq10 = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
r_val10 = rvalue
r_sq10 = r_val10 ** 2

plt.plot(x10, s_cloud_lat_predict, color="red")
# Annotation positions are in data coordinates (latitude, y value)
plt.annotate(line_eq10, (-50, 60), color="red")
plt.annotate(f"R-squared: {round(r_sq10, 4)}", (-50, 55), color="red")

plt.savefig("S_lat_vs_cloud")

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: wind speed against latitude, with a fitted line.
x11 = north_df["Latitude"]
y11 = north_df["Wind Speed"]
plt.scatter(x11, y11)

plt.title("Latitude vs. Wind Speed (Northern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

# Least-squares fit of y11 on x11. `rvalue` is the Pearson correlation
# coefficient, so no separate correlation call is needed.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x11, y11)

# Fitted wind speed at each observed latitude
n_ws_lat_predict = x11 * slope + intercept

# Keep two decimals: rounding to whole numbers would show a slope such as
# 0.3 as 0 and misstate the fitted line.
line_eq11 = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
r_val11 = rvalue
r_sq11 = r_val11 ** 2

plt.plot(x11, n_ws_lat_predict, color="red")
# Annotation positions are in data coordinates (latitude, y value)
plt.annotate(line_eq11, (55, 30), color="red")
plt.annotate(f"R-squared: {round(r_sq11, 4)}", (55, 27), color="red")

plt.savefig("N_lat_vs_ws")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: wind speed against latitude, with a fitted line.
x12 = south_df["Latitude"]
y12 = south_df["Wind Speed"]
plt.scatter(x12, y12)

# Title names the plotted variable (wind speed), matching the y-axis label.
plt.title("Latitude vs. Wind Speed (Southern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

# Least-squares fit of y12 on x12. `rvalue` is the Pearson correlation
# coefficient, so no separate correlation call is needed.
(slope, intercept, rvalue, pvalue, stderr) = linregress(x12, y12)

# Fitted wind speed at each observed latitude
s_ws_lat_predict = x12 * slope + intercept

# Keep two decimals: rounding to whole numbers would show a slope such as
# 0.3 as 0 and misstate the fitted line.
line_eq12 = f"y = {round(slope, 2)}x + {round(intercept, 2)}"
r_val12 = rvalue
r_sq12 = r_val12 ** 2

plt.plot(x12, s_ws_lat_predict, color="red")
# Annotation positions are in data coordinates (latitude, y value)
plt.annotate(line_eq12, (-20, 23), color="red")
plt.annotate(f"R-squared: {round(r_sq12, 4)}", (-20, 21), color="red")

plt.savefig("S_lat_vs_ws")