# WeatherPy
----

#### Analysis
Observed Trends
1. Based on the data analysis below, it appears that the northern part of the globe has significantly higher cloudiness, humidity percentages, and wind speeds.
2. There is a very high correlation between max temperature and latitude for cities in both the northern and southern hemispheres. The closer a city is to the equator, the higher its max temperature.
3. It was interesting to note that wind speeds across cities in the northern hemisphere had almost no correlation with the cities' locations. Hence, it appears that winds could reach max speeds at any given city in the northern hemisphere.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json
from datetime import datetime

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Latitude and longitude bounds, each stored as a (min, max) pair
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Unique city names, in the order they are first encountered
cities = []

# Names already collected. A set keeps the duplicate check O(1) instead of
# scanning the whole `cities` list for every sampled coordinate.
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination.
# `lat_lngs` is the whole iterable of pairs; each pass of the loop unpacks a
# single (lat, lng) pair from it.
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Show the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# config information
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# build query URL; the city name is appended to it for each request
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# printing beginning of the output message
print("Beginning Data Retrieval")
print("-----------------------------")

# counter of the cities whose data was stored successfully
process_record = 0

# setting up lists to hold city data from each API call
city_name = []
city_country = []
city_date = []
city_lat = []
city_lng = []
city_humid = []
city_cloud = []
city_wind = []
city_temp = []

# loop through each city, call the API and store the city data
for city in cities:

    try:
        # A timeout keeps one stalled connection from hanging the whole run
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field BEFORE touching the lists. If any field is
        # missing, nothing has been appended yet, so all lists keep the same
        # length (the DataFrame built from them requires that).
        record = (
            response["name"],
            response["sys"]["country"],
            response["dt"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["humidity"],
            response["clouds"]["all"],
            response["wind"]["speed"],
            response["main"]["temp_max"],
        )
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure, a body that is not valid JSON, or a reply without
        # the expected fields. Other exceptions (e.g. KeyboardInterrupt) are
        # deliberately not swallowed.
        print("City not found. Skipping...")
        continue

    (name, country, date, lat, lng, humidity, cloudiness, wind, temp_max) = record
    city_name.append(name)
    city_country.append(country)
    city_date.append(date)
    city_lat.append(lat)
    city_lng.append(lng)
    city_humid.append(humidity)
    city_cloud.append(cloudiness)
    city_wind.append(wind)
    city_temp.append(temp_max)

    process_record += 1
    print_city_name = name
    print(f"Processing Record {process_record} | {print_city_name}")


# printing end of the output message
print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the per-city lists into a single table, one column per field
weather_columns = {
    "City": city_name,
    "Lat": city_lat,
    "Lng": city_lng,
    "Max Temp": city_temp,
    "Humidity": city_humid,
    "Cloudiness": city_cloud,
    "Wind Speed": city_wind,
    "Country": city_country,
    "Date": city_date,
}
weather_data = pd.DataFrame(weather_columns)

# Preview the first rows of the table
weather_data.head()

In [None]:
# producing descriptive statistics for the full weather_data table
# (before any humidity filtering is applied)
weather_data.describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# checking the data for cities with humidity level greater than 100%

# Evaluate the filter once and reuse it for both the check and the selection
valid_humidity = weather_data['Humidity'] <= 100

if valid_humidity.all():
    # Every row passes the filter, so the table is used as-is
    sorted_weather_data = weather_data
    print("Skipping this step because no cities with humidity level of greater than 100% were identified")
else:
    # Keep only the rows whose humidity is at most 100%
    sorted_weather_data = weather_data.loc[valid_humidity]
    print("Cities with humidity levels of greater than 100% removed from the dataset")

In [None]:
# producing descriptive statistics on the filtered data
# (sorted_weather_data holds only the rows with Humidity <= 100)
sorted_weather_data.describe()

In [None]:
# Row labels of every city whose reported humidity exceeds 100%
too_humid = weather_data['Humidity'] > 100
humidity_over_100 = weather_data.loc[too_humid].index
humidity_over_100

In [None]:
import os

# Drop all humidity outliers by index. Passing "inplace=False" makes drop()
# return a new DataFrame, which we call "clean_city_data"; weather_data itself
# is left unchanged.
clean_city_data = weather_data.drop(humidity_over_100, inplace=False)

# to_csv does not create missing folders, so make sure the target exists
os.makedirs("output_data", exist_ok=True)

# saving to csv file (index=False leaves the row labels out of the file)
clean_city_data.to_csv("output_data/WeatherPy.csv", index=False)

clean_city_data.head()

## Plotting the Data

## Latitude vs. Temperature Plot

In [None]:
# Today's date, formatted for the chart titles
date_today = datetime.now().strftime("%m/%d/%y")

# Scatter plot of max temperature against latitude
latitudes = clean_city_data["Lat"]
max_temps = clean_city_data["Max Temp"]
plt.scatter(latitudes, max_temps, marker="o", facecolor="steelblue", edgecolor="black")

# Title, axis labels and grid
plt.title(f"City Latitude vs. Max Temperature ({date_today})")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid(True)

# Write the figure to disk
plt.savefig("output_data/City Latitude vs Max Temperature.png")

# This plot shows how max temperatures are spread across latitudes.

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against latitude
latitudes = clean_city_data["Lat"]
humidity = clean_city_data["Humidity"]
plt.scatter(latitudes, humidity, marker="o", facecolor="steelblue", edgecolor="black")

# Title, axis labels and grid
plt.title(f"City Latitude vs. Humidity ({date_today})")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)

# Write the figure to disk
plt.savefig("output_data/City Latitude vs. Humidity.png")

# This plot shows how humidity percentages are spread across latitudes.

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against latitude
latitudes = clean_city_data["Lat"]
cloudiness = clean_city_data["Cloudiness"]
plt.scatter(latitudes, cloudiness, marker="o", facecolor="steelblue", edgecolor="black")

# Title, axis labels and grid
plt.title(f"City Latitude vs. Cloudiness ({date_today})")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid(True)

# Write the figure to disk
plt.savefig("output_data/City Latitude vs. Cloudiness.png")

# This plot shows how cloudiness percentages are spread across latitudes.

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude
latitudes = clean_city_data["Lat"]
wind_speeds = clean_city_data["Wind Speed"]
plt.scatter(latitudes, wind_speeds, marker="o", facecolor="steelblue", edgecolor="black")

# Title, axis labels and grid
plt.title(f"City Latitude vs. Wind Speed ({date_today})")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid(True)

# Write the figure to disk
plt.savefig("output_data/City Latitude vs. Wind Speed.png")

# This plot shows how wind speeds (mph) are spread across latitudes.

## Linear Regression

In [None]:
# Split the cleaned data by hemisphere: latitude 0 and above goes to `north`,
# negative latitudes go to `south`
latitude = clean_city_data["Lat"]
north = clean_city_data[latitude.ge(0)]
south = clean_city_data[latitude.lt(0)]

# defining a function for the rest of regression graphs
def linear_regression(x, y):
    """Fit a least-squares line to y against x and draw it over a scatter plot.

    Draws on the current matplotlib figure; adding axis labels and showing the
    figure is left to the caller. Prints the squared correlation coefficient
    of the fit.

    Args:
        x: Values for the horizontal axis. Must support ``x * slope +
            intercept`` element-wise (the callers in this file pass DataFrame
            columns).
        y: Values for the vertical axis, paired with ``x``.

    Returns:
        None.
    """
    # linregress also returns a p-value and standard error; they are not used
    slope, intercept, rvalue, *_ = linregress(x, y)
    regress_values = x * slope + intercept

    # Render a negative intercept as "- 3.2" rather than "+ -3.2"
    rounded_intercept = round(intercept, 2)
    sign = "+" if rounded_intercept >= 0 else "-"
    line_eq = f"y = {round(slope, 2)}x {sign} {abs(rounded_intercept)}"

    plt.scatter(x, y, facecolor="steelblue", edgecolor="black")
    plt.plot(x, regress_values, "r-")

    # Position the equation relative to the axes rather than at fixed data
    # coordinates, so it stays visible whatever the range of x and y is
    plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction",
                 fontsize=15, color="red")

    # rvalue**2 is the coefficient of determination, so label it accordingly
    print(f"The r-squared is: {rvalue**2}")

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress max temperature on latitude and plot the fit
northern_lat = north['Lat']
northern_max_temp = north['Max Temp']
graph1 = linear_regression(northern_lat, northern_max_temp)
plt.ylabel('Max temp')
plt.xlabel('Latitude')
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress max temperature on latitude and plot the fit
southern_lat = south['Lat']
southern_max_temp = south['Max Temp']
graph2 = linear_regression(southern_lat, southern_max_temp)
plt.ylabel('Max temp')
plt.xlabel('Latitude')
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress humidity (%) on latitude and plot the fit
northern_lat = north['Lat']
northern_humidity = north['Humidity']
graph2 = linear_regression(northern_lat, northern_humidity)
plt.ylabel('Humidity')
plt.xlabel('Latitude')
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress humidity (%) on latitude and plot the fit
southern_lat = south['Lat']
southern_humidity = south['Humidity']
graph2 = linear_regression(southern_lat, southern_humidity)
plt.ylabel('Humidity')
plt.xlabel('Latitude')
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress cloudiness (%) on latitude and plot the fit
northern_lat = north['Lat']
northern_cloudiness = north['Cloudiness']
graph2 = linear_regression(northern_lat, northern_cloudiness)
plt.ylabel('Cloudiness')
plt.xlabel('Latitude')
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress cloudiness (%) on latitude and plot the fit
southern_lat = south['Lat']
southern_cloudiness = south['Cloudiness']
graph2 = linear_regression(southern_lat, southern_cloudiness)
plt.ylabel('Cloudiness')
plt.xlabel('Latitude')
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress wind speed (mph) on latitude and plot the fit
northern_lat = north['Lat']
northern_wind_speed = north['Wind Speed']
graph2 = linear_regression(northern_lat, northern_wind_speed)
plt.ylabel('Wind Speed')
plt.xlabel('Latitude')
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress wind speed (mph) on latitude and plot the fit
southern_lat = south['Lat']
southern_wind_speed = south['Wind Speed']
graph2 = linear_regression(southern_lat, southern_wind_speed)
plt.ylabel('Wind Speed')
plt.xlabel('Latitude')
plt.show()