# WeatherPy


In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json


# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV file intended to hold the city data
output_data_file = "output_data/cities.csv"

# Valid (min, max) bounds, in degrees, for latitudes and longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

In [None]:
# Unique city names; starts empty and is filled by the nearest-city lookup.
cities = []

# Draw 1500 random latitudes and longitudes, uniformly over each range.
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)

# Pair the coordinates up. A bare zip() is a one-shot iterator that is empty
# after its first pass, so it is materialized into a list: any cell that
# loops over lat_lngs can then be re-run without silently iterating nothing.
lat_lngs = list(zip(lats, lngs))

In [None]:
# Look up the nearest city for every (lat, lng) pair
for latitude, longitude in lat_lngs:
    nearest_name = citipy.nearest_city(latitude, longitude).city_name

    # Already in the list: keep the cities list free of duplicates
    if nearest_name in cities:
        continue
    cities.append(nearest_name)

# Show the city count to confirm there are enough cities to work with
len(cities)

## Generate Cities List

In [None]:
# Endpoint of the OpenWeatherMap forecast API. No city is fixed here: each
# request made later picks its city with a "q" parameter, so the hard-coded
# "id=524901" was removed to keep it from competing with "q". https is used
# so the API key is not sent in clear text.
base_url = "https://api.openweathermap.org/data/2.5/forecast?appid="

# Authenticated URL that every per-city request is built on. The one-off
# request that used to be issued here was dropped: its result was never
# read and it only spent an API call.
url = base_url + weather_api_key

In [None]:
# Empty lists that collect one value per successfully processed city.
# They are always appended to together, so they stay the same length.
country = []
city_name = []
date = []
cloudiness = []
wind_speed = []
max_temp = []
humidity = []
lat = []
lng = []

# Request data on each city
count = 1

for city in cities:

    try:
        # Let requests build and encode the query string instead of
        # concatenating the raw city name into the URL. The timeout keeps
        # one stalled connection from hanging the whole run.
        response_json = requests.get(
            url, params={"units": "Imperial", "q": city}, timeout=30
        ).json()
    except (requests.RequestException, ValueError):
        # Network failure, or a response body that is not valid JSON.
        print("Request failed.skipping.")
        continue

    try:
        # Read every field BEFORE touching the result lists. A key missing
        # halfway through must not leave some lists one element longer than
        # the others, or building a DataFrame from them would fail.
        city_info = response_json["city"]
        first_entry = response_json["list"][0]

        city_country = city_info["country"]
        city_label = city_info["name"]
        entry_date = first_entry["dt"]
        entry_clouds = first_entry["clouds"]["all"]
        entry_wind = first_entry["wind"]["speed"]
        entry_max_temp = first_entry["main"]["temp_max"]
        entry_humidity = first_entry["main"]["humidity"]
        city_lat = city_info["coord"]["lat"]
        city_lng = city_info["coord"]["lon"]
    except (KeyError, IndexError, TypeError):
        # Error payloads (e.g. unknown city) lack the expected fields.
        print("Record not found.skipping.")
        continue

    country.append(city_country)
    city_name.append(city_label)
    date.append(entry_date)
    cloudiness.append(entry_clouds)
    wind_speed.append(entry_wind)
    max_temp.append(entry_max_temp)
    humidity.append(entry_humidity)
    lat.append(city_lat)
    lng.append(city_lng)

    print(f"Processing record {count}|{city}")
    count += 1

print("Completed!")
   

In [None]:
# Assemble the collected per-city lists into one table, one row per city
weather_data = pd.DataFrame(
    data={
        "City": city_name,
        "Cloudiness": cloudiness,
        "Country": country,
        "Date": date,
        "Humidity": humidity,
        "Latitude": lat,
        "Longitude": lng,
        "Max Temp": max_temp,
        "Wind Speed": wind_speed,
    }
)

# Export the city data into a .csv.
weather_data.to_csv("../output_data/weather.csv")

# Display the first rows of the DataFrame
weather_data.head()

In [None]:
#weather_data.dtypes == object

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Select the rows (cities) whose humidity reading is above 100%
high_humidity = weather_data[weather_data["Humidity"].gt(100)]
high_humidity

In [None]:
# Make a new DataFrame from the city data without incomplete rows.
clean_city_data = weather_data.dropna(inplace=False)

# Drop the humidity outliers as well: dropna() alone never removes a reading
# above 100%. When no such reading exists, every row is kept.
clean_city_data = clean_city_data.loc[clean_city_data["Humidity"] <= 100]

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of max temperature against latitude for every city
x = weather_data["Latitude"]
y1 = weather_data["Max Temp"]

fig, ax = plt.subplots()
ax.scatter(x, y1)
ax.set_xlabel("Latitude")
ax.set_ylabel("Max Temperature")
ax.set_title("Latitude vs. Max Temperature")

# Write the figure to disk before showing it
fig.savefig("latvsmaxtemp")
plt.show()

In [None]:
#In the southern hemisphere, max temperature rises with latitude (positive correlation); in the northern hemisphere it falls as latitude increases (negative correlation).


## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against latitude for every city
y2 = weather_data["Humidity"]

fig, ax = plt.subplots()
ax.scatter(x, y2)
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity(%)")
ax.set_title("Latitude vs. Humidity")

# Write the figure to disk before showing it
fig.savefig("latvshumidity")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against latitude for every city
y3 = weather_data["Cloudiness"]

fig, ax = plt.subplots()
ax.scatter(x, y3)
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness(%)")
ax.set_title("Latitude vs. Cloudiness")

# Write the figure to disk before showing it
fig.savefig("latvscloudiness")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude for every city
y4 = weather_data["Wind Speed"]

plt.scatter(x, y4)
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("Latitude vs. Wind Speed")

# Save under its own name: "latvshumidity" is the humidity plot's file and
# reusing it here overwrote that figure.
plt.savefig("latvswindspeed")
plt.show()

## Linear Regression

In [None]:
def lin_reg_plot(x_values, y_values):
    """Plot y_values against x_values together with their regression line.

    Draws a scatter plot with pyplot, overlays the least-squares line in
    red, annotates the line's equation at the point (median x, median y),
    labels the x-axis "Latitude", prints the r-squared of the fit and
    shows the figure.

    Args:
        x_values: One-dimensional numeric data for the x-axis; a pandas
            Series, a NumPy array or a plain sequence.
        y_values: Numeric data of the same length for the y-axis.

    Returns:
        None. The plot and the printed r-squared are the only outputs.
    """
    # Work on arrays so plain lists and NumPy arrays are accepted as well
    # as pandas Series (the only inputs that offer a .median() method).
    x_array = np.asarray(x_values, dtype=float)
    y_array = np.asarray(y_values, dtype=float)

    fit = linregress(x_array, y_array)
    regress_values = x_array * fit.slope + fit.intercept

    # Render a negative intercept as "- 3.2" instead of "+ -3.2".
    sign = "-" if fit.intercept < 0 else "+"
    line_eq = (
        f"y = {round(fit.slope, 2)}x {sign} {abs(round(fit.intercept, 2))}"
    )

    plt.scatter(x_array, y_array)
    plt.plot(x_array, regress_values, "r-")
    plt.annotate(
        line_eq,
        (np.median(x_array), np.median(y_array)),
        fontsize=15,
        color="red",
    )
    plt.xlabel("Latitude")
    print(f"The r-squared is : {fit.rvalue**2}")
    plt.show()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Preview the first five rows of the cleaned data
clean_city_data.head(n=5)

In [None]:
# Split the cleaned data at the equator; latitude 0 counts as northern.
northern_hemisphere = clean_city_data.loc[clean_city_data["Latitude"] >= 0]
southern_hemisphere = clean_city_data.loc[clean_city_data["Latitude"] < 0]

x_values = northern_hemisphere["Latitude"]
y_values = northern_hemisphere["Max Temp"]

# Label the y-axis the same way the other regression cells do.
plt.ylabel("Max Temp(F)")
# lin_reg_plot draws and prints on its own and returns None, so wrapping the
# call in print() only added a stray "None" to the output.
lin_reg_plot(x_values, y_values)


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress max temperature on latitude.
x_values = southern_hemisphere["Latitude"]
y_values = southern_hemisphere["Max Temp"]

plt.ylabel("Max Temp(F)")
# lin_reg_plot draws and prints on its own and returns None, so wrapping the
# call in print() only added a stray "None" to the output.
lin_reg_plot(x_values, y_values)

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress humidity on latitude
x_values, y_values = (
    northern_hemisphere["Latitude"],
    northern_hemisphere["Humidity"],
)

plt.ylabel("Humidity")
lin_reg_plot(x_values, y_values)

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress humidity on latitude. This cell used to plot
# "Max Temp" again, a copy of the southern temperature cell, so the
# southern humidity regression was never actually produced.
x_values = southern_hemisphere["Latitude"]
y_values = southern_hemisphere["Humidity"]

plt.ylabel("Humidity")
lin_reg_plot(x_values, y_values)

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress cloudiness on latitude
x_values, y_values = (
    northern_hemisphere["Latitude"],
    northern_hemisphere["Cloudiness"],
)

plt.ylabel("Cloudiness(%)")
lin_reg_plot(x_values, y_values)

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress cloudiness on latitude
x_values, y_values = (
    southern_hemisphere["Latitude"],
    southern_hemisphere["Cloudiness"],
)

plt.ylabel("Cloudiness(%)")
lin_reg_plot(x_values, y_values)

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress wind speed on latitude
x_values, y_values = (
    northern_hemisphere["Latitude"],
    northern_hemisphere["Wind Speed"],
)

plt.ylabel("Wind Speed(mph)")
lin_reg_plot(x_values, y_values)

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress wind speed on latitude
x_values, y_values = (
    southern_hemisphere["Latitude"],
    southern_hemisphere["Wind Speed"],
)

plt.ylabel("Wind Speed(mph)")
lin_reg_plot(x_values, y_values)