# WeatherPy
----

#### Analysis
This study examines the effect of geographical latitude on four weather measures: Temperature, Humidity, Cloudiness, and Wind Speed.

#### Observed Trends
1.  Latitude did not show a significant relationship with Humidity, Cloudiness, or Wind Speed.
2.  Increasing latitude was shown to correspond directly to decreasing temperatures in the Northern Hemisphere; however, for a comparable increase in latitude in the Southern Hemisphere, the relationship was much weaker.
3.  Given comparable latitude ranges in both the Northern and Southern Hemispheres, with a strong temperature link in the North but a much weaker one in the South, it should not be concluded that latitude is the only factor contributing to global temperatures.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV): relative path the retrieved city weather table is exported to
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes: (low, high) bounds used when drawing
# the random coordinates that seed the city list
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Unique city names in first-seen order, plus a set mirror of the same
# names so the duplicate check is O(1) instead of a list scan per point
cities = []
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
# zip is lazy and can only be consumed once; the loop below is its only reader
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for point_lat, point_lng in lat_lngs:
    city = citipy.nearest_city(point_lat, point_lng).city_name

    # Keep only the first occurrence of each city name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Save config information.
# HTTPS is used so the API key carried in the query string is not sent in plaintext.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Build partial query URL; the city name is appended for each request
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Parallel lists holding one entry per successfully retrieved city
city_final = []
lat = []
long = []
maxtemp = []
humidity = []
cloudiness = []
wind = []
country = []
date = []

# City number within the current set, and the set number (sets of 50)
record = 0
recordset = 1

# Display header for the print log
print("Beginning Data Retrieval")
print("-----------------------------")

# Loop through the list of cities and perform a request for data on each
for city in cities:

    # Count and log every attempted city exactly once, whether or not
    # data comes back for it
    record += 1
    print(f"Processing Record {record} of Set {recordset} | {city}")

    try:
        # The timeout stops one stalled connection from hanging the whole run
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field before appending anything: if a key is missing
        # part-way through, no list receives a partial entry and all the
        # lists stay the same length for the DataFrame built from them
        fields = (
            response['name'],
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp_max'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
            # 'dt' is the time the reading was calculated (Unix seconds, UTC),
            # which is what a "Date" column should hold; 'sys.sunrise' is the
            # city's sunrise time, not the observation time
            response['dt'],
        )
    except (KeyError, IndexError):
        # A missing key means the response carried no weather data for this city
        print("City not found. Skipping...")
    except (requests.exceptions.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        # The exception text is deliberately not printed: it can contain
        # the request URL, which includes the API key.
        print("Request failed. Skipping...")
    else:
        # Append city's data to lists
        targets = (city_final, lat, long, maxtemp, humidity,
                   cloudiness, wind, country, date)
        for target, value in zip(targets, fields):
            target.append(value)

    # Reset the city count at 50 and step Set number
    if record >= 50:
        record = 0
        recordset += 1

# Display footer for the print log
print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Gather the per-city result lists into one table, one column per field
weather_columns = {
    "City": city_final,
    "Lat": lat,
    "Lng": long,
    "Max Temp": maxtemp,
    "Humidity": humidity,
    "Cloudiness": cloudiness,
    "Wind Speed": wind,
    "Country": country,
    "Date": date,
}
weather_df = pd.DataFrame(weather_columns)

# Limit both coordinate columns to two decimal places by formatting each
# value as text and converting back, so the columns remain floats
two_decimals = '{:.2f}'.format
for coord_column in ('Lat', 'Lng'):
    weather_df[coord_column] = weather_df[coord_column].map(two_decimals).astype(float)

# Write the table to the output CSV with a header row and no index column
weather_df.to_csv(output_data_file, index=False, header=True)

# Show the first rows of the table
weather_df.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Describe the DataFrame: summary statistics for the numeric columns, which
# lets the maximum Humidity be checked against the 100% limit
weather_df.describe()

In [None]:
# Flag rows reporting humidity above 100% and keep their index labels
over_saturated = weather_df['Humidity'] > 100
excess_humidity = weather_df[over_saturated].index
excess_humidity

In [None]:
# Build "clean_city_data" as a new DataFrame with the humidity-outlier rows
# removed by index label; drop() returns a copy, so weather_df is untouched.
clean_city_data = weather_df.drop(index=excess_humidity)
clean_city_data.head()

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs in the output_data folder
* Below are examples of what you should get but your results will be different.

## Latitude vs. Temperature Plot

In [None]:
# Reinterpret the Date column as second-resolution datetimes so it can be formatted
clean_city_data['Date'] = clean_city_data['Date'].astype("datetime64[s]")
# The earliest timestamp in the dataset becomes the date shown in plot titles
earliest_timestamp = clean_city_data['Date'].min()
display_date = earliest_timestamp.strftime('%m/%d/%y')

# Scatter of max temperature against latitude, saved to disk and then shown
marker_style = dict(marker="o", facecolors="C0", edgecolors="black")
plt.scatter(clean_city_data['Lat'], clean_city_data['Max Temp'], **marker_style)
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.title(f"City Latitude vs. Max Temperature ({display_date})")
plt.savefig("Images/Lat_v_Temp.png")
plt.show()

**Observed Trends:** Plot demonstrates that Max Temperature increases as one approaches the equator and decreases as one moves away from the equator to higher latitudes. However, for the immediate dataset, temperatures at equivalent higher latitudes in the Northern and Southern hemispheres are not equivalent: Northern temperatures in the dataset drop more precipitously as one moves to higher latitudes.

## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude, saved to disk and then shown
marker_style = dict(marker="o", facecolors="C0", edgecolors="black")
plt.scatter(clean_city_data['Lat'], clean_city_data['Humidity'], **marker_style)
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title(f"City Latitude vs. Humidity ({display_date})")
plt.savefig("Images/Lat_v_Humidity.png")
plt.show()

**Observed Trends:** Plot compares humidity levels against latitude. Visually, no immediate correlation is observed. However, lower, Northern latitudes may show a more concentrated cluster of lower humidity nearer the equator for reasons not immediately apparent from the plot's study.

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude, saved to disk and then shown
marker_style = dict(marker="o", facecolors="C0", edgecolors="black")
plt.scatter(clean_city_data['Lat'], clean_city_data['Cloudiness'], **marker_style)
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title(f"City Latitude vs. Cloudiness ({display_date})")
plt.savefig("Images/Lat_v_Cloudiness.png")
plt.show()

**Observed Trends:** Plot compares cloudiness levels against latitude. Study points are distributed throughout the grid; no immediate correlation observed. 

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude, saved to disk and then shown
marker_style = dict(marker="o", facecolors="C0", edgecolors="black")
plt.scatter(clean_city_data['Lat'], clean_city_data['Wind Speed'], **marker_style)
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title(f"City Latitude vs. Wind Speed ({display_date})")
plt.savefig("Images/Lat_v_Wind_Speed.png")
plt.show()

**Observed Trends:** Plot compares wind speed against latitude levels. A handful of Northern cities show higher average wind speeds, but otherwise, study points are distributed throughout the grid under 20 mph average. No immediate correlation observed. 

## Linear Regression

In [None]:
# Create separate DataFrames for northern and southern hemispheres
clean_city_data_north = pd.DataFrame(clean_city_data.loc[clean_city_data['Lat']>=0])
clean_city_data_south = pd.DataFrame(clean_city_data.loc[clean_city_data['Lat']<0])

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Calculate linear regression for Northern Hemisphere Latitude vs. Max Temperature
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_north['Lat'], clean_city_data_north['Max Temp'])
regress_values = clean_city_data_north['Lat'] * slope + intercept

# Build the equation label with an explicit sign so a negative intercept
# reads "x - 3.2" instead of "x + -3.2"
rounded_intercept = round(intercept, 2)
intercept_sign = "-" if rounded_intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(rounded_intercept)}"

# Plot Northern Hemisphere Latitude vs. Max Temperature with linear regression, labels
plt.scatter(clean_city_data_north['Lat'], clean_city_data_north['Max Temp'], marker="o")

plt.plot(clean_city_data_north['Lat'], regress_values, "r-")
# Anchor the label in axes-fraction coordinates (lower-left corner). With
# data coordinates matplotlib skips an annotation whose point lies outside
# the axes limits, and those limits change with every random city sample.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title(f"Northern Hemisphere: \n City Latitude vs. Max Temperature ({display_date})")
plt.xlabel("Latitude")
plt.ylabel("Max Temp")

# rvalue is the correlation coefficient; its square is r-squared
print(f"The r-squared is: {rvalue**2}")
plt.savefig("Images/Lat_v_Temp_North.png")
plt.show()

**Observed Trends:** Plot compares maximum temperature against Northern latitude levels. Correlation shown with a very high r-value.

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Calculate linear regression for Southern Hemisphere Latitude vs. Max Temperature
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_south['Lat'], clean_city_data_south['Max Temp'])
regress_values = clean_city_data_south['Lat'] * slope + intercept

# Build the equation label with an explicit sign so a negative intercept
# reads "x - 3.2" instead of "x + -3.2"
rounded_intercept = round(intercept, 2)
intercept_sign = "-" if rounded_intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(rounded_intercept)}"

# Plot Southern Hemisphere Latitude vs. Max Temperature with linear regression, labels
plt.scatter(clean_city_data_south['Lat'], clean_city_data_south['Max Temp'], marker="o")

plt.plot(clean_city_data_south['Lat'], regress_values, "r-")
# Anchor the label in axes-fraction coordinates (lower-left corner). With
# data coordinates matplotlib skips an annotation whose point lies outside
# the axes limits, and those limits change with every random city sample.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title(f"Southern Hemisphere: \n City Latitude vs. Max Temperature ({display_date})")
plt.xlabel("Latitude")
plt.ylabel("Max Temp")

# rvalue is the correlation coefficient; its square is r-squared
print(f"The r-squared is: {rvalue**2}")
plt.savefig("Images/Lat_v_Temp_South.png")
plt.show()

**Observed Trends:** Plot compares maximum temperature against Southern latitude levels. Weak linear relationship shown with a lower r-value.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Calculate linear regression for Northern Hemisphere Latitude vs. Humidity
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_north['Lat'], clean_city_data_north['Humidity'])
regress_values = clean_city_data_north['Lat'] * slope + intercept

# Build the equation label with an explicit sign so a negative intercept
# reads "x - 3.2" instead of "x + -3.2"
rounded_intercept = round(intercept, 2)
intercept_sign = "-" if rounded_intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(rounded_intercept)}"

# Plot Northern Hemisphere Latitude vs. Humidity with linear regression, labels
plt.scatter(clean_city_data_north['Lat'], clean_city_data_north['Humidity'], marker="o")

plt.plot(clean_city_data_north['Lat'], regress_values, "r-")
# Anchor the label in axes-fraction coordinates (lower-left corner). With
# data coordinates matplotlib skips an annotation whose point lies outside
# the axes limits, and those limits change with every random city sample.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title(f"Northern Hemisphere: \n City Latitude vs. Humidity (%) ({display_date})")
plt.xlabel("Latitude")
plt.ylabel("Humidity")

# rvalue is the correlation coefficient; its square is r-squared
print(f"The r-squared is: {rvalue**2}")
plt.savefig("Images/Lat_v_Humidity_North.png")
plt.show()

**Observed Trends:** Plot compares humidity against Northern latitude levels. No real correlation shown with a very low r-value.

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Calculate linear regression for Southern Hemisphere Latitude vs. Humidity
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_south['Lat'], clean_city_data_south['Humidity'])
regress_values = clean_city_data_south['Lat'] * slope + intercept

# Build the equation label with an explicit sign so a negative intercept
# reads "x - 3.2" instead of "x + -3.2"
rounded_intercept = round(intercept, 2)
intercept_sign = "-" if rounded_intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(rounded_intercept)}"

# Plot Southern Hemisphere Latitude vs. Humidity with linear regression, labels
plt.scatter(clean_city_data_south['Lat'], clean_city_data_south['Humidity'], marker="o")

plt.plot(clean_city_data_south['Lat'], regress_values, "r-")
# Anchor the label in axes-fraction coordinates (lower-left corner). With
# data coordinates matplotlib skips an annotation whose point lies outside
# the axes limits, and those limits change with every random city sample.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title(f"Southern Hemisphere: \n City Latitude vs. Humidity (%) ({display_date})")
plt.xlabel("Latitude")
plt.ylabel("Humidity")

# rvalue is the correlation coefficient; its square is r-squared
print(f"The r-squared is: {rvalue**2}")
plt.savefig("Images/Lat_v_Humidity_South.png")
plt.show()

**Observed Trends:** Plot compares humidity against Southern latitude levels. Very weak correlation shown with a low r-value.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Calculate linear regression for Northern Hemisphere Latitude vs. Cloudiness
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_north['Lat'], clean_city_data_north['Cloudiness'])
regress_values = clean_city_data_north['Lat'] * slope + intercept

# Build the equation label with an explicit sign so a negative intercept
# reads "x - 3.2" instead of "x + -3.2"
rounded_intercept = round(intercept, 2)
intercept_sign = "-" if rounded_intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(rounded_intercept)}"

# Plot Northern Hemisphere Latitude vs. Cloudiness with linear regression, labels
plt.scatter(clean_city_data_north['Lat'], clean_city_data_north['Cloudiness'], marker="o")

plt.plot(clean_city_data_north['Lat'], regress_values, "r-")
# Anchor the label in axes-fraction coordinates (lower-left corner). With
# data coordinates matplotlib skips an annotation whose point lies outside
# the axes limits, and those limits change with every random city sample.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title(f"Northern Hemisphere: \n City Latitude vs. Cloudiness (%) ({display_date})")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")

# rvalue is the correlation coefficient; its square is r-squared
print(f"The r-squared is: {rvalue**2}")
plt.savefig("Images/Lat_v_Cloudiness_North.png")
plt.show()

**Observed Trends:** Plot compares cloudiness against Northern latitude levels. Weak linear relationship shown by a very low r-value.

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Calculate linear regression for Southern Hemisphere Latitude vs. Cloudiness
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_south['Lat'], clean_city_data_south['Cloudiness'])
regress_values = clean_city_data_south['Lat'] * slope + intercept

# Build the equation label with an explicit sign so a negative intercept
# reads "x - 3.2" instead of "x + -3.2"
rounded_intercept = round(intercept, 2)
intercept_sign = "-" if rounded_intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(rounded_intercept)}"

# Plot Southern Hemisphere Latitude vs. Cloudiness with linear regression, labels
plt.scatter(clean_city_data_south['Lat'], clean_city_data_south['Cloudiness'], marker="o")

plt.plot(clean_city_data_south['Lat'], regress_values, "r-")
# Anchor the label in axes-fraction coordinates (lower-left corner). With
# data coordinates matplotlib skips an annotation whose point lies outside
# the axes limits, and those limits change with every random city sample.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title(f"Southern Hemisphere: \n City Latitude vs. Cloudiness (%) ({display_date})")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")

# rvalue is the correlation coefficient; its square is r-squared
print(f"The r-squared is: {rvalue**2}")
plt.savefig("Images/Lat_v_Cloudiness_South.png")
plt.show()

**Observed Trends:** Plot compares cloudiness against Southern latitude levels. Weak linear relationship shown with a very low r-value.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Calculate linear regression for Northern Hemisphere Latitude vs. Wind Speed
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_north['Lat'], clean_city_data_north['Wind Speed'])
regress_values = clean_city_data_north['Lat'] * slope + intercept

# Build the equation label with an explicit sign so a negative intercept
# reads "x - 3.2" instead of "x + -3.2"
rounded_intercept = round(intercept, 2)
intercept_sign = "-" if rounded_intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(rounded_intercept)}"

# Plot Northern Hemisphere Latitude vs. Wind Speed with linear regression, labels
plt.scatter(clean_city_data_north['Lat'], clean_city_data_north['Wind Speed'], marker="o")

plt.plot(clean_city_data_north['Lat'], regress_values, "r-")
# Anchor the label in axes-fraction coordinates (lower-left corner). With
# data coordinates matplotlib skips an annotation whose point lies outside
# the axes limits, and those limits change with every random city sample.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title(f"Northern Hemisphere: \n City Latitude vs. Wind Speed (mph) ({display_date})")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")

# rvalue is the correlation coefficient; its square is r-squared
print(f"The r-squared is: {rvalue**2}")
plt.savefig("Images/Lat_v_Wind_Speed_North.png")
plt.show()

**Observed Trends:** Plot compares wind speed against Northern latitude levels. Weak linear relationship shown by a very low r-value.

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Calculate linear regression for Southern Hemisphere Latitude vs. Wind Speed
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_south['Lat'], clean_city_data_south['Wind Speed'])
regress_values = clean_city_data_south['Lat'] * slope + intercept

# Build the equation label with an explicit sign so a negative intercept
# reads "x - 3.2" instead of "x + -3.2"
rounded_intercept = round(intercept, 2)
intercept_sign = "-" if rounded_intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(rounded_intercept)}"

# Plot Southern Hemisphere Latitude vs. Wind Speed with linear regression, labels
plt.scatter(clean_city_data_south['Lat'], clean_city_data_south['Wind Speed'], marker="o")

plt.plot(clean_city_data_south['Lat'], regress_values, "r-")
# Anchor the label in axes-fraction coordinates (lower-left corner). With
# data coordinates matplotlib skips an annotation whose point lies outside
# the axes limits, and those limits change with every random city sample.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title(f"Southern Hemisphere: \n City Latitude vs. Wind Speed (mph) ({display_date})")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")

# rvalue is the correlation coefficient; its square is r-squared
print(f"The r-squared is: {rvalue**2}")
plt.savefig("Images/Lat_v_Wind_Speed_South.png")
plt.show()

**Observed Trends:** Plot compares wind speed against Southern latitude levels. Weak linear relationship shown with a very low r-value.