# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

Weather Analysis and Observations:
* With a moderate correlation between temperature and latitude, you are generally more likely to experience warmer temperatures near the equator.  
* Humidity, cloudiness, and wind speeds have no relationship to latitude with r-squared values at or very near zero.
* Southern hemisphere temperatures are positively correlated with latitude and northern hemisphere temperatures are negatively correlated with latitude.  As latitude increases, temperature increases if you are in the southern hemisphere.  As latitude decreases, temperature increases if you are in the northern hemisphere.
* Peak temperatures occur at 20 degrees north of the equator.  It would be interesting to look at seasonal data to investigate how the tilt of the Earth's axis affects the latitude of peak temperatures in different seasons.  

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): this path is not referenced by the export cells visible in this
# notebook, which write to literal "../output_data/..." paths — confirm which
# location is intended.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# (min, max) bounds used as the sampling limits when random coordinates are drawn below.
lat_range = (-90, 90)
lng_range = (-180, 180)

KeyboardInterrupt: 

## Generate Cities List

In [None]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []
# Names already collected: gives constant-time duplicate checks, while `cities`
# keeps the names in first-seen order.
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Build the results table: one row per city, title-cased for display
City_List = [name.title() for name in cities]

# Every non-city column starts out as an empty-string placeholder
weather_df = pd.DataFrame(
    {
        "City": City_List,
        **dict.fromkeys(
            [
                "Latitude",
                "Longitude",
                "Max Temperature",
                "Humidity",
                "Cloudiness",
                "Wind Speed",
                "Country",
                "DateTime",
            ],
            "",
        ),
    }
)

weather_df

In [None]:
# Build partial query URL
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

#perform a weather check on each city
for index, row in weather_df.iterrows():

    city_name = row['City']
    print(f"Retrieving Results for record {index}: {city_name}.")

    # get desired parameters from API
    try:
        # Passing the query as `params` lets requests URL-encode city names that
        # contain spaces or other special characters; the timeout keeps one
        # stalled connection from hanging the whole run.
        response = requests.get(
            url,
            params={"appid": weather_api_key, "q": city_name, "units": units},
            timeout=10,
        ).json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # Only the exception type is printed: the full message can include the
        # request URL, which carries the API key.
        print(f"Request failed ({type(err).__name__}). Skipping...")
        continue

    try:
        # Read every field before writing any of them, so a response with a
        # missing key cannot leave a half-filled row behind.
        record = {
            'Latitude': response['coord']['lat'],
            'Longitude': response['coord']['lon'],
            'Max Temperature': response['main']['temp_max'],
            'Humidity': response['main']['humidity'],
            'Cloudiness': response['clouds']['all'],
            'Wind Speed': response['wind']['speed'],
            'Country': response['sys']['country'],
            'DateTime': response['dt'],
        }
    except (KeyError, IndexError):
        print("City not found. Skipping...")
        continue

    # Fill in data series with returned response data
    for column, value in record.items():
        weather_df.loc[index, column] = value
        
    

In [None]:
# Display the table after the API loop above has written its results into it
weather_df


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Check data types for future analysis
weather_df.dtypes

In [None]:
# Convert data types as needed for analysis
for measurement in (
    "Latitude",
    "Longitude",
    "Max Temperature",
    "Humidity",
    "Cloudiness",
    "Wind Speed",
):
    weather_df[measurement] = pd.to_numeric(weather_df[measurement])

weather_df.dtypes

In [None]:
# Export file as a CSV
# Written without the index column and with a header row.
# NOTE(review): this literal path differs from `output_data_file` defined in the
# setup cell ("output_data/cities.csv") — confirm which location is intended.
weather_df.to_csv("../output_data/weather.csv", index=False, header=True)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Summary statistics; the Humidity "max" shows whether any city exceeds 100%
weather_df.describe()

In [None]:
#  Collect the index labels of the rows whose humidity exceeds 100%.
humidity = weather_df[weather_df["Humidity"] > 100.0].index.tolist()
humidity

In [None]:
# Build a new DataFrame from the city data with every humidity outlier row removed by index label.

outlier_rows = weather_df[weather_df["Humidity"] > 100]
clean_weather_df = weather_df.drop(index=outlier_rows.index)

clean_weather_df

In [None]:
# Highest humidity remaining in the cleaned frame (check that it does not exceed 100)
clean_weather_df["Humidity"].max()

In [None]:
# Extract relevant fields from the data frame
# Columns are selected by label rather than by position, so the selection stays
# correct even if the column order of the source frame changes.
analysis_columns = [
    "City",
    "Latitude",
    "Max Temperature",
    "Humidity",
    "Cloudiness",
    "Wind Speed",
]
city_data = clean_weather_df.loc[:, analysis_columns]
print(city_data)
# Export the City_Data into a csv
city_data.to_csv("../output_data/cityData.csv", index=True, header=True)

In [None]:
# Render the city table as HTML. With no buffer argument, to_html returns the
# markup as a string, so no separate string buffer is needed.
html_str = city_data.to_html(classes='table table-striped')

print(html_str)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter of max temperature against latitude, stamped with today's date
date = pd.to_datetime('today').date()
x_axis, y_axis = city_data["Latitude"], city_data["Max Temperature"]

# Draw on an explicit figure/axes pair rather than the implicit pyplot state
fig, ax = plt.subplots()
ax.scatter(x_axis, y_axis, marker="o", facecolors="red", edgecolors="black", alpha=0.7)
ax.set_title(f"Latitude vs. Temperature {date}")
ax.set_xlabel("Latitude")
ax.set_ylabel("Temperature")
ax.grid(True)
fig.savefig("../output_data/Lat_vs_Temp.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude, stamped with today's date
date = pd.to_datetime('today').date()
x_axis, y_axis = city_data["Latitude"], city_data["Humidity"]

# Draw on an explicit figure/axes pair rather than the implicit pyplot state
fig, ax = plt.subplots()
ax.scatter(x_axis, y_axis, marker="o", facecolors="blue", edgecolors="black", alpha=0.7)
ax.set_title(f"Latitude vs. Humidity {date}")
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity")
ax.grid(True)
fig.savefig("../output_data/Lat_vs_Hum.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude, stamped with today's date
date = pd.to_datetime('today').date()
x_axis, y_axis = city_data["Latitude"], city_data["Cloudiness"]

# Draw on an explicit figure/axes pair rather than the implicit pyplot state
fig, ax = plt.subplots()
ax.scatter(x_axis, y_axis, marker="o", facecolors="green", edgecolors="black", alpha=0.7)
ax.set_title(f"Latitude vs. Cloudiness {date}")
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness")
ax.grid(True)
fig.savefig("../output_data/Lat_vs_Cld.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude, stamped with today's date
date = pd.to_datetime('today').date()
x_axis, y_axis = city_data["Latitude"], city_data["Wind Speed"]

# Draw on an explicit figure/axes pair rather than the implicit pyplot state
fig, ax = plt.subplots()
ax.scatter(x_axis, y_axis, marker="o", facecolors="purple", edgecolors="black", alpha=0.7)
ax.set_title(f"Latitude vs. Wind Speed {date}")
ax.set_xlabel("Latitude")
ax.set_ylabel("Wind Speed")
ax.grid(True)
fig.savefig("../output_data/Lat_vs_WS.png")
plt.show()

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots
def linreg(data=None, hemisphere=None):
    """Plot a latitude regression for one weather column in one hemisphere.

    Draws a scatter of the chosen column against latitude together with the
    fitted least-squares line and its equation, saves the figure under
    ``../output_data/``, shows it, and prints the R-squared value. The
    hemisphere is part of the saved file name so that the northern and
    southern plots of the same column do not overwrite each other.

    Args:
        data: Column to analyze (Max Temperature, Humidity, Cloudiness,
            Wind Speed). Asked for interactively when omitted.
        hemisphere: 'north' or 'south'; case and surrounding whitespace are
            ignored. Asked for interactively when omitted.

    Returns:
        None. A message is printed and the function returns early, without
        plotting, when the hemisphere or column is not recognized or when
        fewer than two complete data points are available.
    """
    # Fall back to interactive prompts so bare `linreg()` calls keep working.
    if data is None:
        data = input("Select a column to analyze (Max Temperature, Humidity, Cloudiness, Wind Speed): ")
    if hemisphere is None:
        hemisphere = input("Would you like to plot data from the Northern Hemisphere or Southern Hemisphere? say 'north' or 'south'  ")

    column = data.strip().title()
    hemisphere = hemisphere.strip().lower()

    if hemisphere == "north":
        frame, hemisphere_label = north_city_data, "Northern"
    elif hemisphere == "south":
        frame, hemisphere_label = south_city_data, "Southern"
    else:
        # Stop here: there is no data selected to plot.
        print("I do not know what hemisphere you would like.")
        return

    if column not in frame.columns:
        print(f"I do not know the column '{column}'.")
        return

    # Keep only rows where both values are present; missing values would
    # otherwise propagate NaN into the fitted slope and intercept.
    x_values = frame["Latitude"]
    y_values = frame[column]
    has_both = x_values.notna() & y_values.notna()
    x_values = x_values[has_both]
    y_values = y_values[has_both]
    if len(x_values) < 2:
        print("Not enough data points to fit a regression line.")
        return

    fit = linregress(x_values, y_values)
    regress_values = x_values * fit.slope + fit.intercept
    line_eq = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 2)}"

    # The equation label is anchored at the centroid of the data.
    plt.annotate(line_eq, (x_values.mean(), y_values.mean()), fontsize=15, color="red")
    plt.scatter(x_values, y_values)
    plt.plot(x_values, regress_values, "r-")
    plt.xlabel("Latitude")
    plt.ylabel(column)
    plt.title(f"{hemisphere_label} Hemisphere - {column} vs. Latitude")
    plt.savefig(f"../output_data/{column}_vs_Lat_{hemisphere}")
    plt.show()
    print(f"R squared: {round(fit.rvalue**2, 2)}")


In [None]:
# Create Northern and Southern Hemisphere DataFrames
# Each hemisphere is a direct latitude filter of city_data, re-indexed from 0.
# Latitude 0 is counted as northern so equator rows are not lost from both
# frames; rows with a missing latitude fall in neither frame.
north_lat = city_data.loc[city_data["Latitude"] >= 0, :]
north_city_data = north_lat.reset_index(drop=True)
print(north_city_data)
south_lat = city_data.loc[city_data["Latitude"] < 0, :]
south_city_data = south_lat.reset_index(drop=True)
south_city_data

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Interactive: answer the prompts with "Max Temperature" and "north" for this section's plot.
linreg()


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Interactive: answer the prompts with "Max Temperature" and "south" for this section's plot.
linreg()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Interactive: answer the prompts with "Humidity" and "north" for this section's plot.
linreg()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Interactive: answer the prompts with "Humidity" and "south" for this section's plot.
linreg()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Interactive: answer the prompts with "Cloudiness" and "north" for this section's plot.
linreg()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Interactive: answer the prompts with "Cloudiness" and "south" for this section's plot.
linreg()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Interactive: answer the prompts with "Wind Speed" and "north" for this section's plot.
linreg()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Interactive: answer the prompts with "Wind Speed" and "south" for this section's plot.
linreg()