# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# import dependencies
from citipy import citipy
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import requests
import json
import datetime as dt
from scipy.stats import linregress

from api_keys import weather_api_key



## Generate Cities List

In [None]:
# Containers for the generated latitude/longitude pairs and for the
# unique city names looked up from them.
lat_lon, cities = [], []


In [None]:
# Create random latitudes and longitudes to look up cities.
lat = np.random.uniform(low=-90, high=90, size=1500)
lon = np.random.uniform(low=-180, high=180, size=1500)

# Materialise the pairs as a list: a bare zip() is a one-shot iterator, so it
# displays as an opaque "<zip object>" and is empty after its first pass.
lat_lon = list(zip(lat, lon))

# Preview the first few (latitude, longitude) pairs.
lat_lon[:5]

In [None]:
# Create the city list.
# A set gives constant-time duplicate checks; `cities` itself stays a list so
# the first-seen order of the cities is preserved.
seen_cities = set(cities)

# Unpack each pair into its own names instead of reusing `lat_lon` as the loop
# variable, which would overwrite the collection being iterated.
for point_lat, point_lon in lat_lon:

    # Nearest city to the generated coordinates
    city = citipy.nearest_city(point_lat, point_lon).city_name

    # If unique, add to cities
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Check cities returned
# cities

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Base URL of the OpenWeatherMap current-weather endpoint, requesting metric
# units. HTTPS is used because the API key is sent as a query parameter and
# would otherwise travel in clear text.
url = 'https://api.openweathermap.org/data/2.5/weather?units=metric'

In [None]:
# Counter for cities, starting at 1
number = 1

# Parallel result lists, filled in as each city's data is retrieved.
# Each name gets its own independent empty list.
city_name, lat, lon = [], [], []
temp, humidity = [], []
cloudiness, wind_sp = [], []



In [None]:
# Begin the API calls: query the weather service once per city and collect the
# fields used by the later DataFrame and plots.
for city in cities:
    try:
        # Let requests build and URL-encode the query string (city names may
        # contain spaces or non-ASCII characters). The timeout stops a single
        # stalled connection from hanging the whole run.
        response = requests.get(
            url,
            params={'q': city, 'appid': weather_api_key},
            timeout=10,
        )
        city_data = response.json()

        # Read every field before touching the result lists, so a response
        # missing any key cannot leave the lists with unequal lengths.
        record = (
            city_data['name'],
            city_data['coord']['lat'],
            city_data['coord']['lon'],
            city_data['main']['temp_max'],
            city_data['main']['humidity'],
            city_data['clouds']['all'],
            city_data['wind']['speed'],
        )
    except KeyError:
        # The response did not contain the expected weather fields.
        print(f'City not found. Skipping {city}...')
        continue
    except (requests.RequestException, ValueError) as err:
        # Network failure, timeout, or a body that is not valid JSON.
        print(f'Request failed for {city} ({err}). Skipping...')
        continue

    # All fields are present: append one value to each parallel list.
    targets = (city_name, lat, lon, temp, humidity, cloudiness, wind_sp)
    for target, value in zip(targets, record):
        target.append(value)

    print(f'Processing record {number} of {len(cities)}. | {city}')

    number += 1


In [None]:
#city_name

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the collected lists into a single DataFrame, one column per field.
city_columns = {
    'City': city_name,
    'Latitude': lat,
    'Longitude': lon,
    'Temperature': temp,
    'Humidity': humidity,
    'Cloudiness': cloudiness,
    'Wind Speed': wind_sp,
}
city_data_df = pd.DataFrame(city_columns)

# CSV export is currently disabled; uncomment the next line to write the file.
#pd.DataFrame.to_csv(city_data_df, 'city_data.csv')

# Show the first rows of the DataFrame.
city_data_df.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Index labels of the rows whose humidity value is greater than 100.
humidity_too_high = city_data_df['Humidity'] > 100
above_100 = city_data_df.loc[humidity_too_high].index
above_100

In [None]:
# Build "clean_city_df" as the city data without the humidity outliers.
# drop() returns a new DataFrame (inplace defaults to False), so city_data_df
# itself is left untouched. Every label in above_100 is removed in one call;
# an empty above_100 simply yields an unmodified copy.
clean_city_df = city_data_df.drop(index=above_100)

clean_city_df

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Date of analysis, formatted day/month/two-digit-year for the plot titles
date = dt.datetime.today().strftime('%d/%m/%y')

# Plot temperature. Both axes are read from clean_city_df so that x and y
# always have the same length, even after outlier rows have been dropped.
plt.scatter(clean_city_df['Latitude'], clean_city_df['Temperature'])
plt.title(f'City Latitude vs. Max Temperature {date}')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (C)')
plt.savefig('lat_temp.png')

## Latitude vs. Humidity Plot

In [None]:
# Plot humidity. Both axes are read from clean_city_df so that x and y always
# have the same length, even after outlier rows have been dropped.
plt.scatter(clean_city_df['Latitude'], clean_city_df['Humidity'])
plt.title(f'City Latitude vs. Humidity {date}')
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.savefig('lat_humidity.png')

## Latitude vs. Cloudiness Plot

In [None]:
# Plot cloudiness. Both axes are read from clean_city_df so that x and y
# always have the same length, even after outlier rows have been dropped.
plt.scatter(clean_city_df['Latitude'], clean_city_df['Cloudiness'])
plt.title(f'City Latitude vs. Cloudiness {date}')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.savefig('lat_cloudiness.png')

## Latitude vs. Wind Speed Plot

In [None]:
# Plot wind speed. Both axes are read from clean_city_df so that x and y
# always have the same length, even after outlier rows have been dropped.
plt.scatter(clean_city_df['Latitude'], clean_city_df['Wind Speed'])
plt.title(f'City Latitude vs. Wind Speed {date}')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.savefig('lat_wind_speed.png')

## Linear Regression

In [None]:
# Split the cleaned data at the equator: latitude 0 and above is treated as
# northern, anything below 0 as southern.
latitudes = clean_city_df['Latitude']
north_hem = clean_city_df.loc[latitudes >= 0]
south_hem = clean_city_df.loc[latitudes < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere - Max Temp vs. Latitude: fit a least-squares line
x_values = north_hem['Latitude'].pipe(pd.to_numeric).astype(float)
y_values = north_hem['Temperature'].pipe(pd.to_numeric).astype(float)
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f'y = {round(slope, 2)!s}X + {round(intercept, 2)!s}'

# Observations as a scatter, with the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, 'r-')
plt.annotate(line_eq, xy=(16, 10), fontsize=15, color='red')
plt.title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (C)')

# Report the correlation coefficient returned by the fit
print(f'The r-value is: {rvalue}')

plt.savefig('north_hem_temp.png')

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere - Max Temp vs. Latitude: fit a least-squares line
x_values = pd.to_numeric(south_hem['Latitude']).astype(float)
y_values = pd.to_numeric(south_hem['Temperature']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = 'y = ' + str(round(slope,2)) + 'X + ' + str(round(intercept, 2))

# Plot: Southern Hemisphere - Max Temp vs. Latitude Linear Regression
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates so it stays inside the plot
# whatever the data range is; southern latitudes are all negative, so a fixed
# data position with a positive x would fall outside the axes.
plt.annotate(line_eq, xy=(0.05, 0.9), xycoords='axes fraction',
             fontsize=15, color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (C)')
plt.title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')

# Print the r-value (correlation coefficient) of the fit
print(f"The r-value is: {rvalue}")

plt.savefig('south_hem_temp.png')

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere - Humidity vs. Latitude: fit a least-squares line
x_values = north_hem['Latitude'].pipe(pd.to_numeric).astype(float)
y_values = north_hem['Humidity'].pipe(pd.to_numeric).astype(float)
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f'y = {round(slope, 2)!s}X + {round(intercept, 2)!s}'

# Observations as a scatter, with the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, 'r-')
plt.annotate(line_eq, xy=(16, 10), fontsize=15, color='red')
plt.title('Northern Hemisphere - Humidity vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Humidity')

# Report the correlation coefficient returned by the fit
print(f'The r-value is: {rvalue}')

plt.savefig('north_hem_humidity.png')

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere - Humidity vs. Latitude: fit a least-squares line
x_values = pd.to_numeric(south_hem['Latitude']).astype(float)
y_values = pd.to_numeric(south_hem['Humidity']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = 'y = ' + str(round(slope,2)) + 'X + ' + str(round(intercept, 2))

# Plot: Southern Hemisphere - Humidity vs. Latitude Linear Regression
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates so it stays inside the plot
# whatever the data range is; southern latitudes are all negative, so a fixed
# data position with a positive x would fall outside the axes.
plt.annotate(line_eq, xy=(0.05, 0.9), xycoords='axes fraction',
             fontsize=15, color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title('Southern Hemisphere - Humidity vs. Latitude Linear Regression')

# Print the r-value (correlation coefficient) of the fit
print(f"The r-value is: {rvalue}")

plt.savefig('south_hem_humidity.png')

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere - Cloudiness vs. Latitude: fit a least-squares line
x_values = north_hem['Latitude'].pipe(pd.to_numeric).astype(float)
y_values = north_hem['Cloudiness'].pipe(pd.to_numeric).astype(float)
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f'y = {round(slope, 2)!s}X + {round(intercept, 2)!s}'

# Observations as a scatter, with the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, 'r-')
plt.annotate(line_eq, xy=(16, 10), fontsize=15, color='red')
plt.title('Northern Hemisphere - Cloudiness vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')

# Report the correlation coefficient returned by the fit
print(f'The r-value is: {rvalue}')

plt.savefig('north_hem_cloudiness.png')

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere - Cloudiness vs. Latitude: fit a least-squares line
x_values = pd.to_numeric(south_hem['Latitude']).astype(float)
y_values = pd.to_numeric(south_hem['Cloudiness']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = 'y = ' + str(round(slope,2)) + 'X + ' + str(round(intercept, 2))

# Plot: Southern Hemisphere - Cloudiness vs. Latitude Linear Regression
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates so it stays inside the plot
# whatever the data range is; southern latitudes are all negative, so a fixed
# data position with a positive x would fall outside the axes.
plt.annotate(line_eq, xy=(0.05, 0.9), xycoords='axes fraction',
             fontsize=15, color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title('Southern Hemisphere - Cloudiness vs. Latitude Linear Regression')

# Print the r-value (correlation coefficient) of the fit
print(f"The r-value is: {rvalue}")

plt.savefig('south_hem_cloudiness.png')

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere - Wind Speed vs. Latitude: fit a least-squares line
x_values = north_hem['Latitude'].pipe(pd.to_numeric).astype(float)
y_values = north_hem['Wind Speed'].pipe(pd.to_numeric).astype(float)
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)
regress_values = x_values.mul(slope).add(intercept)
line_eq = f'y = {round(slope, 2)!s}X + {round(intercept, 2)!s}'

# Observations as a scatter, with the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, 'r-')
plt.annotate(line_eq, xy=(16, 10), fontsize=15, color='red')
plt.title('Northern Hemisphere - Wind Speed vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')

# Report the correlation coefficient returned by the fit
print(f'The r-value is: {rvalue}')

plt.savefig('north_hem_wind_speed.png')

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere - Wind Speed vs. Latitude: fit a least-squares line
x_values = pd.to_numeric(south_hem['Latitude']).astype(float)
y_values = pd.to_numeric(south_hem['Wind Speed']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = 'y = ' + str(round(slope,2)) + 'X + ' + str(round(intercept, 2))

# Plot: Southern Hemisphere - Wind Speed vs. Latitude Linear Regression
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates so it stays inside the plot
# whatever the data range is; southern latitudes are all negative, so a fixed
# data position with a positive x would fall outside the axes.
plt.annotate(line_eq, xy=(0.05, 0.9), xycoords='axes fraction',
             fontsize=15, color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title('Southern Hemisphere - Wind Speed vs. Latitude Linear Regression')

# Print the r-value (correlation coefficient) of the fit
print(f"The r-value is: {rvalue}")

plt.savefig('south_hem_wind_speed.png')