# WeatherPy
----


In [None]:
# Select matplotlib's interactive "notebook" backend so figures are drawn inside the notebook output
%matplotlib notebook

In [None]:
# Dependencies and Setup, including import os for path to write to csv
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress
from scipy.stats import pearsonr

# Incorporate citipy to determine city from latitude and longitude
from citipy import citipy

# Import API key for OpenWeatherMap
from config import weather_api_key

In [None]:
# Destination of the exported city weather table (CSV)
output_file = os.path.join(
    "..",
    "output_data",
    "cities.csv",
)

# Full span of valid latitudes and longitudes, used as bounds for the random draws
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Create a set of random lat and lng combinations to use to find nearest city with citipy
lats = np.random.uniform(lat_range[0], lat_range[1], size=1400)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1400)

lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each name once.
# dict.fromkeys de-duplicates in a single pass while preserving first-seen order,
# avoiding a linear `in` scan of a growing list for every coordinate pair.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count (> 500 cities)
print(len(cities))

In [None]:
# Display the complete list of unique city names found from the random coordinates (the output is long)
cities

## Perform API Calls
* Perform a weather check on the cities list (using a series of API calls) and include a print log to record each city as it's processed with city number and name  

Starter code had print of how the list should look, with groups of about 50  
Desired result is from a try, then except message "City not found. Skipping..." pass  
Goal data frame City Lat Lng MaxTemp Humidity Cloudiness Wind Speed Country Date

In [None]:
# Endpoint of the OpenWeatherMap current-weather API.
# HTTPS is used because the API key travels in the query string and would
# otherwise be sent in clear text.
base_url = "https://api.openweathermap.org/data/2.5/weather?"

# Build partial query URL, and choose imperial for Fahrenheit; the city name is
# appended after "q=" for each request
query_url = f'{base_url}appid={weather_api_key}&units=imperial&q='

In [None]:
# Request current weather for each city and collect one dictionary per city
# found; the list is later turned into a data frame.

# Identify start point for the data retrieval
print('Beginning Data Retrieval Process\n--------------\n')

# Create a list to store weather info to be requested with the API
weather_details = []

# Counters used to log progress in sets of 50 records
record = 0
sets = 1

for city in cities:
    record += 1
    print(f'Processing Record {record} of Set {sets} | {city}')

    # Once a set reaches 50 cities, move to the next set
    if record == 50:
        record = 0
        sets += 1

    # Request the info needed from OpenWeatherMap and decode the JSON body
    response = requests.get(query_url + city).json()

    try:
        # Goal data frame includes City, Lat, Lng, Max Temp, Humidity, Cloudiness, Wind Speed, Country, Date.
        # The dictionary is fully built before append() runs, so a missing
        # field never leaves a partial record in the list.
        weather_details.append({
            'City': city,
            'Lat': response['coord']['lat'],
            'Lng': response['coord']['lon'],
            'Max Temp': response['main']['temp_max'],
            'Humidity': response['main']['humidity'],
            'Cloudiness': response['clouds']['all'],
            'Wind Speed': response['wind']['speed'],
            'Country': response['sys']['country'],
            'Date': response['dt']
        })
    except (KeyError, TypeError):
        # The response lacks the expected weather fields (e.g. an error payload
        # for an unknown city): log it and continue with the next city.
        # Only lookup failures are caught so Ctrl-C and real bugs still surface.
        print('City not found. Skipping...')

# Add text to show data retrieval has concluded
print('--------------\nData Retrieval Complete\n--------------')

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Turn the list of per-city dictionaries into a table (one row per city) and display it
weather_df = pd.DataFrame(data=weather_details)
weather_df

In [None]:
# Write the weather table to the CSV path held in output_file (column header row is written by default)
weather_df.to_csv(output_file)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Summary statistics for the numeric columns; the "max" row of Humidity shows whether any value exceeds 100
weather_df.describe()

In [None]:
# describe() suggests no humidity above 100; list the rows at or above 100 to double-check
locations_high_humidity = weather_df[weather_df['Humidity'].ge(100)]
locations_high_humidity

# This confirms the reading of the describe() max for Humidity: no value above 100 exists in the data,
# so weather_df is clean data for use in plotting

In [None]:
# TODO: use the time module to convert the 'Date' column to mm-dd-yyyy in a new column (not implemented yet)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

In [None]:
# Considered standardizing the x-axis for latitude plotting (min lat ≈ -55, max ≈ 79)
# Begin the plot for Latitude vs Temperature
# lat_x_axis = np.arange(-60, 100, 20)
# lat_x_axis

# x_values = lat_x_axis
# max temp about 108

In [None]:
# define a function that will allow simpler plotting
def show_plot(x_col, y_col):
    """Draw a scatter plot of two ``weather_df`` columns on the current figure.

    Columns are selected by integer position. Marker colours, the title and
    the axis labels are read from the notebook-level variables ``face``,
    ``edge``, ``plot_title``, ``x_label_name`` and ``y_label_name``, so the
    calling cell must assign them first.

    Args:
        x_col: Positional index of the column plotted on the x-axis.
        y_col: Positional index of the column plotted on the y-axis.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    xs, ys = (weather_df.iloc[:, position] for position in (x_col, y_col))
    plt.scatter(xs, ys, marker='o', facecolor=face, edgecolors=edge)

    # Decorate the axes
    plt.grid()
    plt.xlabel(x_label_name)
    plt.ylabel(y_label_name)
    plt.title(plot_title)

    plt.show()

## Latitude vs. Temperature Plot

In [None]:
# Scatter of max temperature against latitude for all cities
figure1 = plt.figure(figsize=(8, 5))

# Title, axis labels and colours that show_plot() reads from the notebook namespace
plot_title, x_label_name, y_label_name = (
    'Latitude vs. Max Temperature (4/24/21)',
    'Latitude',
    'Max Temperature (F)',
)
face, edge = 'lightpink', 'black'

# Columns by position in weather_df: 1 = Lat, 3 = Max Temp
fig1 = show_plot(1, 3)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
latitude_vs_max_temp_image_path = os.path.join(
    '..', 'output_data', 'LatitudeVsMaxTemp.png'
)
plt.savefig(fname=latitude_vs_max_temp_image_path)

plt.show()

### From instructions, after each plot add brief explanation of what the code is analyzing
* The figure above is plotting the latitude and max temp for each city. The curve seen in the results is expected, since the 0 for the x-axis is the equator and temps would be expected to be the warmest nearer to the equator. The plotting to the left and right sides of the figure are for the latitudes nearer to the poles and so the temperatures are lower than near the equator. 

## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude for all cities
figure2 = plt.figure(figsize=(8, 5))

# Title, axis labels and colours that show_plot() reads from the notebook namespace
plot_title, x_label_name, y_label_name = (
    'Latitude vs. Humidity (4/24/21)',
    'Latitude',
    'Humidity (%)',
)
face, edge = 'orange', 'black'

# Columns by position in weather_df: 1 = Lat, 4 = Humidity
fig2 = show_plot(1, 4)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
latitude_vs_humidity_image_path = os.path.join(
    '..', 'output_data', 'LatitudeVsHumidity.png'
)
plt.savefig(fname=latitude_vs_humidity_image_path)

plt.show()

### From instructions, after each plot add brief explanation of what the code is analyzing
* The figure above plots the latitude and humidity for each city. There does not seem to be evidence of a correlation between latitude and humidity. From weather_df.describe(), the mean humidity is around 66%. Based on where most of the markers are in this plot (heavier in the upper two-fifths of the chart), the linear regression line is expected to be fairly horizontal and close to 66%.

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude for all cities
figure3 = plt.figure(figsize=(8, 5))

# Title, axis labels and colours that show_plot() reads from the notebook namespace
plot_title, x_label_name, y_label_name = (
    'Latitude vs. Cloudiness (4/23/21)',
    'Latitude',
    'Cloudiness (%)',
)
face, edge = 'lightskyblue', 'black'

# Columns by position in weather_df: 1 = Lat, 5 = Cloudiness
fig3 = show_plot(1, 5)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
latitude_vs_cloudiness_image_path = os.path.join(
    '..', 'output_data', 'LatitudeVsCloudiness.png'
)
plt.savefig(fname=latitude_vs_cloudiness_image_path)

plt.show()

### From instructions, after each plot add brief explanation of what the code is analyzing
* The figure above is plotting the latitude and cloudiness, and the linear regression is expected to show that there is no correlation between these factors. Whether looking in the southern hemisphere or the northern, cloudiness varies greatly.

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude for all cities
figure4 = plt.figure(figsize=(8, 5))

# Title, axis labels and colours that show_plot() reads from the notebook namespace
plot_title, x_label_name, y_label_name = (
    'Latitude vs. Wind Speed (4/23/21)',
    'Latitude',
    'Wind Speed (mph)',
)
face, edge = 'purple', 'black'

# Columns by position in weather_df: 1 = Lat, 6 = Wind Speed
fig4 = show_plot(1, 6)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
latitude_vs_windspeed_image_path = os.path.join(
    '..', 'output_data', 'LatitudeVsWindSpeed.png'
)
plt.savefig(fname=latitude_vs_windspeed_image_path)

plt.show()

### From instructions, after each plot add brief explanation of what the code is analyzing
* The figure above is plotting the latitude and wind speed. These also are factors that do not seem to be correlated, whether in the southern or the northern hemisphere.

## Linear Regression

### Divide the df into the northern and southern hemispheres

In [None]:
# Northern-hemisphere subset for the regressions: rows whose latitude is 0 or greater
# (the equator itself is counted as northern)
northern_weather_df = weather_df[weather_df['Lat'].ge(0)]
northern_weather_df

In [None]:
# Southern-hemisphere subset for the regressions: rows whose latitude is below 0
southern_weather_df = weather_df[weather_df['Lat'].lt(0)]
southern_weather_df

In [None]:
# Sanity check: the two hemisphere subsets together should contain as many cities as weather_df
n_count = northern_weather_df.count()['City']
s_count = southern_weather_df.count()['City']
total_count = n_count + s_count
total_count

### Define the functions for the plots and linear regressions needed

In [None]:
# Define additional functions that will allow simpler plotting, by northern and southern hemisphere
# For northern
def northern_calc_and_show_plot(x_col, y_col):
    """Scatter-plot two northern-hemisphere columns with a fitted regression line.

    Reads the columns of ``northern_weather_df`` at integer positions
    ``x_col`` and ``y_col``, draws them on the current figure, overlays the
    least-squares line (its equation is shown in the legend), prints the
    Pearson correlation and shows the figure. Marker colours, the title and
    the axis labels come from the notebook-level variables ``face``,
    ``edge``, ``plot_title``, ``x_label_name`` and ``y_label_name``, which
    the calling cell must assign first.

    Args:
        x_col: Positional index of the column plotted on the x-axis.
        y_col: Positional index of the column plotted on the y-axis.

    Returns:
        None.
    """
    x_values = northern_weather_df.iloc[:, x_col]
    y_values = northern_weather_df.iloc[:, y_col]

    # pearsonr and linregress are imported by name from scipy.stats at the top of the notebook
    correlation = pearsonr(x_values, y_values)
    plt.scatter(x_values, y_values, marker='o', facecolor=face, edgecolors=edge)

    plt.title(plot_title)
    plt.xlabel(x_label_name)
    plt.ylabel(y_label_name)
    plt.grid()

    # For linear regression line
    fit = linregress(x_values, y_values)
    line_fit = fit.slope * x_values + fit.intercept

    plt.plot(x_values, line_fit, "r", label='y={:,.2f}x+{:.2f}'.format(fit.slope, fit.intercept))
    plt.legend(fontsize=10)

    # First element of the pearsonr result is the correlation coefficient
    round_correlation = round(correlation[0], 2)

    print(f'pearsonr results: {correlation}.')
    print(f'The correlation between these two factors is {round_correlation}.')
    plt.show()

In [None]:
# Define two more functions that will allow simpler plotting, by northern and southern hemisphere
# For southern
def southern_calc_and_show_plot(x_col, y_col):
    """Scatter-plot two southern-hemisphere columns with a fitted regression line.

    Reads the columns of ``southern_weather_df`` at integer positions
    ``x_col`` and ``y_col``, draws them on the current figure, overlays the
    least-squares line (its equation is shown in the legend), prints the
    Pearson correlation and shows the figure. Marker colours, the title and
    the axis labels come from the notebook-level variables ``face``,
    ``edge``, ``plot_title``, ``x_label_name`` and ``y_label_name``, which
    the calling cell must assign first.

    Args:
        x_col: Positional index of the column plotted on the x-axis.
        y_col: Positional index of the column plotted on the y-axis.

    Returns:
        None.
    """
    x_values = southern_weather_df.iloc[:, x_col]
    y_values = southern_weather_df.iloc[:, y_col]

    # pearsonr and linregress are imported by name from scipy.stats at the top of the notebook
    correlation = pearsonr(x_values, y_values)
    plt.scatter(x_values, y_values, marker='o', facecolor=face, edgecolors=edge)

    plt.title(plot_title)
    plt.xlabel(x_label_name)
    plt.ylabel(y_label_name)
    plt.grid()

    # For linear regression line: fit the data columns themselves, not the
    # integer column positions passed in as x_col / y_col
    fit = linregress(x_values, y_values)
    line_fit = fit.slope * x_values + fit.intercept

    plt.plot(x_values, line_fit, "r", label='y={:,.2f}x+{:.2f}'.format(fit.slope, fit.intercept))
    plt.legend(fontsize=10)

    # First element of the pearsonr result is the correlation coefficient
    round_correlation = round(correlation[0], 2)

    print(f'pearsonr results: {correlation}.')
    print(f'The correlation between these two factors is {round_correlation}.')
    plt.show()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Plot latitude vs max_temp, northern hemisphere
figure5 = plt.figure(figsize=(8,5))

# Title, axis labels and colours read by northern_calc_and_show_plot()
plot_title = 'Northern Hemisphere\n\
Latitude vs. Max Temperature (4/23/21)'
x_label_name = 'Latitude'
y_label_name = 'Max Temperature (F)'
face = 'lightpink'
edge= 'black'

# Set ylim for entire temp range shown on weather_df.describe() for consistency betw s and n plots
plt.ylim(-15, 110)

# Plot with columns 1 (Lat) and 3 (Max Temp)
fig5 = northern_calc_and_show_plot(1, 3)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
n_max_temp_lin_reg_image = os.path.join(
    '..', 'output_data', 'N-LatitudeVsMaxTempWithLinReg.png'
)
plt.savefig(fname=n_max_temp_lin_reg_image)

plt.show()

In [None]:
# Add observations for each plot with a linear regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Max temperature against latitude with regression line, southern hemisphere
figure6 = plt.figure(figsize=(8, 5))

# Title, axis labels and colours read by southern_calc_and_show_plot()
plot_title = ('Southern Hemisphere\n'
              'Latitude vs. Max Temperature (4/23/21)')
x_label_name, y_label_name = 'Latitude', 'Max Temperature (F)'
face, edge = 'lightpink', 'black'

# Same y-range as the northern plot (covers the temperature range from weather_df.describe())
plt.ylim(-15, 110)

# Columns by position: 1 = Lat, 3 = Max Temp
fig6 = southern_calc_and_show_plot(1, 3)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
s_max_temp_lin_reg_image = os.path.join(
    '..', 'output_data', 'S-LatitudeVsMaxTempWithLinReg.png'
)
plt.savefig(fname=s_max_temp_lin_reg_image)

plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Plot latitude vs humidity, northern hemisphere
figure7 = plt.figure(figsize=(8,5))

# Title, axis labels and colours read by northern_calc_and_show_plot()
plot_title = 'Northern Hemisphere\n\
Latitude vs. Humidity (4/23/21)'

x_label_name = 'Latitude'
y_label_name = 'Humidity (%)'
face = 'orange'
edge= 'black'

# Set ylim for entire range of humidity 0 to 100% for consistency between northern and southern plots
plt.ylim(0, 100)

# Plot with columns 1 (Lat) and 4 (Humidity)
fig7 = northern_calc_and_show_plot(1, 4)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
n_humidity_lin_reg_image = os.path.join(
    '..', 'output_data', 'N-LatitudeVsHumWithLinReg.png'
)
plt.savefig(fname=n_humidity_lin_reg_image)

plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Humidity against latitude with regression line, southern hemisphere
figure8 = plt.figure(figsize=(8, 5))

# Title, axis labels and colours read by southern_calc_and_show_plot()
plot_title = ('Southern Hemisphere\n'
              'Latitude vs. Humidity (4/23/21)')
x_label_name, y_label_name = 'Latitude', 'Humidity (%)'
face, edge = 'orange', 'black'

# Same y-range as the northern plot: the full 0 to 100% humidity scale
plt.ylim(0, 100)

# Columns by position: 1 = Lat, 4 = Humidity
fig8 = southern_calc_and_show_plot(1, 4)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
s_humidity_lin_reg_image = os.path.join(
    '..', 'output_data', 'S-LatitudeVsHumWithLinReg.png'
)
plt.savefig(fname=s_humidity_lin_reg_image)

plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Plot latitude vs cloudiness, northern hemisphere
figure9 = plt.figure(figsize=(8,5))

# Title, axis labels and colours read by northern_calc_and_show_plot()
plot_title = 'Northern Hemisphere\n\
Latitude vs. Cloudiness (4/23/21)'

x_label_name = 'Latitude'
y_label_name = 'Cloudiness (%)'
face = 'lightskyblue'
edge= 'black'

# Set ylim for entire range of cloudiness 0 to 100% for consistency between northern and southern plots
plt.ylim(0, 100)

# Plot with columns 1 (Lat) and 5 (Cloudiness)
fig9 = northern_calc_and_show_plot(1, 5)
plt.tight_layout()

In [None]:
# Save the plot to png file in same place as cities.csv
n_cloudiness_lin_reg_image = os.path.join('..', 'output_data', 'N-LatVsCloudsWithLinReg.png')
# Pass the path variable defined on the line above to savefig
plt.savefig(n_cloudiness_lin_reg_image)

plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Cloudiness against latitude with regression line, southern hemisphere
figure10 = plt.figure(figsize=(8, 5))

# Title, axis labels and colours read by southern_calc_and_show_plot()
plot_title = ('Southern Hemisphere\n'
              'Latitude vs. Cloudiness (4/23/21)')
x_label_name, y_label_name = 'Latitude', 'Cloudiness (%)'
face, edge = 'lightskyblue', 'black'

# Same y-range as the northern plot: the full 0 to 100% cloudiness scale
plt.ylim(0, 100)

# Columns by position: 1 = Lat, 5 = Cloudiness
fig10 = southern_calc_and_show_plot(1, 5)
plt.tight_layout()

In [None]:
# Save the plot to png file in same place as cities.csv
s_cloudiness_lin_reg_image = os.path.join('..', 'output_data', 'S-LatVsCloudsWithLinReg.png')
# Pass the path variable defined on the line above to savefig
plt.savefig(s_cloudiness_lin_reg_image)

plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Plot latitude vs wind speed, northern hemisphere
figure11 = plt.figure(figsize=(8,5))

# Title, axis labels and colours read by northern_calc_and_show_plot()
plot_title = 'Northern Hemisphere\n\
Latitude vs. Wind Speed (4/23/21)'

x_label_name = 'Latitude'
y_label_name = 'Wind Speed (mph)'
face = 'purple'
edge= 'black'


# Set ylim for entire range of winds (0.43 to max of 35.9) for consistency between northern and southern plots
plt.ylim(0, 40)

# Plot with columns 1 (Lat) and 6 (Wind Speed)
fig11 = northern_calc_and_show_plot(1, 6)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
n_wind_lin_reg_image = os.path.join(
    '..', 'output_data', 'N-LatVsWindWithLinReg.png'
)
plt.savefig(fname=n_wind_lin_reg_image)

plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Wind speed against latitude with regression line, southern hemisphere
figure12 = plt.figure(figsize=(8, 5))

# Title, axis labels and colours read by southern_calc_and_show_plot()
plot_title = ('Southern Hemisphere\n'
              'Latitude vs. Wind Speed (4/23/21)')
x_label_name, y_label_name = 'Latitude', 'Wind Speed (mph)'
face, edge = 'purple', 'black'

# Same y-range as the northern plot (observed winds ran from 0.43 to 35.9)
plt.ylim(0, 40)

# Columns by position: 1 = Lat, 6 = Wind Speed
fig12 = southern_calc_and_show_plot(1, 6)
plt.tight_layout()

In [None]:
# Write the current figure as a PNG next to cities.csv (../output_data)
s_wind_lin_reg_image = os.path.join(
    '..', 'output_data', 'S-LatVsWindWithLinReg.png'
)
plt.savefig(fname=s_wind_lin_reg_image)

plt.show()