# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy import stats
from scipy.stats import linregress

# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

### Generate the Cities List by Using the `citipy` Library

In [None]:
# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each
# city name once. dict.fromkeys keeps the first occurrence of every key in
# insertion order, so the result matches an "append if not already present"
# loop without re-scanning a growing list for every coordinate pair.
# NOTE: uniqueness is by city name only, exactly as before.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
    )
)

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

In [None]:
# Echo the list of unique city names as this cell's output for a visual check
cities

In [None]:
#city_test = cities[0:10]
#city_test

---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:
# Set the API base URL
from pprint import pprint

url = "https://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Seconds to wait for a response before giving up on a city, so a stalled
# connection cannot hang the whole retrieval loop
request_timeout = 10

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if i % 50 == 0 and i >= 50:
        set_count += 1
        # Restart at 1 so every set is numbered the same way as the first
        record_count = 1

    # Query parameters for this city. Passing them via `params` lets requests
    # URL-encode the values, so names containing spaces or non-ASCII
    # characters are sent correctly.
    query = {"q": city, "appid": weather_api_key, "units": units}

    # Log the record, set numbers, and city
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        response = requests.get(url, params=query, timeout=request_timeout)

        # Turn HTTP error statuses into an exception handled below
        response.raise_for_status()

        # Parse the JSON and retrieve data
        city_weather = response.json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness, wind speed, country, and date
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = city_weather["dt"]

        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If the request fails, the body is not valid JSON, or an expected field
    # is missing, skip the city. Only these errors are caught so that a
    # KeyboardInterrupt or a genuine programming error is not silently hidden.
    except (requests.exceptions.RequestException, KeyError, ValueError):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

In [None]:
# Build a DataFrame from the collected per-city weather records
city_data_df = pd.DataFrame(data=city_data)

# Report how many non-null entries each column holds
city_data_df.count()

In [None]:
# Display sample data (the bare expression is rendered as this cell's output)
city_data_df

In [None]:
# Persist the city weather table as CSV; the index column is written under
# the header "City_ID"
city_data_df.to_csv(
    path_or_buf="../output_data/cities.csv",
    index_label="City_ID",
)

In [None]:
# Reload the saved CSV, using the "City_ID" column as the index
city_data_df = pd.read_csv(
    filepath_or_buffer="../output_data/cities.csv",
    index_col="City_ID",
)

# Render the reloaded table as the cell output
city_data_df

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Scatter plot: latitude vs. maximum temperature

# Columns being compared
lat_v_temp_x = city_data_df["Lat"]
lat_v_temp_y = city_data_df["Max Temp"]

# Draw the points (purple with black outlines), then decorate the axes
plt.scatter(x=lat_v_temp_x, y=lat_v_temp_y, color="purple", edgecolor="black")
plt.title("Latitude vs Temperature (All Cities)")
plt.xlabel("Latitude")
plt.ylabel("Temperature (Celsius)")
plt.grid(True)

# Write the figure to disk
plt.savefig("../output_data/Fig1_LatTemp.png")

# Render the figure in the notebook
plt.show()

#### Latitude Vs. Humidity

In [None]:
# Scatter plot: latitude vs. humidity

# Columns being compared
lat_v_hum_x = city_data_df["Lat"]
lat_v_hum_y = city_data_df["Humidity"]

# Draw the points (blue with black outlines), then decorate the axes
plt.scatter(x=lat_v_hum_x, y=lat_v_hum_y, color="blue", edgecolor="black")
plt.title("Latitude vs Humidity (All Cities)")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)

# Write the figure to disk
plt.savefig("../output_data/Fig2_LatHum.png")

# Render the figure in the notebook
plt.show()

#### Latitude Vs. Cloudiness

In [None]:
# Scatter plot: latitude vs. cloudiness

# Columns being compared
lat_v_cloud_x = city_data_df["Lat"]
lat_v_cloud_y = city_data_df["Cloudiness"]

# Draw the points (orange with black outlines), then decorate the axes
plt.scatter(x=lat_v_cloud_x, y=lat_v_cloud_y, color="orange", edgecolor="black")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Latitude vs Cloudiness (All Cities)")
plt.grid(True)

# Write the figure to disk
plt.savefig("../output_data/Fig3_LatCloud.png")

# Render the figure in the notebook
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot: latitude vs. wind speed

# Columns being compared
lat_v_wind_x = city_data_df["Lat"]
lat_v_wind_y = city_data_df["Wind Speed"]

# Draw the points (green with black outlines), then decorate the axes
plt.scatter(x=lat_v_wind_x, y=lat_v_wind_y, color="green", edgecolor="black")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")
plt.title("Latitude vs Wind Speed (All Cities)")
plt.grid(True)

# Write the figure to disk
plt.savefig("../output_data/Fig4_LatWind.png")

# Render the figure in the notebook
plt.show()

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Define a function to create Linear Regression plots
def lin_regr(x_values, y_values, xlab, ylab, title, annotate_xy=None):
    """Plot y_values against x_values with a least-squares regression line.

    Fits ``stats.linregress`` to the data, prints the fitted line equation
    along with r and r-squared, and shows a scatter plot overlaid with the
    fitted line and annotated with its equation.

    Args:
        x_values: x data; must support ``.min()`` and scalar arithmetic
            (the notebook passes pandas Series).
        y_values: y data of the same length as ``x_values``.
        xlab: Label for the x axis.
        ylab: Label for the y axis.
        title: Plot title.
        annotate_xy: Optional ``(x, y)`` position, in data coordinates, for
            the equation text. When omitted it is derived from the data as
            ``(int(min(x) + 0.5), int(min(y) + 0.5) + 4)``, so the function
            no longer depends on ``xmin``/``ymin`` being defined globally.

    Returns:
        None. The results are printed and the figure is shown.
    """
    # Linear regression on Lat vs <Column/Weather>
    fit = stats.linregress(x_values, y_values)
    slope, intercept, rvalue = fit.slope, fit.intercept, fit.rvalue

    # Calculate regression values
    regress_values = x_values * slope + intercept

    # Create line equation string; the sign is handled explicitly so a
    # negative intercept reads "x - 3.2" rather than "x + -3.2"
    sign = "+" if intercept >= 0 else "-"
    line_eq = f"y = {round(slope, 2)}x {sign} {round(abs(intercept), 2)}"
    print(line_eq)

    # Default annotation anchor: near the lower-left corner of the data
    if annotate_xy is None:
        annotate_xy = (
            int(x_values.min() + 0.5),
            int(y_values.min() + 0.5) + 4,
        )

    # Create scatter plot of lat vs <column/weather>, with the linear regression
    plt.scatter(x_values, y_values, color="purple", edgecolor="black")
    plt.plot(x_values, regress_values, "r-")

    # Label and annotate plot components and line equation
    plt.xlabel(xlab)
    plt.ylabel(ylab)
    plt.grid(True)
    plt.title(title)
    plt.annotate(line_eq, annotate_xy, fontsize=15, color="red")

    # Report the correlation coefficient and the coefficient of
    # determination under their own names (r-squared is rvalue ** 2)
    print(f"The r-value is: {rvalue}")
    print(f"The r-squared value is: {rvalue**2}")

    # Show the plot
    plt.show()


In [None]:
# Northern Hemisphere subset: rows whose latitude is zero or positive
is_northern = city_data_df["Lat"] >= 0
northern_hemi_df = pd.DataFrame(city_data_df[is_northern])

# Render the subset as the cell output
northern_hemi_df

In [None]:
# Southern Hemisphere subset: rows whose latitude is strictly negative
is_southern = city_data_df["Lat"] < 0
southern_hemi_df = pd.DataFrame(city_data_df[is_southern])

# Render the subset as the cell output
southern_hemi_df

###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: regress max temperature on latitude

# Data columns fed to the regression
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]

# Anchor for the line-equation label: each axis minimum plus 0.5, truncated
# to an int, with the y position lifted by 4
xmin = int(x_values.min() + 0.5)
ymin = int(y_values.min() + 0.5) + 4

# Fit, plot, and report the regression via the shared helper
lin_regr(
    x_values=x_values,
    y_values=y_values,
    xlab="Latitude",
    ylab="Temperature (Celsius)",
    title="Latitude vs Temperature",
)

In [None]:
# Southern Hemisphere: regress max temperature on latitude

# Data columns fed to the regression
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]

# Anchor for the line-equation label: each axis minimum plus 0.5, truncated
# to an int, with the y position lifted by 4
xmin = int(x_values.min() + 0.5)
ymin = int(y_values.min() + 0.5) + 4

# Fit, plot, and report the regression via the shared helper
lin_regr(
    x_values=x_values,
    y_values=y_values,
    xlab="Latitude",
    ylab="Temperature (Celsius)",
    title="Latitude vs Temperature",
)

### Discussion about Linear Relationship:
* In both hemispheres, temperature decreases as latitude moves away from the equator (0) toward the poles, whether in the positive (northern) or negative (southern) direction.
* The northern hemisphere has a negative slope while the southern hemisphere has a positive slope. This is due to the northern hemisphere having positive latitudes and the southern hemisphere having negative latitudes, but the slope means the same for each case: temperature decreases the further the latitude is from 0.
* The northern hemisphere has a higher r-value, which means there is a stronger linear association between temperature and latitude. The southern hemisphere has a lower r-value, meaning the association is weaker (but still there).

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: regress humidity on latitude

# Data columns fed to the regression
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]

# Anchor for the line-equation label: each axis minimum plus 0.5, truncated
# to an int, with the y position lifted by 4
xmin = int(x_values.min() + 0.5)
ymin = int(y_values.min() + 0.5) + 4

# Fit, plot, and report the regression via the shared helper
lin_regr(
    x_values=x_values,
    y_values=y_values,
    xlab="Latitude",
    ylab="Humidity (%)",
    title="Latitude vs Humidity",
)


In [None]:
# Southern Hemisphere: regress humidity on latitude

# Data columns fed to the regression
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]

# Anchor for the line-equation label: each axis minimum plus 0.5, truncated
# to an int, with the y position lifted by 4
xmin = int(x_values.min() + 0.5)
ymin = int(y_values.min() + 0.5) + 4

# Fit, plot, and report the regression via the shared helper
lin_regr(
    x_values=x_values,
    y_values=y_values,
    xlab="Latitude",
    ylab="Humidity (%)",
    title="Latitude vs Humidity",
)


### Discussion about Linear Relationship:
* The northern hemisphere has a steeper, positive slope that indicates the humidity generally increases as latitude increases.
* The southern hemisphere has a smaller positive slope; because southern latitudes are negative, this means that humidity decreases somewhat as latitude moves further from 0.
* Although both plots indicate some change in humidity with latitude, the low r-values (i.e. closer to 0 than to 1) and the widely dispersed plot points tell us that there is no strong correlation between humidity and latitude.

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: regress cloudiness on latitude

# Data columns fed to the regression
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]

# Anchor for the line-equation label: each axis minimum plus 0.5, truncated
# to an int, with the y position lifted by 4
xmin = int(x_values.min() + 0.5)
ymin = int(y_values.min() + 0.5) + 4

# Fit, plot, and report the regression via the shared helper
lin_regr(
    x_values=x_values,
    y_values=y_values,
    xlab="Latitude",
    ylab="Cloudiness (%)",
    title="Latitude vs Cloudiness",
)


In [None]:
# Southern Hemisphere: regress cloudiness on latitude

# Data columns fed to the regression
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]

# Anchor for the line-equation label: each axis minimum plus 0.5, truncated
# to an int, with the y position lifted by 4
xmin = int(x_values.min() + 0.5)
ymin = int(y_values.min() + 0.5) + 4

# Fit, plot, and report the regression via the shared helper
lin_regr(
    x_values=x_values,
    y_values=y_values,
    xlab="Latitude",
    ylab="Cloudiness (%)",
    title="Latitude vs Cloudiness",
)


### Discussion about Linear Relationship:
* The low r-values for both the northern and southern hemispheres, as well as the wide dispersal of plot points in each graph indicate that cloudiness is not dependent on increasing/decreasing latitude. There is likely a better way to find the relationship of cloudiness versus latitude that includes more variables (such as temperature and humidity).

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: regress wind speed on latitude

# Data columns fed to the regression
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]

# Anchor for the line-equation label: each axis minimum plus 0.5, truncated
# to an int, with the y position lifted by 4
xmin = int(x_values.min() + 0.5)
ymin = int(y_values.min() + 0.5) + 4

# Fit, plot, and report the regression via the shared helper
lin_regr(
    x_values=x_values,
    y_values=y_values,
    xlab="Latitude",
    ylab="Wind Speed (m/s)",
    title="Latitude vs Wind Speed",
)

In [None]:
# Southern Hemisphere: regress wind speed on latitude

# Data columns fed to the regression
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]

# Anchor for the line-equation label: each axis minimum plus 0.5, truncated
# to an int, with the y position lifted by 4
xmin = int(x_values.min() + 0.5)
ymin = int(y_values.min() + 0.5) + 4

# Fit, plot, and report the regression via the shared helper
lin_regr(
    x_values=x_values,
    y_values=y_values,
    xlab="Latitude",
    ylab="Wind Speed (m/s)",
    title="Latitude vs Wind Speed",
)


### Discussion about Linear Relationship:
* Both hemispheres have very low r-values and flatter slopes, which suggests that windspeed has less to do with latitude and distance from the equator, and more to do with other factors that are not included here (such as distance to oceans or mountains, temperature).
* Again, the dispersal of plot points for each hemisphere is less concentrated, which makes it difficult to find much of a relationship between latitude and its effect on windspeed.