# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from datetime import date
from scipy.stats import linregress
#from scipy.stats import pearsonr

# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy


### Generate cities list by using the citipy Library



In [None]:
# Build a list of unique city names by sampling random coordinates and asking
# citipy for the nearest city to each one.

# Unique city names, in the order they were first encountered
cities = []

# Companion set used only for fast "already seen?" checks
seen_cities = set()

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Number of random coordinate pairs to draw, and the cap on unique cities kept
sample_size = 2500
max_cities = 623

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=sample_size)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=sample_size)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # Only keep the first occurrence of each city name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

    # Stop the loop once we have enough unique cities
    if len(cities) >= max_cities:
        break

# Print the city count to confirm sufficient count
print(f"Number of cities: {len(cities)}")



## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:
# Base endpoint for the OpenWeatherMap current-weather API.
# HTTPS is used because every request carries the API key as a query parameter.
url = "https://api.openweathermap.org/data/2.5/weather?"

In [None]:
# Start with an empty list named city_data, reserved for per-city weather data.
# NOTE(review): the retrieval loop in this notebook stores its rows in city_df,
# and city_data is later rebound to the contents of the CSV file.
city_data = list()


In [None]:
# Emit the two-line banner that precedes the per-city progress log
print(
    "Beginning Data Retrieval     ",
    "-----------------------------",
    sep="\n",
)


In [None]:
# Fetch current weather for every city in `cities` and collect it in `city_df`.

# Column order of the weather table; every collected row follows this order
weather_columns = ["City", "Lat", "Lng", "Max Temp", "Humidity", "Cloudiness", "Wind Speed", "Country", "Date"]

# Rows are gathered in plain lists and converted to a DataFrame once at the end,
# so pandas can infer numeric dtypes instead of growing the frame row by row.
weather_rows = []

# Position of each successful city within `cities`; used as the DataFrame index
row_labels = []

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):
    # Let requests build and encode the query string (city names may contain
    # spaces or non-ASCII characters)
    query = {"q": city, "units": "imperial", "appid": weather_api_key}

    try:
        # A timeout keeps one stalled connection from hanging the whole run
        response = requests.get(url, params=query, timeout=10)
        response.raise_for_status()

        # Parse the JSON and retrieve data
        city_weather = response.json()

        # Extract relevant data from the API response
        row = [
            city_weather["name"],
            city_weather["coord"]["lat"],
            city_weather["coord"]["lon"],
            city_weather["main"]["temp_max"],
            city_weather["main"]["humidity"],
            city_weather["clouds"]["all"],
            city_weather["wind"]["speed"],
            city_weather["sys"]["country"],
            city_weather["dt"],
        ]
    except requests.HTTPError as err:
        # Only the status code is printed: the exception text contains the
        # request URL, which includes the API key.
        status = err.response.status_code
        if status == 404:
            print(f"City not found: {city}")
        else:
            print(f"Request failed for {city}: HTTP {status}")
        continue
    except (requests.RequestException, KeyError, ValueError) as err:
        # Network failure, malformed JSON, or a response missing an expected field
        print(f"Skipping {city}: {type(err).__name__}")
        continue

    weather_rows.append(row)
    row_labels.append(i)

    # Log the progress of the API requests
    print(f"Processing Record {i+1} | {row[0]}")

# Assemble the collected rows into the final table
city_df = pd.DataFrame(weather_rows, columns=weather_columns, index=row_labels)

# Show Record Count
print(city_df.count())


In [None]:
# Show count of non-null values in each column.
# The weather table built by the retrieval loop is named city_df.
city_df.count()


In [None]:
# Write the weather table to disk; the index is stored under the
# "City_Data_Details" column header
city_df.to_csv(
    "output_data/cities.csv",
    index_label="City_Data_Details",
)

# Preview the first five rows
city_df.head(5)


In [None]:
# Scatter plot of latitude vs. maximum temperature for every retrieved city

# Both axes come from city_df, the table built by the retrieval loop
plt.scatter(city_df["Lat"], city_df["Max Temp"], edgecolors="black", linewidths=1, marker="o", alpha=0.8)

# Incorporate the other graph properties.
# The title carries the run date rather than a fixed one, since the weather
# data is fetched live each time the notebook runs.
plt.title(f"City Latitude vs. Max Temperature ({date.today():%m/%d/%Y})")
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure
plt.savefig("output_data/Fig1.png")

# Show plot
plt.show()

In [None]:
# Scatter plot of latitude vs. humidity for every retrieved city
plt.scatter(city_df["Lat"], city_df["Humidity"], edgecolors="black", linewidths=1, marker="o", alpha=0.8)

# Incorporate the other graph properties.
# The title carries the run date (four-digit year) rather than a fixed one,
# since the weather data is fetched live each time the notebook runs.
plt.title(f"City Latitude vs. Humidity ({date.today():%m/%d/%Y})")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure
plt.savefig("output_data/Fig2.png")

# Show plot
plt.show()

    

In [None]:
# Report the Pearson correlation between latitude and each weather variable
from scipy.stats import pearsonr

# Wording used in the printed sentence -> column name in city_df
# (listed in the order the results are printed)
variables_by_label = {
    "humidity": "Humidity",
    "cloudiness": "Cloudiness",
    "wind speed": "Wind Speed",
    "max temperature": "Max Temp",
}

for label, column in variables_by_label.items():
    # pearsonr returns the correlation coefficient and its p value
    r, p = pearsonr(city_df["Lat"], city_df[column])
    print(f"The r value between latitude and {label} is: {r:.2f}")
    
   

In [None]:
 # Save the current pyplot figure to my_plot.png
# NOTE(review): no plotting call precedes this in the cell, and the earlier
# plotting cells already end with plt.show(). Presumably this writes an empty
# canvas (depends on the matplotlib backend) - TODO confirm which figure was
# meant to be saved.
plt.savefig('my_plot.png')

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Read the CSV file written by the export cell.
# The export stores the DataFrame index under the "City_Data_Details" header,
# so it is restored as the index here instead of appearing as a data column.
city_data = pd.read_csv("output_data/cities.csv", index_col="City_Data_Details")

# Display the first 5 rows of the DataFrame
city_data.head()

In [None]:
# Wrap the data loaded from the CSV (already a DataFrame) in city_df
city_df = pd.DataFrame(city_data)

# Northern hemisphere subset: rows whose latitude is zero or greater
is_northern = city_df['Lat'].ge(0)
city_data_north = city_df[is_northern]

# Preview the first rows of the northern subset
city_data_north.head()



In [None]:
# Southern hemisphere subset: rows whose latitude is below zero
is_southern = city_data['Lat'].lt(0)
city_data_south = city_data.loc[is_southern]

# Preview the first rows of the southern subset
city_data_south.head()

###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere: maximum temperature regressed on latitude

# Predictor (latitude) and response (maximum temperature)
x_values = city_data_north['Lat']
y_values = city_data_north['Max Temp']

# Fit the regression line; rvalue is the correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted temperature for each observed latitude
regress_values = intercept + slope * x_values

# Draw the observations with the fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, color='red')
ax.set_xlabel('Latitude')
ax.set_ylabel('Max Temperature (F)')
ax.set_title('Northern Hemisphere - Max Temperature vs. Latitude Linear Regression')

# Report the strength of the linear relationship
print(f"The r-value is: {rvalue}")

# Render the figure
plt.show()


In [None]:
# Southern Hemisphere: maximum temperature regressed on latitude

# Predictor (latitude) and response (maximum temperature)
x_values = city_data_south['Lat']
y_values = city_data_south['Max Temp']

# Fit the regression line; rvalue is the correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted temperature for each observed latitude
regress_values = intercept + slope * x_values

# Text form of the fitted line, rounded to two decimals
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Draw the observations with the fitted line on top
fig, ax = plt.subplots()
ax.scatter(x_values, y_values)
ax.plot(x_values, regress_values, "r-")

# Place the equation at data coordinates (-50, 80)
ax.annotate(line_eq, (-50, 80), fontsize=15, color="red")

# Axis labels and title
ax.set_xlabel('Latitude')
ax.set_ylabel('Max Temperature (F)')
ax.set_title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')

# Report the strength of the linear relationship
print(f"The r-value is: {rvalue}")

# Render the figure
plt.show()


**Discussion about the linear relationship:** The two linear regressions show that there is a relationship between latitude and maximum temperature in both the northern and southern hemispheres. In the northern hemisphere, there is a strong negative correlation between latitude and maximum temperature, while in the southern hemisphere, there is a moderate positive correlation. In both cases the maximum temperature rises as the latitude approaches the equator (0°); the slopes have opposite signs because southern latitudes are negative. These results suggest that latitude is a key factor in determining maximum temperature, and that the direction and strength of this relationship vary between the hemispheres.

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere Humidity vs. Latitude Linear Regression Plot

# Predictor (latitude) and response (humidity)
x_values = city_data_north['Lat']
y_values = city_data_north['Humidity']

# Perform linear regression; rvalue is the correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted humidity for each observed latitude
regress_values = x_values * slope + intercept

# Create plot. Humidity is a percentage, so the axis is labelled in %
# (matching the earlier humidity scatter plot) rather than degrees F.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, color='red')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title('Northern Hemisphere - Humidity vs. Latitude Linear Regression')

# Print r-value
print(f"The r-value is: {rvalue}")

# Show plot
plt.show()





In [None]:
# Southern Hemisphere Humidity vs. Latitude Linear Regression Plot

# Predictor (latitude) and response (humidity)
x_values = city_data_south['Lat']
y_values = city_data_south['Humidity']

# Perform linear regression; rvalue is the correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted humidity for each observed latitude
regress_values = x_values * slope + intercept

# Create plot. Humidity is a percentage, so the axis is labelled in %
# (matching the earlier humidity scatter plot) rather than degrees F.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, color='red')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title('Southern Hemisphere - Humidity vs. Latitude Linear Regression')

# Print r-value
print(f"The r-value is: {rvalue}")

# Show plot
plt.show()

**Discussion about the linear relationship:** The two linear regressions show the relationship between humidity and latitude in the northern and southern hemispheres. The northern hemisphere has a weak positive correlation between humidity and latitude, with a positive slope on the regression line, while the southern hemisphere has a weak negative correlation between humidity and latitude, with a negative slope. These differences could be attributed to atmospheric circulation patterns and regional climate conditions.

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere Cloudiness vs. Latitude Linear Regression Plot

# Predictor (latitude) and response (cloudiness)
x_values = city_data_north['Lat']
y_values = city_data_north['Cloudiness']

# Perform linear regression; rvalue is the correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted cloudiness for each observed latitude
regress_values = x_values * slope + intercept

# Create plot. Cloudiness is reported by the weather API as a percentage of
# cloud cover, so the axis is labelled in % rather than degrees F.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, color='red')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title('Northern Hemisphere - Cloudiness vs. Latitude Linear Regression')

# Print r-value
print(f"The r-value is: {rvalue}")

# Show plot
plt.show()

In [None]:
# Southern Hemisphere Cloudiness vs. Latitude Linear Regression Plot

# Predictor (latitude) and response (cloudiness)
x_values = city_data_south['Lat']
y_values = city_data_south['Cloudiness']

# Perform linear regression; rvalue is the correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted cloudiness for each observed latitude
regress_values = x_values * slope + intercept

# Create plot. Cloudiness is reported by the weather API as a percentage of
# cloud cover, so the axis is labelled in % rather than degrees F.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, color='red')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title('Southern Hemisphere - Cloudiness vs. Latitude Linear Regression')

# Print r-value
print(f"The r-value is: {rvalue}")

# Show plot
plt.show()

**Discussion about the linear relationship:** The two linear regressions examine the relationship between latitude and cloudiness in the northern and southern hemispheres. In the northern hemisphere, the scatter plot with its regression line shows at most a very weak positive correlation between latitude and cloudiness: the printed r-value is close to 0, which indicates little to no linear relationship. The southern hemisphere shows a similarly weak positive correlation, with a slightly positive slope on the regression line and an r-value that is also close to 0. These results suggest that there is little to no relationship between latitude and cloudiness in either hemisphere.

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere Wind Speed vs. Latitude Linear Regression Plot

# Predictor (latitude) and response (wind speed)
x_values = city_data_north['Lat']
y_values = city_data_north['Wind Speed']

# Perform linear regression; rvalue is the correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted wind speed for each observed latitude
regress_values = x_values * slope + intercept

# Create plot. The weather data is requested with units=imperial, so wind
# speed is in miles per hour; the axis is labelled in mph rather than degrees F.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, color='red')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title('Northern Hemisphere - Windspeed vs. Latitude Linear Regression')

# Print r-value
print(f"The r-value is: {rvalue}")

# Show plot
plt.show()


In [None]:
# Southern Hemisphere Wind Speed vs. Latitude Linear Regression Plot

# Predictor (latitude) and response (wind speed)
x_values = city_data_south['Lat']
y_values = city_data_south['Wind Speed']

# Perform linear regression; rvalue is the correlation coefficient
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted wind speed for each observed latitude
regress_values = x_values * slope + intercept

# Create plot. The weather data is requested with units=imperial, so wind
# speed is in miles per hour; the axis is labelled in mph rather than degrees F.
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, color='red')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title('Southern Hemisphere - Windspeed vs. Latitude Linear Regression')

# Print r-value
print(f"The r-value is: {rvalue}")

# Show plot
plt.show()