In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json


# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy

In [None]:
# Bounds used when sampling random coordinates
lat_range = (-90, 90)
lng_range = (-180, 180)

# Containers for the coordinate pairs and the de-duplicated city names
lat_lngs = []
cities = []

# Draw 1500 random latitudes first, then 1500 random longitudes, and pair them up
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair, keeping first occurrences only
for lat_lng in lat_lngs:
    latitude, longitude = lat_lng
    city = citipy.nearest_city(latitude, longitude).city_name

    # Skip names that have already been collected
    if city in cities:
        continue
    cities.append(city)

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")


In [None]:
# Set the API base URL
url = "https://api.openweathermap.org/data/2.5/weather?"

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes; every set numbers its records from 1
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1

    # Build the query as a dict so requests URL-encodes the city name.
    # units=imperial requests Fahrenheit and miles/hour, matching the plot labels used later.
    query_params = {"q": city, "units": "imperial", "appid": weather_api_key}

    # Log the record and set numbers (the request URL is deliberately not printed: it contains the API key)
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))

    # Add 1 to the record count
    record_count += 1

    # Request and parse the weather for this city; any failure skips the city instead of aborting the run
    try:
        response = requests.get(url, params=query_params, timeout=10).json()

        city_lat = response["coord"]["lat"]
        city_lng = response["coord"]["lon"]
        city_max_temp = response["main"]["temp_max"]
        city_humidity = response["main"]["humidity"]
        city_clouds = response["clouds"]["all"]
        city_wind = response["wind"]["speed"]
        city_country = response["sys"]["country"]
        city_date = response["dt"]

        # Append the City information into city_data list
        city_data.append({"City": city,
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # RequestException: network/HTTP-level failure; ValueError: body is not valid JSON;
    # KeyError/TypeError: the response lacks the expected fields (e.g. city not found)
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        print(f"No data found for {city}. Skipping...")

# Print completion message (a summary count rather than the full list, to keep the output readable)
print(f"Retrieved weather data for {len(city_data)} of {len(cities)} cities")
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

In [None]:

# Latitude is the shared x-axis for all four scatter plots
latitudes = [data['Lat'] for data in city_data]

# (city_data key, y-axis label, plot title) for each plot, in display order
scatter_specs = [
    ('Max Temp', 'Temperature (°F)', 'City Latitude vs. Max Temperature'),
    ('Humidity', 'Humidity (%)', 'City Latitude vs. Humidity'),
    ('Cloudiness', 'Cloudiness (%)', 'City Latitude vs. Cloudiness'),
    ('Wind Speed', 'Wind Speed (mph)', 'City Latitude vs. Wind Speed'),
]

# Draw one labelled scatter plot per measurement
for field, y_axis_label, plot_title in scatter_specs:
    plt.scatter(latitudes, [data[field] for data in city_data])
    plt.xlabel('Latitude')
    plt.ylabel(y_axis_label)
    plt.title(plot_title)
    plt.grid(True)
    plt.show()


In [None]:
import matplotlib.pyplot as plt
from scipy.stats import linregress

# Define a function for creating scatter plots and linear regression
def plot_scatter(x_values, y_values, x_label, y_label, title):
    """Draw a scatter plot of ``y_values`` against ``x_values`` with a fitted regression line.

    The fitted line and its equation are drawn in red, the r-value is printed,
    and the figure is displayed with ``plt.show()``.

    Args:
        x_values: Sequence of numeric x coordinates.
        y_values: Sequence of numeric y coordinates, same length as ``x_values``.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the plot title.

    Returns:
        None.
    """
    plt.scatter(x_values, y_values)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.grid(True)

    # A line cannot be fitted through fewer than two points; show the bare scatter instead of raising
    if len(x_values) < 2:
        print("Not enough data points for a linear regression.")
        plt.show()
        return

    # Perform linear regression
    regression = linregress(x_values, y_values)
    slope = regression.slope
    intercept = regression.intercept

    # Calculate regression values
    x_array = np.array(x_values)
    regress_values = x_array * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

    # Plot regression line
    plt.plot(x_values,regress_values,"r-")

    # Add equation to plot. Axes-fraction coordinates keep the text inside the
    # plot area whatever the data range is (a fixed data-space offset can fall
    # outside the axes when the y range is small).
    plt.annotate(line_eq, xy=(0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

    # Print r value
    print(f"The r-value is: {regression.rvalue}")

    # Show plot
    plt.show()

In [None]:
# Create function to plot scatter and linear regression for Southern Hemisphere
def plot_southern(x_values, y_values, x_label, y_label, title):
    """Plot a scatter and regression line for the Southern Hemisphere records in ``city_data``.

    Records are taken from the notebook-level ``city_data`` list and kept only
    when their ``'Lat'`` value is below zero.

    Args:
        x_values: Key of the ``city_data`` field plotted on the x-axis (e.g. ``'Lat'``).
        y_values: Key of the ``city_data`` field plotted on the y-axis.
        x_label: Text for the x-axis label.
        y_label: Text for the y-axis label.
        title: Text for the plot title.

    Returns:
        None.
    """
    # Filter data for Southern Hemisphere
    southern_data = [data for data in city_data if data['Lat'] < 0]

    # Honour the requested x field instead of always plotting 'Lat'
    x_southern = [data[x_values] for data in southern_data]
    y_southern = [data[y_values] for data in southern_data]

    # Call plot_scatter function to create scatter plot and linear regression
    plot_scatter(x_southern, y_southern, x_label, y_label, title)

# Southern vs Northern Latitude vs Max Temperature

In [None]:
# Create scatter plot and linear regression for Latitude vs. Temperature in Northern Hemisphere
# Keep only cities at or above the equator so the plotted data matches the plot title
northern_data = [data for data in city_data if data["Lat"] >= 0]
plot_scatter(
    [data["Lat"] for data in northern_data],
    [data["Max Temp"] for data in northern_data],
    "Latitude",
    "Temperature (°F)",
    "City Latitude vs. Max Temperature (Northern Hemisphere)",
)



In [None]:
# Southern Hemisphere: scatter plot and linear regression of max temperature against latitude
plot_southern(
    x_values='Lat',
    y_values='Max Temp',
    x_label='Latitude',
    y_label='Temperature (°F)',
    title='City Latitude vs. Max Temperature (Southern Hemisphere)',
)


Linear regression analysis

For the Northern Hemisphere, the negative slope of the linear regression line shows that as latitude increases, the maximum temperature decreases. For the Southern Hemisphere, the positive slope of the linear regression line indicates that as latitude increases toward the equator (0°), the maximum temperature tends to increase.

# Southern vs Northern Latitude vs Humidity

In [None]:
# Create scatter plot and linear regression for Latitude vs. Humidity in Northern Hemisphere
# Keep only cities at or above the equator so the plotted data matches the plot title
northern_data = [data for data in city_data if data["Lat"] >= 0]
plot_scatter(
    [data["Lat"] for data in northern_data],
    [data["Humidity"] for data in northern_data],
    "Latitude",
    "Humidity (%)",
    "City Latitude vs. Humidity Northern Hemisphere",
)



In [None]:
# Southern Hemisphere: scatter plot and linear regression of humidity against latitude
plot_southern(
    x_values='Lat',
    y_values='Humidity',
    x_label='Latitude',
    y_label='Humidity (%)',
    title='City Latitude vs. Humidity (Southern Hemisphere)',
)


There is no significant correlation between humidity and latitude in either the Northern or Southern Hemisphere; the linear regression for this relationship shows only a weak negative correlation.

# Southern vs Northern Latitude vs Cloudiness

In [None]:
# Create scatter plot and linear regression for Latitude vs. Cloudiness in Northern Hemisphere
# Keep only cities at or above the equator so the plotted data matches the plot title
northern_data = [data for data in city_data if data["Lat"] >= 0]
plot_scatter(
    [data["Lat"] for data in northern_data],
    [data["Cloudiness"] for data in northern_data],
    "Latitude",
    "Cloudiness (%)",
    "City Latitude vs. Cloudiness (Northern Hemisphere)",
)



In [None]:
# Southern Hemisphere: scatter plot and linear regression of cloudiness against latitude
plot_southern(
    x_values='Lat',
    y_values='Cloudiness',
    x_label='Latitude',
    y_label='Cloudiness (%)',
    title='City Latitude vs. Cloudiness (Southern Hemisphere)',
)



Both the Southern and Northern Hemispheres show weak positive correlations between latitude and cloudiness in this linear regression.

# Southern vs Northern Latitude vs Wind Speed

In [None]:
# Create scatter plot and linear regression for Latitude vs. Wind Speed in Northern Hemisphere
# Keep only cities at or above the equator so the plotted data matches the plot title
northern_data = [data for data in city_data if data["Lat"] >= 0]
plot_scatter(
    [data["Lat"] for data in northern_data],
    [data["Wind Speed"] for data in northern_data],
    "Latitude",
    "Wind Speed (mph)",
    "City Latitude vs. Wind Speed (Northern Hemisphere)",
)


In [None]:

# Southern Hemisphere: scatter plot and linear regression of wind speed against latitude
plot_southern(
    x_values='Lat',
    y_values='Wind Speed',
    x_label='Latitude',
    y_label='Wind Speed (mph)',
    title='City Latitude vs. Wind Speed (Southern Hemisphere)',
)





The linear regression for the relationship between latitude and wind speed shows a weak positive correlation in both the Northern and Southern Hemispheres.

In [None]:
import csv

# Define the fieldnames for the CSV file (same keys as the records in city_data)
fieldnames = ['City', 'Lat', 'Lng', 'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed', 'Country', 'Date']

# Open a new CSV file to write data.
# newline='' lets the csv module control line endings; the explicit UTF-8 encoding
# keeps non-ASCII place names from depending on the platform's default encoding.
with open('city_data.csv', mode='w', newline='', encoding='utf-8') as csv_file:

    # Create a CSV writer object
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write the header row to the CSV file
    writer.writeheader()

    # Write every record of city_data as one row
    writer.writerows(city_data)
