In [1]:
# Cell 1: Dependencies and Setup

# Import necessary libraries
import os                         # For creating the output directory
import time                       # For adding delays (to avoid rate limiting)

import matplotlib.pyplot as plt  # For plotting graphs
import numpy as np                # For numerical operations
import pandas as pd               # For data manipulation and analysis
import requests                   # For making API requests
from citipy import citipy         # For finding nearest cities based on coordinates
from scipy.stats import linregress  # For statistical analysis (if needed)

# Import the OpenWeatherMap API key
from api_keys import weather_api_key  # Ensure this file contains your API key
# Confirm the key was loaded without echoing the secret itself: anything printed
# here is stored in the notebook file and shared with it.
print(f"API Key loaded: {bool(weather_api_key)}")

API Key: [REDACTED]


In [2]:
# Cell 2: Generate Cities Based on Random Latitude and Longitude

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# dict.fromkeys keeps only the first occurrence of each name and preserves
# insertion order, so the result matches a manual "append if not seen" loop
# without rescanning the whole list for every coordinate pair.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")

Number of cities in the list: 606


In [3]:
# Cell 3: Retrieve Weather Data for Each City

# Set the API base URL (HTTPS, so the API key in the query string is not sent in clear text)
url = "https://api.openweathermap.org/data/2.5/weather?"

# One dict per city that the API returned successfully
city_data = []

print("Beginning Data Retrieval")
print("-----------------------------")
print(f"Total cities to process: {len(cities)}")

# Loop through all the cities in our list to fetch weather data
for city in cities:
    # Let requests build the query string so that city names containing spaces
    # or apostrophes (e.g. "long beach", "kapa'a") are URL-encoded correctly
    query_params = {"q": city, "appid": weather_api_key, "units": "metric"}

    print(f"Attempting to retrieve weather for: {city}")

    try:
        response = requests.get(url, params=query_params, timeout=10)  # Set timeout

        if response.status_code == 200:
            city_weather = response.json()

            # Parse out relevant information.
            # Latitude, Longitude and Cloudiness are required by the plotting and
            # hemisphere cells further down; they are appended after the existing
            # keys so the original column order is unchanged.
            city_info = {
                "City": city,
                "Temperature": city_weather['main']['temp'],
                "Humidity": city_weather['main']['humidity'],
                "Wind Speed": city_weather['wind']['speed'],
                "Description": city_weather['weather'][0]['description'],
                "Latitude": city_weather['coord']['lat'],
                "Longitude": city_weather['coord']['lon'],
                "Cloudiness": city_weather['clouds']['all']
            }
            city_data.append(city_info)
        else:
            print(f"Failed to retrieve data for {city} with status code {response.status_code}: {response.text}")

    except requests.exceptions.Timeout:
        print(f"Timeout occurred while trying to retrieve data for {city}. Skipping...")

    except Exception as e:
        # Best effort: a malformed payload or network error for one city
        # must not abort the whole retrieval run
        print(f"An error occurred: {e}")

    time.sleep(1)  # Pause to avoid hitting the API rate limit

# Convert collected data into a DataFrame for further analysis or visualization later on.
city_data_df = pd.DataFrame(city_data)

# Show Record Count and Display DataFrame
print("Record Count:")
print(city_data_df.count())
print(city_data_df.head())  # Display first few records of the DataFrame

Beginning Data Retrieval
-----------------------------
Total cities to process: 606
Attempting to retrieve weather for: tual
Attempting to retrieve weather for: waitangi
Attempting to retrieve weather for: kalaleh
Attempting to retrieve weather for: edinburgh of the seven seas
Attempting to retrieve weather for: saldanha
Attempting to retrieve weather for: moranbah
Attempting to retrieve weather for: hawaiian paradise park
Attempting to retrieve weather for: hamilton
Attempting to retrieve weather for: pawni
Attempting to retrieve weather for: long beach
Attempting to retrieve weather for: adamstown
Attempting to retrieve weather for: santa cruz
Attempting to retrieve weather for: kapa'a
Attempting to retrieve weather for: savanna-la-mar
Attempting to retrieve weather for: kadur
Attempting to retrieve weather for: sandnessjoen
Attempting to retrieve weather for: bethel
Attempting to retrieve weather for: lata
Attempting to retrieve weather for: noyabrsk
Attempting to retrieve weather f

In [None]:
# Preview the first five rows of the weather table
city_data_df.head(5)

In [None]:
# Export the City_Data into a csv
# to_csv does not create missing folders, so make sure the target directory
# exists first (a no-op when it is already there)
os.makedirs("output_data", exist_ok=True)
city_data_df.to_csv("output_data/cities.csv", index_label="City_ID")
print("Data Exported successfully!")

In [None]:
# Reload the exported CSV, using the City_ID column as the index
city_data_df = pd.read_csv(filepath_or_buffer="output_data/cities.csv", index_col="City_ID")

# Preview the first five rows
city_data_df.head(5)

### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
# Latitude vs. temperature scatter, drawn on an explicit Figure/Axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(city_data_df['Latitude'], city_data_df['Temperature'], marker='o', color='blue', alpha=0.5)

# Title, axis labels and grid
ax.set_title('City Latitude vs. Temperature')
ax.set_xlabel('Latitude')
ax.set_ylabel('Temperature (°C)')
ax.grid()

# Write the figure to disk
fig.savefig("output_data/Fig1.png")

# Render the plot
plt.show()

#### Latitude Vs. Humidity

In [None]:
# Latitude vs. humidity scatter, drawn on an explicit Figure/Axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(city_data_df['Latitude'], city_data_df['Humidity'], marker='o', color='blue', alpha=0.5)

# Title, axis labels and grid
ax.set(title='City Latitude vs. Humidity', xlabel='Latitude', ylabel='Humidity (%)')
ax.grid()

# Write the figure to disk
fig.savefig("output_data/Fig2.png")

# Render the plot
plt.show()

#### Latitude Vs. Cloudiness

In [None]:
# Latitude vs. cloudiness scatter, drawn on an explicit Figure/Axes pair
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(city_data_df['Latitude'], city_data_df['Cloudiness'], marker='o', color='blue', alpha=0.5)

# Title, axis labels and grid
ax.set_title('City Latitude vs. Cloudiness')
ax.set_xlabel('Latitude')
ax.set_ylabel('Cloudiness (%)')
ax.grid()

# Write the figure to disk
fig.savefig("output_data/Fig3.png")

# Render the plot
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Latitude vs. wind speed scatter, drawn on an explicit Figure/Axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(city_data_df['Latitude'], city_data_df['Wind Speed'], marker='o', color='blue', alpha=0.5)

# Title, axis labels and grid
ax.set(title='City Latitude vs. Wind Speed', xlabel='Latitude', ylabel='Wind Speed (m/s)')
ax.grid()

# Write the figure to disk
fig.savefig("output_data/Fig4.png")

# Render the plot
plt.show()

---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Define a function to create Linear Regression plots
# YOUR CODE HERE
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress

def create_linear_regression_plot(x, y, x_label, y_label, title):
    """
    Draw y against x as a scatter plot and overlay the fitted regression line.

    The figure is saved as a PNG under output_data/ (file name is the title
    with spaces replaced by underscores) and then shown.

    Parameters:
    x : array-like
        Values for the horizontal axis (e.g., latitude).
    y : array-like
        Values for the vertical axis (e.g., temperature, humidity).
    x_label : str
        Text for the x-axis label.
    y_label : str
        Text for the y-axis label.
    title : str
        Plot title; also the basis for the output file name.
    """
    # Fit y = slope * x + intercept and evaluate the line at every x
    regression = linregress(x, y)
    fit_line = regression.slope * x + regression.intercept

    # Points in blue, fitted line in red, on one explicit Axes
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(x, y, marker='o', color='blue', alpha=0.5)
    ax.plot(x, fit_line, color='red', linewidth=2)

    # Title, axis labels and grid
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.grid()

    # Persist the figure, then render it
    output_path = f"output_data/{title.replace(' ', '_')}.png"
    fig.savefig(output_path)
    plt.show()

# Plot each weather variable against latitude with create_linear_regression_plot.
# Each entry is (DataFrame column, y-axis label); the column name also completes the title.
regression_targets = [
    ('Temperature', 'Temperature (°C)'),
    ('Humidity', 'Humidity (%)'),
    ('Wind Speed', 'Wind Speed (m/s)'),
    ('Cloudiness', 'Cloudiness (%)'),
]

for column, axis_label in regression_targets:
    create_linear_regression_plot(
        city_data_df['Latitude'],
        city_data_df[column],
        'Latitude',
        axis_label,
        f'Linear Regression of Latitude vs {column}'
    )

In [None]:
# Keep only the cities on or north of the equator (Latitude >= 0)
is_northern = city_data_df['Latitude'] >= 0
northern_hemi_df = city_data_df[is_northern]

# Preview the first five rows
northern_hemi_df.head(5)

In [None]:
# Keep only the cities south of the equator (Latitude < 0)
is_southern = city_data_df['Latitude'] < 0
southern_hemi_df = city_data_df[is_southern]

# Preview the first five rows
southern_hemi_df.head(5)

### Temperature vs. Latitude Linear Regression Plot

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import linregress

# Requires northern_hemi_df (with 'Latitude' and 'Temperature' columns) from an earlier cell
north_lat = northern_hemi_df['Latitude']
north_temp = northern_hemi_df['Temperature']

# Fit a regression line through the Northern Hemisphere points
slope, intercept, r_value, p_value, std_err = linregress(north_lat, north_temp)

# Regression line evaluated at each latitude
fit_line = slope * north_lat + intercept

# Points in blue, fitted line in red, on one explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(north_lat, north_temp, marker='o', color='blue', alpha=0.5)
ax.plot(north_lat, fit_line, color='red', linewidth=2)

# Title, axis labels and grid
ax.set_title('Northern Hemisphere: Latitude vs. Temperature')
ax.set_xlabel('Latitude')
ax.set_ylabel('Temperature (°C)')
ax.grid()

# Write the figure to disk
fig.savefig("output_data/Northern_Hemisphere_Latitude_vs_Temperature.png")

# Render the plot
plt.show()

In [None]:
import matplotlib.pyplot as plt
from scipy.stats import linregress



# Requires southern_hemi_df (with 'Latitude' and 'Temperature' columns) from an earlier cell
south_lat = southern_hemi_df['Latitude']
south_temp = southern_hemi_df['Temperature']

# Fit a regression line through the Southern Hemisphere points
slope, intercept, r_value, p_value, std_err = linregress(south_lat, south_temp)

# Regression line evaluated at each latitude
fit_line = slope * south_lat + intercept

# Points in blue, fitted line in red, on one explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(south_lat, south_temp, marker='o', color='blue', alpha=0.5)
ax.plot(south_lat, fit_line, color='red', linewidth=2)

# Title, axis labels and grid
ax.set(
    title='Southern Hemisphere: Latitude vs. Temperature',
    xlabel='Latitude',
    ylabel='Temperature (°C)',
)
ax.grid()

# Write the figure to disk
fig.savefig("output_data/Southern_Hemisphere_Latitude_vs_Temperature.png")

# Render the plot
plt.show()

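**Discussion about the linear relationship:** TODO — once the two hemisphere plots above have been generated, describe the direction and strength (r-value) of the latitude–temperature relationship in each hemisphere.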