In [1]:
# Import the dependencies.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Create a set of random latitude and longitude combinations and zip object them with zip() function.
lats = np.random.uniform(low=-90.000, high=90.000, size=2000)
lngs = np.random.uniform(low=-180.000, high=180.000, size=2000)
lat_lngs = zip(lats, lngs)
lat_lngs
#The zip object packs each pair of lats and lngs having the same index in their respective array into a tuple. 
#If there are 2000 latitudes and longitudes, there will be 2000 tuples of paired latitudes and longitudes, 
#where each latitude and longitude in a tuple can be accessed by the index of 0 and 1, respectively.



<zip at 0x7fb612c788c0>

In [3]:
# Add the latitudes and longitudes to a list.
coordinates = list(lat_lngs)
#You can only unzip a zipped tuple once before it is removed from the computer's memory. 
#Make sure you unzip the latitudes and longitudes into the coordinates list before moving on.

In [4]:
# Import
from citipy import citipy

In [5]:
# Create a list for holding the cities.
cities = []
# Identify the nearest city for each latitude and longitude combination.
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in cities:
        cities.append(city)
# Print the city count to confirm sufficient count.
len(cities)

707

In [6]:
# Import the requests library.
import requests

# Import the API key.
from config import weather_api_key

In [7]:
# Starting URL for Open Weather Map API Call.
url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key
print(url)

http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=755f33187f86c3f74d9f8ae7856e6b2d


In [8]:
# Import the time library and the datetime module from the datetime library 
import time
from datetime import datetime

In [9]:
# Create an empty list to hold the weather data.
city_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

#Let's use the enumerate() method to get the index of the city for logging purposes 
#and the city for creating an endpoint URL. 
#Add the following code below our counters.

# Loop through all the cities in the list.

#We create the for loop with the enumerate() method and reference the index and the city in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    #In the conditional statement, we check if the remainder of the index divided by 50 is equal to 0 
    #and if the index is greater than or equal to 50. 
    #If the statement is true, then the set_count and the record_count are incremented by 1.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        time.sleep(60)

    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ","+")
    #Inside the conditional statement, we create the URL endpoint for each city, as before. 
    #However, we are removing the blank spaces in the city name and concatenating the city name with, city.replace(" ","+"). 
    #This will find the corresponding weather data for the city instead of finding the weather data for the first part of the city name.


    # Log the URL, record, and set numbers and the city.
    #Also, we add a print statement that tells us the record count and set count, and the city that is being processed.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1
    #Then above we add one to the record count before the next city is processed.
    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data.
        city_weather = requests.get(city_url).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_weather_description = city_weather ["weather"][0][description]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the date to ISO standard.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Current Description": city_weather_description,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

# If an error is experienced, skip the city.
    except:
        print("City not found. Skipping...")
        pass
        #pass is a general purpose statement to handle all errors encountered & to allow the program to continue.

# Indicate that Data Loading is complete by adding the closing print statement
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | qaanaaq
City not found. Skipping...
Processing Record 2 of Set 1 | busselton
City not found. Skipping...
Processing Record 3 of Set 1 | necochea
City not found. Skipping...
Processing Record 4 of Set 1 | tuatapere
City not found. Skipping...
Processing Record 5 of Set 1 | port elizabeth
City not found. Skipping...
Processing Record 6 of Set 1 | bluff
City not found. Skipping...
Processing Record 7 of Set 1 | port alfred
City not found. Skipping...
Processing Record 8 of Set 1 | port lincoln
City not found. Skipping...
Processing Record 9 of Set 1 | kodiak
City not found. Skipping...
Processing Record 10 of Set 1 | pondicherry
City not found. Skipping...
Processing Record 11 of Set 1 | hambantota
City not found. Skipping...
Processing Record 12 of Set 1 | petropavlovsk-kamchatskiy
City not found. Skipping...
Processing Record 13 of Set 1 | atuona
City not found. Skipping...
Processing Record 14 of

City not found. Skipping...
Processing Record 19 of Set 3 | semey
City not found. Skipping...
Processing Record 20 of Set 3 | bambous virieux
City not found. Skipping...
Processing Record 21 of Set 3 | barawe
City not found. Skipping...
Processing Record 22 of Set 3 | nanortalik
City not found. Skipping...
Processing Record 23 of Set 3 | ulety
City not found. Skipping...
Processing Record 24 of Set 3 | hermosillo
City not found. Skipping...
Processing Record 25 of Set 3 | westport
City not found. Skipping...
Processing Record 26 of Set 3 | nelson bay
City not found. Skipping...
Processing Record 27 of Set 3 | thompson
City not found. Skipping...
Processing Record 28 of Set 3 | rundu
City not found. Skipping...
Processing Record 29 of Set 3 | bandarbeyla
City not found. Skipping...
Processing Record 30 of Set 3 | haines junction
City not found. Skipping...
Processing Record 31 of Set 3 | taolanaro
City not found. Skipping...
Processing Record 32 of Set 3 | prince rupert
City not found. 

City not found. Skipping...
Processing Record 38 of Set 5 | zhanaozen
City not found. Skipping...
Processing Record 39 of Set 5 | surt
City not found. Skipping...
Processing Record 40 of Set 5 | mount gambier
City not found. Skipping...
Processing Record 41 of Set 5 | tumannyy
City not found. Skipping...
Processing Record 42 of Set 5 | hithadhoo
City not found. Skipping...
Processing Record 43 of Set 5 | skara
City not found. Skipping...
Processing Record 44 of Set 5 | daru
City not found. Skipping...
Processing Record 45 of Set 5 | minab
City not found. Skipping...
Processing Record 46 of Set 5 | letavertes
City not found. Skipping...
Processing Record 47 of Set 5 | sambava
City not found. Skipping...
Processing Record 48 of Set 5 | cherskiy
City not found. Skipping...
Processing Record 49 of Set 5 | rebrikha
City not found. Skipping...
Processing Record 50 of Set 5 | manjacaze
City not found. Skipping...
Processing Record 1 of Set 6 | male
City not found. Skipping...
Processing Recor

City not found. Skipping...
Processing Record 6 of Set 8 | raudeberg
City not found. Skipping...
Processing Record 7 of Set 8 | tari
City not found. Skipping...
Processing Record 8 of Set 8 | negotino-polosko
City not found. Skipping...
Processing Record 9 of Set 8 | caravelas
City not found. Skipping...
Processing Record 10 of Set 8 | kuala terengganu
City not found. Skipping...
Processing Record 11 of Set 8 | sao filipe
City not found. Skipping...
Processing Record 12 of Set 8 | namibe
City not found. Skipping...
Processing Record 13 of Set 8 | morro bay
City not found. Skipping...
Processing Record 14 of Set 8 | dwarka
City not found. Skipping...
Processing Record 15 of Set 8 | staryy nadym
City not found. Skipping...
Processing Record 16 of Set 8 | kolokani
City not found. Skipping...
Processing Record 17 of Set 8 | cibitung
City not found. Skipping...
Processing Record 18 of Set 8 | peleduy
City not found. Skipping...
Processing Record 19 of Set 8 | troitsko-pechorsk
City not foun

City not found. Skipping...
Processing Record 26 of Set 10 | akdepe
City not found. Skipping...
Processing Record 27 of Set 10 | ordynskoye
City not found. Skipping...
Processing Record 28 of Set 10 | hualmay
City not found. Skipping...
Processing Record 29 of Set 10 | seoul
City not found. Skipping...
Processing Record 30 of Set 10 | yuzhno-yeniseyskiy
City not found. Skipping...
Processing Record 31 of Set 10 | montepuez
City not found. Skipping...
Processing Record 32 of Set 10 | kiama
City not found. Skipping...
Processing Record 33 of Set 10 | aksu
City not found. Skipping...
Processing Record 34 of Set 10 | axim
City not found. Skipping...
Processing Record 35 of Set 10 | krabi
City not found. Skipping...
Processing Record 36 of Set 10 | bose
City not found. Skipping...
Processing Record 37 of Set 10 | shahrud
City not found. Skipping...
Processing Record 38 of Set 10 | lamu
City not found. Skipping...
Processing Record 39 of Set 10 | north bend
City not found. Skipping...
Proces

City not found. Skipping...
Processing Record 44 of Set 12 | payakumbuh
City not found. Skipping...
Processing Record 45 of Set 12 | chimore
City not found. Skipping...
Processing Record 46 of Set 12 | rostovka
City not found. Skipping...
Processing Record 47 of Set 12 | pastavy
City not found. Skipping...
Processing Record 48 of Set 12 | manono
City not found. Skipping...
Processing Record 49 of Set 12 | wagga wagga
City not found. Skipping...
Processing Record 50 of Set 12 | zolotinka
City not found. Skipping...
Processing Record 1 of Set 13 | virginia beach
City not found. Skipping...
Processing Record 2 of Set 13 | tselinnoye
City not found. Skipping...
Processing Record 3 of Set 13 | doha
City not found. Skipping...
Processing Record 4 of Set 13 | lahad datu
City not found. Skipping...
Processing Record 5 of Set 13 | tocopilla
City not found. Skipping...
Processing Record 6 of Set 13 | edd
City not found. Skipping...
Processing Record 7 of Set 13 | varzea grande
City not found. Sk

In [15]:
# Convert the array of dictionaries to a Pandas DataFrame.
city_data_df = pd.DataFrame(city_data)
city_data_df.head(10)

In [11]:
#Next reorder the columns as City, Country, Lat, Lng, Max Temp, Humidity, Cloudiness, Wind Speed and Current Description, so they are easy to read.
#Recall that to reorder the columns, we assign a variable to an array of the columns in the order we want them to appear:
new_column_order = ["City", "Country", "Lat", "Lng", "Max Temp","Humidity", "Cloudiness", "Wind Speed", "Current Description"]
#Then, we assign a new or the same DataFrame with new column order:
city_data_df = city_data_df[new_column_order]
city_data_df

KeyError: "None of [Index(['City', 'Country', 'Lat', 'Lng', 'Max Temp', 'Humidity', 'Cloudiness',\n       'Wind Speed', 'Current Description'],\n      dtype='object')] are in the [columns]"

In [None]:
# Create the output file (CSV).
output_data_file = "cities.csv"
# Export the City_Data into a CSV.
city_data_df.to_csv(output_data_file, index_label="City_ID")

In [None]:
#Get data for scatteree chart
#We will need latitude, maximum temperature, humidity, cloudiness, and wind speed from all the cities
# Extract relevant fields from the DataFrame for plotting.
lats = city_data_df["Lat"]
max_temps = city_data_df["Max Temp"]
humidity = city_data_df["Humidity"]
cloudiness = city_data_df["Cloudiness"]
wind_speed = city_data_df["Wind Speed"]

In [None]:
#add the following code to create a scatter plot for the latitude vs. maximum temperature and run the cell.
# Import time module
import time

# Build the scatter plot for latitude vs. max temperature.
plt.scatter(lats,
            max_temps,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Max Temperature "+ time.strftime("%x"))
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure.
plt.savefig("weather_data/Fig1.png")

# Show plot.
plt.show()

In [None]:
# Build the scatter plots for latitude vs. humidity.
#Change title and yaxis label to humidity %
plt.scatter(lats,
            humidity,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Humidity "+ time.strftime("%x"))
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid(True)
# Save the figure.
plt.savefig("weather_data/Fig2.png")
# Show plot.
plt.show()


In [None]:
#Let's refactor the code for our scatter plots by changing the y-axis variable to "cloudiness," 
#the title to "Cloudiness (%)," and the y-axis label to "Cloudiness (%)."
# Build the scatter plots for latitude vs. cloudiness.
plt.scatter(lats,
            cloudiness,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Cloudiness (%) "+ time.strftime("%x"))
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(True)
# Save the figure.
plt.savefig("weather_data/Fig3.png")
# Show plot.
plt.show()


In [None]:
#repurpose the code we have been using and change the y-axis variable to "wind speed," 
#the title to "Wind Speed," and the y-axis label to "Wind Speed (mph)."


# Build the scatter plots for latitude vs. wind speed.
plt.scatter(lats,
            wind_speed,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Wind Speed "+ time.strftime("%x"))
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)
# Save the figure.
plt.savefig("weather_data/Fig4.png")
# Show plot.
plt.show()

In [None]:
#Create a Linear Regression Function#
# Import linregress
from scipy.stats import linregress

# Create a function to create perform linear regression on the weather data
# and plot a regression line and the equation with the data.
def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates):

    # Run regression on hemisphere weather data.
    (slope, intercept, r_value, p_value, std_err) = linregress(x_values, y_values)

    # Calculate the regression line "y values" from the slope and intercept.
    regress_values = x_values * slope + intercept
    # Get the equation of the line.
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    # Create a scatter plot and plot the regression line.
    plt.scatter(x_values,y_values)
    plt.plot(x_values,regress_values,"r")
    # Annotate the text for the line equation.
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")
    plt.title(title)
    plt.xlabel('Latitude')
    plt.ylabel(y_label)
    plt.show()
plot_linear_regression

In [None]:
#Create the Hemisphere DataFrames 6.4.2
#To create a new DataFrame from a current DataFrame, we can use the loc method on the current DataFrame. 
#The loc method accesses a group of rows and columns in the current DataFrame by an index, labels, or a Boolean array. 
#apply this method to our city_data_df DataFrame by adding the code index13 = city_data_df.loc[13] in a cell 
#and running the cell. The output will present all the information at index 13 of the city_data_df DataFrame. 
#The syntax to get a specific row from a current DataFrame is row = df.loc[row_index].
index13 = city_data_df.loc[13]
index13


In [None]:
#We can also filter a DataFrame based on a value of a row. 
#Ex, if we wanted to get all Northern Hemisphere latitudes, 
#for latitudes greater than or equal to 0, we can filter the city_data_df DataFrame using the code city_data_df["Lat"] >= 0. 
#Executing this code will return either "True" or "False" for all the rows that meet these criteria.
city_data_df["Lat"] >= 0

In [None]:
#If we want to return a DataFrame with all data fitting the criteria, 
#for latitudes greater than or equal to 0, we can use the loc method on the city_data_df DataFrame. 
#Inside the brackets, we would add the conditional filter city_data_df["Lat"] >= 0 so that our statement would appear as:
city_data_df.loc[(city_data_df["Lat"] >= 0)].head()
#head() function to display the 1st 5 rows of df

In [None]:
# Create Northern and Southern Hemisphere DataFrames.
northern_hemi_df = city_data_df.loc[(city_data_df["Lat"] >= 0)].head()
southern_hemi_df = city_data_df.loc[(city_data_df["Lat"]<=0)].head()


In [None]:
#Perform Linear Regression on the Maximum Temperature for the Northern Hemisphere
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \
                        for Maximum Temperature', 'Max Temp',(10,40))
#text_coordinates=text_coordinates(50,50)

In [None]:
#Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \
                        for Maximum Temperature', 'Max Temp',(-40,90))

In [None]:
#To perform the linear regression on the percent humidity for the Northern Hemisphere, 
#set the x-value equal to the latitude column and 
#y-value equal to the Humidity column from the northern_hemi_df DataFrame.

#Call the plot_linear_regression function with the x- and y-values, and edit the title, y_label, 
#and text_coordinates for the percent humidity scatter plot.
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \
                        for % Humidity', '% Humidity',(40,90))



In [None]:
#For our linear regression line and plot of the percent humidity and latitudes for the Southern Hemisphere, 
#set the x-value equal to the latitude column and 
#y-value equal to the humidity column from the southern_hemi_df DataFrame.

#Call the plot_linear_regression function, with the x-and y-values, 
#and edit the title, y_label, and text_coordinates for the percent humidity scatter plot.
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \
                        for % Humidity', '% Humidity',(-30,65))


In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \
                        for % Cloudiness', '% Cloudiness',(50,80))


In [None]:
# Linear regression on the Southern Hemisphere Cloudiness
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the southern Hemisphere for % Cloudiness',
                       '% Cloudiness',(-40,20))


In [None]:
#Call the plot_linear_regression function, with the x-value equal to the Latitude column 
#and the y-value equal to the Wind Speed column from the northern_hemi_df DataFrame. 
#Edit the title, y_label, and text_coordinates for the wind speed scatter plot.
# Linear regression on the Northern Hemisphere with wind speed
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \
                        for Wind Speed', 'Wind Speed',(35,10))


In [None]:
#Finally, let's call the plot_linear_regression function, with the x-value equal to the latitude column 
#and the y-value equal to wind speed column from the southern_hemi_df DataFrame. 
#Edit the title, y_label, and text_coordinates for the wind speed scatter plot.
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \
                        for Wind Speed', 'Wind Speed',(-30,10))