In [None]:
# Import dependencies

import numpy as np
import pandas as pd
import requests
import json
import time

from scipy.stats import linregress
from matplotlib import pyplot as plt

from citipy import citipy

In [None]:
# Import OpenWeather API key

from config import api_key

In [None]:
# Generate a random list of cities with citipy

# Generate a random list of coordinates
# "The valid range of latitude in degrees is -90 and +90 for the southern and northern hemisphere respectively. Longitude is in the range -180 and +180" https://stackoverflow.com/questions/15965166/
# np.random.randint() excludes its upper bound, so 91 / 181 are passed to make
# +90 and +180 reachable (the previous bounds stopped at 89 / 179).
latitude = np.random.randint(-90, 91, 500).tolist()
longitude = np.random.randint(-180, 181, 500).tolist()

# Pair the coordinates up as (lat, long) tuples
lat_long = list(zip(latitude, longitude))

# Locate nearest cities to coordinates.
# `cities` keeps one entry per coordinate pair (duplicates included) so that
# cities[i] stays aligned with latitude[i] / longitude[i].
cities = []

for lat, long in lat_long:
    city = citipy.nearest_city(lat, long)
    city_name = city.city_name
    cities.append(city_name)

In [None]:
# Testing data retrieval

# Choose the test city BEFORE building the URL; the URL is assembled from it,
# and the heading printed below must describe the city that was actually queried.
city_name = "albany"

base_url = "https://api.openweathermap.org/data/2.5/weather?"
target_url = base_url + "q=" + city_name + "&appid=" + api_key

print("Testing OpenWeatherMap API")

response = requests.get(target_url, timeout=10)
# Fail here with the HTTP status (e.g. 401 for a bad key) rather than with a
# KeyError on the missing "main" field further down.
response.raise_for_status()

# print the response URL, avoid doing for public GitHub repos in order to avoid exposing key
# print(response.url)

data_test = response.json()
# print(json.dumps(data_test, indent=4, sort_keys=True))

main = data_test["main"]
temperature = main["temp"] # Fetch temperature
humidity = main["humidity"] # Fetch humidity

wind = data_test["wind"]
wind_speed = wind["speed"] # Fetch wind speed

clouds = data_test["clouds"]
cloudiness = clouds["all"] # Fetch cloudiness


print(f"{city_name.title():-^30}")
print(f"Temperature: {temperature}")
print(f"Humidity: {humidity}")
print(f"Wind speed: {wind_speed}")
print(f"Cloudiness: {cloudiness}")


In [None]:
# Testing data retrieval method 2 (using parameter dictionary)

# Define the city in this cell so the query and the printed heading always
# agree, instead of relying on a `city_name` left over from an earlier cell.
city_name = "albany"

base_url = "https://api.openweathermap.org/data/2.5/weather"
params = {
    "q": city_name,
    "appid": api_key
}
response = requests.get(base_url, params=params, timeout=10)
# Surface HTTP errors (bad key, unknown city) directly instead of as a KeyError below.
response.raise_for_status()

# print the response URL, avoid doing for public GitHub repos in order to avoid exposing key
# print(response.url)

data_test = response.json()

temperature = data_test["main"]["temp"] # Fetch temperature
humidity = data_test["main"]["humidity"] # Fetch humidity
wind_speed = data_test["wind"]["speed"] # Fetch wind speed
cloudiness = data_test["clouds"]["all"] # Fetch cloudiness


print(f"{city_name.title():-^30}")
print(f"Temperature: {temperature}")
print(f"Humidity: {humidity}")
print(f"Wind speed: {wind_speed}")
print(f"Cloudiness: {cloudiness}")


In [None]:
# Testing data retrieval method 3 (adding parameter for metric units of measurement)

# Define the city in this cell so the query and the printed heading always
# agree, instead of relying on a `city_name` left over from an earlier cell.
city_name = "albany"

base_url = "https://api.openweathermap.org/data/2.5/weather"
params = {
    "q": city_name,
    "appid": api_key,
    "units": "metric"
}
response = requests.get(base_url, params=params, timeout=10)
# Surface HTTP errors (bad key, unknown city) directly instead of as a KeyError below.
response.raise_for_status()

# print the response URL, avoid doing for public GitHub repos in order to avoid exposing key
# print(response.url)

data_test = response.json()

temperature = data_test["main"]["temp"] # Fetch temperature
humidity = data_test["main"]["humidity"] # Fetch humidity
wind_speed = data_test["wind"]["speed"] # Fetch wind speed
cloudiness = data_test["clouds"]["all"] # Fetch cloudiness


print(f"{city_name.title():-^30}")
print(f"Temperature: {temperature}")
print(f"Humidity: {humidity}")
print(f"Wind speed: {wind_speed}")
print(f"Cloudiness: {cloudiness}")


In [None]:
# Set up a DataFrame to hold city, lat, long, temperature, humidity, wind speed and cloudiness
weather_df = pd.DataFrame(cities, columns=["city"])

# Coordinates that each city was looked up from (same order as `cities`)
weather_df['latitude'] = latitude
weather_df['longitude'] = longitude

# Placeholder columns for the API results. NaN (rather than "") keeps the
# columns numeric, so rows that are never filled in count as missing values
# and the columns can be cast to float without going through a CSV round trip.
for column in ['temperature', 'humidity', 'wind speed', 'cloudiness']:
    weather_df[column] = np.nan

# Preview only the first rows instead of rendering the whole frame
weather_df.head()

In [None]:
# Data retrieval

base_url = "https://api.openweathermap.org/data/2.5/weather"
# "q" (the city to look up) is filled in per row inside the loop
params = {
    "appid": api_key,
    "units": "metric"
}

# Use iterrows to iterate through pandas dataframe
for index, row in weather_df.iterrows():
# for index, row in weather_df.head(n=5).iterrows(): # brief test

    # Get city from weather_df
    city = row['city']

    # Add query to params dict
    params['q'] = city

    print(f"Retrieving Results for Index {index}: {city}")

    try:
        # Make the API request; the timeout stops one stalled connection from hanging the run
        response = requests.get(base_url, params=params, timeout=10).json()

        # Read every field before writing any of them, so a response that is
        # missing one field does not leave a half-filled row behind.
        temperature = response["main"]["temp"]
        humidity = response["main"]["humidity"]
        wind_speed = response["wind"]["speed"]
        cloudiness = response["clouds"]["all"]

    except (KeyError, IndexError):
        print("Missing field/result... skipping.")

    except (requests.RequestException, ValueError) as error:
        # Network failure or a body that is not valid JSON. Only the exception
        # type is printed: the full message can contain the request URL, which
        # includes the API key.
        print(f"Request failed ({type(error).__name__})... skipping.")

    else:
        weather_df.loc[index, "temperature"] = temperature
        weather_df.loc[index, "humidity"] = humidity
        weather_df.loc[index, "wind speed"] = wind_speed
        weather_df.loc[index, "cloudiness"] = cloudiness

    print("------------")


In [None]:
# Check data in DataFrame
# weather_df

In [None]:
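# Save data to CSV
# NOTE(review): the write below is commented out, so the next cell reads whatever weather.csv already exists on disk. Re-enable it to persist freshly retrieved data.
# weather_df.to_csv("weather.csv")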

In [None]:
# Reload the weather data from disk; dtype="object" turns off pandas' type
# inference, so every column comes back with object dtype.
weather_df = pd.read_csv("weather.csv", encoding="utf-8", dtype="object")
weather_df.head()

In [None]:
# Convert every column used in the plots below to numeric values.
# The CSV is read with dtype="object", so without this 'wind speed' and
# 'cloudiness' would stay as object columns while the other three are numeric.
# errors='coerce' turns anything unparseable into NaN.
for column in ['temperature', 'humidity', 'wind speed', 'cloudiness', 'latitude']:
    weather_df[column] = pd.to_numeric(weather_df[column], errors='coerce')


In [None]:
# Scatter plot for Temperature (°C) vs. Latitude
# The retrieval cell requests "units": "metric", which returns temperatures in
# Celsius, so the labels say °C rather than F.
# NOTE(review): assumes weather.csv was produced by that metric request - confirm.
x_values = weather_df['temperature'].astype('float')
y_values = weather_df['latitude'].astype('float')
plt.scatter(x_values,y_values)
plt.xlabel('temperature (°C)')
plt.ylabel('latitude')
plt.title('Temperature (°C) vs. Latitude')
# Save before show(): once the figure has been shown it is no longer the current figure
plt.savefig('temp v latitude.png')
plt.show()

The plot shows temperature against latitude for each city in the dataset. At the time of data retrieval, temperatures tended to be higher in the southern hemisphere and lower in the northern hemisphere.

In [None]:
# Humidity (%) on the x-axis against latitude on the y-axis, one point per city
x_values = weather_df['humidity'].astype(float)
y_values = weather_df['latitude'].astype(float)

plt.scatter(x_values, y_values)
plt.title('Humidity vs. Latitude')
plt.xlabel('humidity')
plt.ylabel('latitude')

# Write the image to disk first, then render it in the notebook
plt.savefig('humidity v latitude.png')
plt.show()

The plot shows humidity against latitude for each city in the dataset. Where lower levels of humidity are recorded, they tend to be closer to the equator or in the southern hemisphere.

In [None]:
# Cloudiness (%) on the x-axis against latitude on the y-axis, one point per city
x_values = weather_df['cloudiness'].astype(float)
y_values = weather_df['latitude'].astype(float)

plt.scatter(x_values, y_values)
plt.title('Cloudiness (%) vs. Latitude')
plt.xlabel('cloudiness')
plt.ylabel('latitude')

# Write the image to disk first, then render it in the notebook
plt.savefig('cloudiness v latitude.png')
plt.show()

The plot shows cloudiness against latitude for each of the cities in the dataset. Cloudiness levels are spread relatively evenly across both hemispheres.

In [None]:
# Scatter plot for Wind Speed (m/s) vs. Latitude
# The retrieval cell requests "units": "metric", which returns wind speed in
# metres per second, so the labels say m/s rather than mph.
# NOTE(review): assumes weather.csv was produced by that metric request - confirm.
x_values = weather_df['wind speed'].astype('float')
y_values = weather_df['latitude'].astype('float')
plt.scatter(x_values,y_values)
plt.xlabel('wind speed (m/s)')
plt.ylabel('latitude')
plt.title('Wind Speed (m/s) vs. Latitude')
# Save before show(): once the figure has been shown it is no longer the current figure
plt.savefig('wind speed v latitude.png')
plt.show()

The plot shows wind speed against latitude for each of the cities in the dataset. Wind speeds are spread relatively evenly across both hemispheres, although the most extreme wind speeds occur furthest from the equator (i.e. towards the north and south poles).

In [None]:
# Keep only the rows with no missing value in any column; the regression
# lines below are fitted on this cleaned copy (weather_df itself is left untouched).
weather_clean_df = weather_df.dropna(axis=0, how='any')

In [None]:
# Create separate weather_dfs for northern and southern hemispheres.
# Latitude 0 is assigned to the northern frame (>= 0) so that cities lying
# exactly on the equator are not dropped from both halves.
weather_df_north = weather_clean_df.loc[weather_clean_df['latitude'] >= 0]
weather_df_south = weather_clean_df.loc[weather_clean_df['latitude'] < 0]

In [None]:
# Test creation of first linear regression plot

# # Scatter plot for Northern Hemisphere - Temperature (F) vs. Latitude
# x_values = weather_df_north['temperature'].astype('float')
# y_values = weather_df_north['latitude'].astype('float')

# # Run linear regression
# (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# regress_values = x_values * slope + intercept
# line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# #Plot scatter
# plt.scatter(x_values,y_values)

# # Plot regression line
# plt.plot(x_values,regress_values,"r-")
# plt.annotate(line_eq,(6,10),fontsize=15,color="red")

# # Label plot
# plt.xlabel('temperature')
# plt.ylabel('latitude')
# plt.title('Northern Hemisphere - Temperature (F) vs. Latitude')

# # Save plot
# plt.savefig('north temp v latitude.png')

# # Show plot
# # plt.show()

In [None]:
# Create a function to create multiple linear regression plots
def linregress_function(x_values,y_values):
    """Draw a scatter plot of y_values against x_values with a fitted line.

    A least-squares line is fitted with scipy.stats.linregress. The points and
    the fitted line (in red) are drawn on the current pyplot figure, the
    rounded line equation is written at the (median x, median y) position,
    the y-axis is labelled 'Latitude' and the figure is shown with plt.show().
    The x-axis label is left to the caller.

    Both arguments must support scalar arithmetic and provide a .median()
    method; the cells in this notebook pass pandas Series.
    """
    fit = linregress(x_values, y_values)

    # Fitted y for every observed x
    fitted_line = x_values * fit.slope + fit.intercept

    # Equation text, coefficients rounded to two decimals
    slope_text = str(round(fit.slope, 2))
    intercept_text = str(round(fit.intercept, 2))
    equation = "y = " + slope_text + "x + " + intercept_text

    plt.scatter(x_values, y_values)
    plt.plot(x_values, fitted_line, "r-")

    # Anchor the equation at the medians of the data
    anchor = (x_values.median(), y_values.median())
    plt.annotate(equation, anchor, fontsize=15, color="red")

    plt.ylabel('Latitude')
    plt.show()

In [None]:
# Linear regression plot for Northern Hemisphere - Temperature (°C) vs. Latitude
# The retrieval cell requests "units": "metric" (Celsius), hence °C.
# NOTE(review): assumes weather.csv was produced by that metric request - confirm.
x_values = weather_df_north['temperature'].astype('float')
y_values = weather_df_north['latitude'].astype('float')
# Keep a handle on the figure: linregress_function() ends with plt.show(), and
# calling plt.savefig() after show() would save a new, blank figure instead.
fig = plt.figure()
plt.xlabel('Temperature (°C)')
linregress_function(x_values,y_values)
fig.savefig('north temp v latitude.png')

In [None]:
# Linear regression plot for Southern Hemisphere - Temperature (°C) vs. Latitude
# The retrieval cell requests "units": "metric" (Celsius), hence °C.
# NOTE(review): assumes weather.csv was produced by that metric request - confirm.
x_values = weather_df_south['temperature'].astype('float')
y_values = weather_df_south['latitude'].astype('float')
# Keep a handle on the figure: linregress_function() ends with plt.show(), and
# calling plt.savefig() after show() would save a new, blank figure instead.
fig = plt.figure()
plt.xlabel('Temperature (°C)')
linregress_function(x_values,y_values)
fig.savefig('south temp v latitude.png')

The linear regression plots show a strong correlation between temperature and latitude. Temperatures are higher closer to the equator (i.e. latitudes closer to zero), lower further from the equator.

In [None]:
# Linear regression plot for Northern Hemisphere - Humidity (%) vs. Latitude
x_values = weather_df_north['humidity'].astype('float')
y_values = weather_df_north['latitude'].astype('float')
# Keep a handle on the figure: linregress_function() ends with plt.show(), and
# calling plt.savefig() after show() would save a new, blank figure instead.
fig = plt.figure()
plt.xlabel('Humidity (%)')
linregress_function(x_values,y_values)
fig.savefig('north humidity v latitude.png')

In [None]:
# Linear regression plot for Southern Hemisphere - Humidity (%) vs. Latitude
x_values = weather_df_south['humidity'].astype('float')
y_values = weather_df_south['latitude'].astype('float')
# Keep a handle on the figure: linregress_function() ends with plt.show(), and
# calling plt.savefig() after show() would save a new, blank figure instead.
fig = plt.figure()
plt.xlabel('Humidity (%)')
linregress_function(x_values,y_values)
fig.savefig('south humidity v latitude.png')

There is a very weak correlation between humidity and latitude. The range of humidity levels increases closer to the equator.

In [None]:
# Linear regression plot for Northern Hemisphere - Cloudiness (%) vs. Latitude
x_values = weather_df_north['cloudiness'].astype('float')
y_values = weather_df_north['latitude'].astype('float')
# Keep a handle on the figure: linregress_function() ends with plt.show(), and
# calling plt.savefig() after show() would save a new, blank figure instead.
fig = plt.figure()
plt.xlabel('Cloudiness (%)')
linregress_function(x_values,y_values)
fig.savefig('north cloudiness v latitude.png')

In [None]:
# Linear regression plot for Southern Hemisphere - Cloudiness (%) vs. Latitude
x_values = weather_df_south['cloudiness'].astype('float')
y_values = weather_df_south['latitude'].astype('float')
# Keep a handle on the figure: linregress_function() ends with plt.show(), and
# calling plt.savefig() after show() would save a new, blank figure instead.
fig = plt.figure()
plt.xlabel('Cloudiness (%)')
linregress_function(x_values,y_values)
fig.savefig('south cloudiness v latitude.png')

The flat linear regression lines indicate almost no relationship between cloudiness and latitude. 

In [None]:
# Linear regression plot for Northern Hemisphere - Wind Speed (m/s) vs. Latitude
# The retrieval cell requests "units": "metric" (metres per second), hence m/s.
# NOTE(review): assumes weather.csv was produced by that metric request - confirm.
x_values = weather_df_north['wind speed'].astype('float')
y_values = weather_df_north['latitude'].astype('float')
# Keep a handle on the figure: linregress_function() ends with plt.show(), and
# calling plt.savefig() after show() would save a new, blank figure instead.
fig = plt.figure()
plt.xlabel('Wind speed (m/s)')
linregress_function(x_values,y_values)
fig.savefig('north wind speed v latitude.png')

In [None]:
# Linear regression plot for Southern Hemisphere - Wind Speed (m/s) vs. Latitude
# The retrieval cell requests "units": "metric" (metres per second), hence m/s.
# NOTE(review): assumes weather.csv was produced by that metric request - confirm.
x_values = weather_df_south['wind speed'].astype('float')
y_values = weather_df_south['latitude'].astype('float')
# Keep a handle on the figure: linregress_function() ends with plt.show(), and
# calling plt.savefig() after show() would save a new, blank figure instead.
fig = plt.figure()
plt.xlabel('Wind speed (m/s)')
linregress_function(x_values,y_values)
fig.savefig('south wind speed v latitude.png')

The linear regression plots indicate a weak correlation between wind speed and distance from the equator (i.e. latitudes closer to the north and south poles).