In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
from scipy.stats import linregress
from datetime import datetime
from scipy import stats


# Import API key
from config import api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "Output/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Date the notebook was run (MM/DD/YYYY); appended to plot titles and stored per city in the fetch loop
cur_date = datetime.today().strftime('%m/%d/%Y')

# OpenWeatherMap current-weather endpoint.
# HTTPS is used because the API key is part of the query string and would otherwise
# be sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Build partial query URL; the city name is appended after "q=" for each request
query_url = f"{url}appid={api_key}&units={units}&q="

In [2]:
# Create a set of random lat and lng combinations
# NOTE(review): no random seed is set, so every run samples a different set of cities.
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each city once.
# dict.fromkeys() de-duplicates while preserving first-seen order, replacing the
# `city not in cities` list scan that was repeated for every coordinate pair.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
        for lat_lng in lat_lngs
    )
)

# Print the city count to confirm sufficient count
len(cities)

606

In [3]:


# List every city with its position in `cities`.
# enumerate() supplies the position directly; cities.index(value) rescanned the
# list from the start on every iteration.
for record_number, value in enumerate(cities):
    print(f"Processing Record {record_number} of Set 1 | {value}.")
    

Processing Record 0 of Set 1 | arlit.
Processing Record 1 of Set 1 | kilindoni.
Processing Record 2 of Set 1 | faanui.
Processing Record 3 of Set 1 | chagda.
Processing Record 4 of Set 1 | busselton.
Processing Record 5 of Set 1 | punta arenas.
Processing Record 6 of Set 1 | albany.
Processing Record 7 of Set 1 | rangia.
Processing Record 8 of Set 1 | olocuilta.
Processing Record 9 of Set 1 | baruun-urt.
Processing Record 10 of Set 1 | tuktoyaktuk.
Processing Record 11 of Set 1 | butaritari.
Processing Record 12 of Set 1 | atambua.
Processing Record 13 of Set 1 | santiago.
Processing Record 14 of Set 1 | sentyabrskiy.
Processing Record 15 of Set 1 | along.
Processing Record 16 of Set 1 | vaini.
Processing Record 17 of Set 1 | oliveira dos brejinhos.
Processing Record 18 of Set 1 | rikitea.
Processing Record 19 of Set 1 | avarua.
Processing Record 20 of Set 1 | jamestown.
Processing Record 21 of Set 1 | staunton.
Processing Record 22 of Set 1 | east london.
Processing Record 23 of Set 1

In [None]:
# Set up lists to hold response info. Each list receives exactly one entry per
# city, so all of them stay index-aligned with `cities`.

lat = []
lng = []
temp = []
humidity = []
cloud = []
wind_speed = []
country = []
date = []

# Loop through the list of cities and perform a request for data on each
for area in cities:

    try:
        response = requests.get(query_url + area).json()

        # Read every field BEFORE appending anything. If a key is missing partway
        # through, no list has been touched yet, so the None placeholders below
        # cannot leave the lists with different lengths.
        city_lat = response['coord']['lat']
        city_lng = response['coord']['lon']
        city_temp = response['main']['temp_max']
        city_humidity = response['main']['humidity']
        city_cloud = response['clouds']['all']
        city_wind_speed = response['wind']['speed']
        city_country = response["sys"]["country"]

    except KeyError:
        # NOTE(review): a KeyError only shows that an expected key was absent from
        # the JSON. The saved output has every city failing, which may point at an
        # API-level problem (e.g. a rejected key) rather than unknown cities — confirm.
        print(f"{area}, doesn't have information.")
        lat.append(None)
        lng.append(None)
        temp.append(None)
        humidity.append(None)
        cloud.append(None)
        wind_speed.append(None)
        country.append(None)
        date.append(None)

    else:
        lat.append(city_lat)
        lng.append(city_lng)
        temp.append(city_temp)
        humidity.append(city_humidity)
        cloud.append(city_cloud)
        wind_speed.append(city_wind_speed)
        country.append(city_country)
        date.append(cur_date)

        # Print only this city's values; the lists themselves grow with every
        # iteration and would flood the output.
        print(f"{area} {city_lat} {city_lng} {city_temp} {city_humidity} {city_wind_speed} {city_country}")
    
    

arlit, doesn't have information.
kilindoni, doesn't have information.
faanui, doesn't have information.
chagda, doesn't have information.
busselton, doesn't have information.
punta arenas, doesn't have information.
albany, doesn't have information.
rangia, doesn't have information.
olocuilta, doesn't have information.
baruun-urt, doesn't have information.
tuktoyaktuk, doesn't have information.
butaritari, doesn't have information.
atambua, doesn't have information.
santiago, doesn't have information.
sentyabrskiy, doesn't have information.
along, doesn't have information.
vaini, doesn't have information.
oliveira dos brejinhos, doesn't have information.
rikitea, doesn't have information.
avarua, doesn't have information.
jamestown, doesn't have information.
staunton, doesn't have information.
east london, doesn't have information.
hilo, doesn't have information.
narsaq, doesn't have information.
ushuaia, doesn't have information.
illoqqortoormiut, doesn't have information.
bluff, doesn

port hedland, doesn't have information.
kahului, doesn't have information.
eureka, doesn't have information.
namibe, doesn't have information.
czluchow, doesn't have information.
ust-nera, doesn't have information.
keti bandar, doesn't have information.
harper, doesn't have information.
leh, doesn't have information.
tuatapere, doesn't have information.
mar del plata, doesn't have information.
bitung, doesn't have information.
nuevo progreso, doesn't have information.
khani, doesn't have information.
alofi, doesn't have information.
barrow, doesn't have information.
provideniya, doesn't have information.
bethal, doesn't have information.
igrim, doesn't have information.
changqing, doesn't have information.
marcona, doesn't have information.
kloulklubed, doesn't have information.
bengkulu, doesn't have information.
pecos, doesn't have information.
chokurdakh, doesn't have information.
grindavik, doesn't have information.
merano, doesn't have information.
huilong, doesn't have informatio

In [None]:

# Map each output column name to the per-city list collected by the request loop.
# All lists must have the same length as `cities` for the DataFrame to build.
city_weather_dict = {
    "City": cities,
    "Lat": lat,
    "Lng": lng,
    "Max Temp": temp,
    "Humidity": humidity,
    "Cloudiness": cloud,
    "Wind Speed": wind_speed,
    "Country": country,  # was misspelled "County"; the values are country entries
    "Date": date
}

# Create a data frame with one row per city and one column per collected field

weather_data = pd.DataFrame(city_weather_dict)
weather_data.head()

In [None]:
# Write the collected weather data to the configured CSV path.
# The parent directory is created first so the export does not fail on a fresh
# checkout where it does not exist yet.
from pathlib import Path

Path(output_data_file).parent.mkdir(parents=True, exist_ok=True)
weather_data.to_csv(output_data_file)

In [None]:
# Scatter of max temperature against latitude for every city in weather_data;
# the figure is also saved as a PNG in the working directory.
plt.scatter(weather_data['Lat'], weather_data["Max Temp"], color = "blue")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.title("City Latitude vs. Max Temperature " + cur_date)
plt.grid()
plt.savefig("Lat vs. Max Temp.png")
plt.show()

# The plot below displays the max temperature of a city based on its latitude (north-south position on Earth).
# The farther the latitude is from 0 in either direction, the lower the max temperature should be.

In [None]:
# Scatter of humidity against latitude for every city in weather_data;
# the figure is also saved as a PNG in the working directory.
plt.scatter(weather_data['Lat'], weather_data["Humidity"], color = "blue")
plt.xlabel("Latitude")
plt.ylabel("Humidity %")  # same axis label as the humidity regression plots
plt.title("City Latitude vs. Humidity " + cur_date)
plt.grid()
plt.savefig("Lat vs. Humidity.png")
plt.show()

# The plot below displays the humidity of a city based on its latitude (north-south position on Earth).
# Author's expectation: the farther the latitude is from 0 in either direction, the lower the humidity should be.

In [None]:
# Scatter of cloudiness against latitude for every city in weather_data;
# the figure is also saved as a PNG in the working directory.
plt.scatter(weather_data['Lat'], weather_data["Cloudiness"], color = "blue")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness %")  # same axis label as the cloudiness regression plots
plt.title("City Latitude vs. Cloudiness " + cur_date)
plt.grid()
plt.savefig("Lat vs. Cloudiness.png")
plt.show()

# The plot below displays the cloudiness of a city based on its latitude (north-south position on Earth).

In [None]:
# Scatter of wind speed against latitude for every city in weather_data;
# the figure is also saved as a PNG in the working directory.
plt.scatter(weather_data['Lat'], weather_data["Wind Speed"], color = "blue")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")  # same axis label as the wind-speed regression plots
plt.title("City Latitude vs. Wind Speed " + cur_date)
plt.grid()
plt.savefig("Lat vs. Wind Speed.png")
plt.show()

# The plot below displays the wind speed of a city based on its latitude (north-south position on Earth).

In [None]:
# Linear regression of max temperature on latitude across all cities.
# Rows missing either value are dropped TOGETHER so x and y stay paired; the
# regression then runs on the cleaned series rather than the NaN-bearing columns.
lat_temp = weather_data[['Lat', 'Max Temp']].dropna()
x_val = lat_temp['Lat']
y_val = lat_temp['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(x_val, y_val)

# Predicted max temperature at each latitude: the fitted line is evaluated at x
# (latitude), not at the observed temperatures.
regress_value = x_val * slope + intercept

line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(line_eq)

In [None]:
# Northern Hemisphere (latitude >= 0): max temperature vs. latitude with a linear fit.
# Both columns are selected in one step and incomplete rows dropped, so x and y
# stay paired and free of NaN before they reach linregress.
northern_temp = weather_data.loc[weather_data['Lat'] >= 0, ['Lat', 'Max Temp']].dropna()
x_val = northern_temp['Lat']
y_val = northern_temp['Max Temp']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_val, y_val)
regress_values = x_val * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# The printed number is rvalue squared, so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")


plt.scatter(x_val, y_val, color = "blue")
plt.plot(x_val, regress_values, "r-")
# (46, 60) is the data-coordinate position of the equation text
plt.annotate(line_eq,(46,60), fontsize=15, color = "red")
plt.xlabel("Latitude")
plt.ylabel("Max Temp")
plt.title("City Latitude vs. Max Temp " + cur_date)
plt.savefig("Northern Hemisphere Temp Regression")
plt.show()

# The plot below shows a trend of decreasing temperatures the farther the city is from the equator.

In [None]:
# Southern Hemisphere (latitude <= 0): max temperature vs. latitude with a linear fit.
# Both columns are selected in one step and incomplete rows dropped, so x and y
# stay paired and free of NaN before they reach linregress.
southern_temp = weather_data.loc[weather_data['Lat'] <= 0, ['Lat', 'Max Temp']].dropna()
x_val = southern_temp['Lat']
y_val = southern_temp['Max Temp']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_val, y_val)
regress_values = x_val * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# The printed number is rvalue squared, so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")


plt.scatter(x_val, y_val, color = "blue")
plt.plot(x_val, regress_values, "r-")
# (-50, 80) is the data-coordinate position of the equation text
plt.annotate(line_eq,(-50,80), fontsize=15, color = "red")
plt.xlabel("Latitude")
plt.ylabel("Max Temp")
plt.title("City Latitude vs. Max Temp " + cur_date)
plt.savefig("Southern Hemisphere Temp Regression")
plt.show()

# The plot below shows a trend of increasing temperatures the closer the city is to the equator.

In [None]:
# Northern Hemisphere (latitude >= 0): humidity vs. latitude with a linear fit.
# Both columns are selected in one step and incomplete rows dropped, so x and y
# stay paired and free of NaN before they reach linregress.
northern_humidity = weather_data.loc[weather_data['Lat'] >= 0, ['Lat', 'Humidity']].dropna()
x_val = northern_humidity['Lat']
y_val = northern_humidity['Humidity']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_val, y_val)
regress_values = x_val * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# The printed number is rvalue squared, so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")


plt.scatter(x_val, y_val, color = "blue")
plt.plot(x_val, regress_values, "r-")
# (10, 20) is the data-coordinate position of the equation text
plt.annotate(line_eq,(10,20), fontsize=15, color = "red")
plt.xlabel("Latitude")
plt.ylabel("Humidity %")
plt.title("City Latitude vs. Humidity " + cur_date)
plt.savefig("Northern Hemisphere Humidity Regression")
plt.show()

# The plot below shows a slight trend of increasing humidity the farther the city is from the equator.

In [None]:
# Southern Hemisphere (latitude <= 0): humidity vs. latitude with a linear fit.
# Both columns are selected in one step and incomplete rows dropped, so x and y
# stay paired and free of NaN before they reach linregress.
southern_humidity = weather_data.loc[weather_data['Lat'] <= 0, ['Lat', 'Humidity']].dropna()
x_val = southern_humidity['Lat']
y_val = southern_humidity['Humidity']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_val, y_val)
regress_values = x_val * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# The printed number is rvalue squared, so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")


plt.scatter(x_val, y_val, color = "blue")
plt.plot(x_val, regress_values, "r-")
# (-30, 45) is the data-coordinate position of the equation text
plt.annotate(line_eq,(-30,45), fontsize=15, color = "red")
plt.xlabel("Latitude")
plt.ylabel("Humidity %")
# Trailing space added so the date no longer runs into the word "Humidity"
plt.title("City Latitude vs. Humidity " + cur_date)
plt.savefig("Southern Hemisphere Humidity Regression")
plt.show()

# The plot below shows a slight trend of increasing humidity the farther the city is from the equator.

In [None]:
# Northern Hemisphere (latitude >= 0): cloudiness vs. latitude with a linear fit.
# Both columns are selected in one step and incomplete rows dropped, so x and y
# stay paired and free of NaN before they reach linregress.
northern_cloudiness = weather_data.loc[weather_data['Lat'] >= 0, ['Lat', 'Cloudiness']].dropna()
x_val = northern_cloudiness['Lat']
y_val = northern_cloudiness['Cloudiness']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_val, y_val)
regress_values = x_val * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# The printed number is rvalue squared, so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")


plt.scatter(x_val, y_val, color = "blue")
plt.plot(x_val, regress_values, "r-")
# (10, 20) is the data-coordinate position of the equation text
plt.annotate(line_eq,(10,20), fontsize=15, color = "red")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness %")
# The "+" was missing here, which made the whole cell a SyntaxError
plt.title("City Latitude vs. Cloudiness " + cur_date)
plt.savefig("Northern Hemisphere Cloudiness Regression")
plt.show()

# The plot below shows a trend of increasing cloudiness the farther the city is from the equator.

In [None]:
# Southern Hemisphere (latitude <= 0): cloudiness vs. latitude with a linear fit.
# Both columns are selected in one step and incomplete rows dropped, so x and y
# stay paired and free of NaN before they reach linregress.
southern_cloudiness = weather_data.loc[weather_data['Lat'] <= 0, ['Lat', 'Cloudiness']].dropna()
x_val = southern_cloudiness['Lat']
y_val = southern_cloudiness['Cloudiness']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_val, y_val)
regress_values = x_val * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# The printed number is rvalue squared, so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")


plt.scatter(x_val, y_val, color = "blue")
plt.plot(x_val, regress_values, "r-")
# (-30, 60) is the data-coordinate position of the equation text
plt.annotate(line_eq,(-30,60), fontsize=15, color = "red")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness %")
plt.title("City Latitude vs. Cloudiness " + cur_date)
plt.savefig("Southern Hemisphere Cloudiness Regression")
plt.show()

# The plot below shows a trend of increasing cloudiness the closer the city is to the equator.

In [None]:
# Northern Hemisphere (latitude >= 0): wind speed vs. latitude with a linear fit.
# Both columns are selected in one step and incomplete rows dropped, so x and y
# stay paired and free of NaN before they reach linregress.
northern_wind = weather_data.loc[weather_data['Lat'] >= 0, ['Lat', 'Wind Speed']].dropna()
x_val = northern_wind['Lat']
y_val = northern_wind['Wind Speed']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_val, y_val)
regress_values = x_val * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# The printed number is rvalue squared, so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")


plt.scatter(x_val, y_val, color = "blue")
plt.plot(x_val, regress_values, "r-")
# (10, 20) is the data-coordinate position of the equation text
plt.annotate(line_eq,(10,20), fontsize=15, color = "red")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("City Latitude vs. Wind Speed " + cur_date)
plt.savefig("Northern Hemisphere Wind Speed Regression")
plt.show()

# The plot below shows a slight trend of increasing wind speed the farther the city is from the equator.

In [None]:
# Southern Hemisphere (latitude <= 0): wind speed vs. latitude with a linear fit.
# Both columns are selected in one step and incomplete rows dropped, so x and y
# stay paired and free of NaN before they reach linregress.
southern_wind = weather_data.loc[weather_data['Lat'] <= 0, ['Lat', 'Wind Speed']].dropna()
x_val = southern_wind['Lat']
y_val = southern_wind['Wind Speed']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_val, y_val)
regress_values = x_val * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# The printed number is rvalue squared, so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")


plt.scatter(x_val, y_val, color = "blue")
plt.plot(x_val, regress_values, "r-")
# (-30, 15) is the data-coordinate position of the equation text
plt.annotate(line_eq,(-30,15), fontsize=15, color = "red")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("City Latitude vs. Wind Speed " + cur_date)
plt.savefig("Southern Hemisphere Wind Speed Regression")
plt.show()

# The plot below shows a slight trend of decreasing wind speed the closer the city is to the equator.