In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output file (CSV): path that the retrieved weather DataFrame is written to
output_data_file = "weather_cities.csv"

# (min, max) bounds used when drawing random latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [None]:
# Build a list of unique city names by drawing random coordinates and asking
# citipy for the nearest city to each one.
# NOTE(review): no random seed is set in this cell, so the resulting city list
# presumably differs between runs -- confirm whether that is intended.
cities = []
seen_cities = set()  # O(1) duplicate check; `cities` keeps first-seen order

# Create sets of random latitude and longitude combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
# Materialise the pairs so they can still be inspected after the loop
# (a bare zip object would be exhausted by the iteration below).
lat_lngs = list(zip(lats, lngs))

# Identify nearest city for each combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Several coordinates can resolve to the same city; keep each name once
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print city count
len(cities)

In [11]:
# Get city data: query the OpenWeatherMap current-weather endpoint once per
# entry in `cities` and collect the fields of interest as one dict per city.
city_data = []
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Base query URL (Imperial units + API key). Built once, outside any loop, so
# it is defined even when `cities` is empty. HTTPS keeps the API key out of
# clear-text traffic.
query_url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

# Counters used only for the progress log: records are grouped in sets of 50
record_count = 1
set_count = 1

for i, city in enumerate(cities):
    # Start a new set every 50 cities; numbering restarts at 1 so every set
    # is logged as records 1..50 (not 0..49).
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1

    # Create the per-city URL
    city_url = query_url + "&q=" + city

    # Log the record and set numbers
    print("Processing Record %s of Set %s | %s" % (record_count, set_count, city))
    record_count += 1

    try:
        # A timeout prevents a single stalled request from hanging the loop
        city_weather = requests.get(city_url, timeout=10).json()

        city_data.append({"City": city,
                          "Lat": city_weather["coord"]["lat"],
                          "Lng": city_weather["coord"]["lon"],
                          "Max Temp": city_weather["main"]["temp_max"],
                          "Humidity": city_weather["main"]["humidity"],
                          "Cloudiness": city_weather["clouds"]["all"],
                          "Wind Speed": city_weather["wind"]["speed"],
                          "Country": city_weather["sys"]["country"],
                          "Date": city_weather["dt"]})
    except (KeyError, TypeError):
        # The response lacks the expected fields (e.g. an error payload
        # instead of weather data): skip this city and continue.
        print("City not found. Skipping...")
    except (requests.exceptions.RequestException, ValueError) as err:
        # Network failure, timeout, or a body that is not valid JSON:
        # report it and continue with the next city (best effort).
        print("Request failed (%s). Skipping..." % err)

print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")



Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | barrow
Processing Record 2 of Set 1 | souillac
Processing Record 3 of Set 1 | lebu
Processing Record 4 of Set 1 | ushuaia
Processing Record 5 of Set 1 | port blair
Processing Record 6 of Set 1 | hithadhoo
Processing Record 7 of Set 1 | batticaloa
Processing Record 8 of Set 1 | longyearbyen
Processing Record 9 of Set 1 | provideniya
Processing Record 10 of Set 1 | cabo san lucas
Processing Record 11 of Set 1 | bayir
Processing Record 12 of Set 1 | busselton
Processing Record 13 of Set 1 | candelaria
Processing Record 14 of Set 1 | dikson
Processing Record 15 of Set 1 | hobart
Processing Record 16 of Set 1 | cockburn harbour
City not found. Skipping...
Processing Record 17 of Set 1 | attawapiskat
City not found. Skipping...
Processing Record 18 of Set 1 | rikitea
Processing Record 19 of Set 1 | sandpoint
Processing Record 20 of Set 1 | albany
Processing Record 21 of Set 1 | avarua
Processing Record

Processing Record 39 of Set 4 | ozgon
City not found. Skipping...
Processing Record 40 of Set 4 | yuli
Processing Record 41 of Set 4 | leningradskiy
Processing Record 42 of Set 4 | kavieng
Processing Record 43 of Set 4 | wamba
Processing Record 44 of Set 4 | pevek
Processing Record 45 of Set 4 | papetoai
Processing Record 46 of Set 4 | ust-kuyga
Processing Record 47 of Set 4 | vieste
Processing Record 48 of Set 4 | asfi
Processing Record 49 of Set 4 | yar-sale
Processing Record 0 of Set 5 | tasbuget
City not found. Skipping...
Processing Record 1 of Set 5 | qaanaaq
Processing Record 2 of Set 5 | tasiilaq
Processing Record 3 of Set 5 | cervo
Processing Record 4 of Set 5 | mason city
Processing Record 5 of Set 5 | tabou
Processing Record 6 of Set 5 | opuwo
Processing Record 7 of Set 5 | samsun
Processing Record 8 of Set 5 | lovington
Processing Record 9 of Set 5 | kibaya
Processing Record 10 of Set 5 | quimper
Processing Record 11 of Set 5 | marawi
Processing Record 12 of Set 5 | samfya


Processing Record 31 of Set 8 | meulaboh
Processing Record 32 of Set 8 | nizhneyansk
City not found. Skipping...
Processing Record 33 of Set 8 | vila velha
Processing Record 34 of Set 8 | kailua
Processing Record 35 of Set 8 | sao felix do xingu
Processing Record 36 of Set 8 | oshkosh
Processing Record 37 of Set 8 | malwan
City not found. Skipping...
Processing Record 38 of Set 8 | artyom
Processing Record 39 of Set 8 | marcona
City not found. Skipping...
Processing Record 40 of Set 8 | aktau
Processing Record 41 of Set 8 | bam
Processing Record 42 of Set 8 | smoky lake
Processing Record 43 of Set 8 | tuy hoa
Processing Record 44 of Set 8 | lorengau
Processing Record 45 of Set 8 | la gomera
Processing Record 46 of Set 8 | krasnoselkup
Processing Record 47 of Set 8 | behat
Processing Record 48 of Set 8 | kurumkan
Processing Record 49 of Set 8 | estacion coahuila
Processing Record 0 of Set 9 | progreso
Processing Record 1 of Set 9 | kuala terengganu
Processing Record 2 of Set 9 | homer
P

Processing Record 19 of Set 12 | pingliang
Processing Record 20 of Set 12 | baykit
Processing Record 21 of Set 12 | urubicha
Processing Record 22 of Set 12 | susangerd
Processing Record 23 of Set 12 | solec kujawski
Processing Record 24 of Set 12 | lamu
Processing Record 25 of Set 12 | rawannawi
City not found. Skipping...
Processing Record 26 of Set 12 | rawson
Processing Record 27 of Set 12 | port augusta
Processing Record 28 of Set 12 | areka
Processing Record 29 of Set 12 | barranca
Processing Record 30 of Set 12 | aksarka
Processing Record 31 of Set 12 | tokur
Processing Record 32 of Set 12 | taksimo
Processing Record 33 of Set 12 | vestbygda
City not found. Skipping...
Processing Record 34 of Set 12 | preobrazheniye
Processing Record 35 of Set 12 | nautla
Processing Record 36 of Set 12 | ardakan
Processing Record 37 of Set 12 | udachnyy
Processing Record 38 of Set 12 | moroni
Processing Record 39 of Set 12 | baglan
Processing Record 40 of Set 12 | bonthe
Processing Record 41 of S

In [None]:
# Build a DataFrame from the list of per-city dicts collected in `city_data`
# (one row per city, one column per dict key).
weather_data = pd.DataFrame(city_data)

# Display the frame as the cell output
weather_data

In [None]:
# Write the retrieved weather data to the CSV path configured in `output_data_file`
weather_data.to_csv(output_data_file)

In [None]:
# Index labels of the rows whose reported humidity is above 100
over_humid_rows = weather_data.loc[weather_data["Humidity"] > 100]
dif_weather_data = over_humid_rows.index
dif_weather_data

In [None]:
# Remove the rows flagged as having humidity above 100.
# `dif_weather_data` holds row index labels, so it has to be matched against
# the index (via drop), not against the values of the "Humidity" column.
new_weather_data = weather_data.drop(index=dif_weather_data)
new_weather_data

In [None]:
# Write the cleaned data to the configured CSV path.
# DataFrame has no `.csv` method; `to_csv` is the writer.
new_weather_data.to_csv(output_data_file)

In [None]:
# Latitude vs Max Temp: scatter of every retrieved city
plt.scatter(weather_data["Lat"], weather_data["Max Temp"], edgecolors='b')
plt.title("City Latitude vs. Max Temperature")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature °F")
plt.grid()
# Fix the x-axis to the full latitude range so plots are comparable
plt.xlim((-90, 90))
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Latitude vs Humidity: scatter of every retrieved city
plt.scatter(weather_data["Lat"], weather_data["Humidity"], edgecolors='b')
plt.title("City Latitude vs. Humidity")
plt.xlabel("Latitude")
plt.ylabel("Humidity")
plt.grid()
# Fix the x-axis to the full latitude range so plots are comparable
plt.xlim((-90, 90))
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Latitude vs Cloudiness: scatter of every retrieved city
plt.scatter(weather_data["Lat"], weather_data["Cloudiness"], edgecolors='b')
plt.title("City Latitude vs. Cloudiness")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.grid()
# Fix the x-axis to the full latitude range so plots are comparable
plt.xlim((-90, 90))
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Latitude vs Wind Speed: scatter of every retrieved city
plt.scatter(weather_data["Lat"], weather_data["Wind Speed"], edgecolors='b')
plt.title("City Latitude vs. Wind Speed")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")
plt.grid()
# Fix the x-axis to the full latitude range so plots are comparable
plt.xlim((-90, 90))
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Northern-hemisphere subset. Latitude 0 is included here so that a city lying
# exactly on the equator is not silently dropped from both hemisphere subsets
# (the southern subset uses a strict `< 0`).
northern_cities = weather_data[weather_data["Lat"] >= 0]
northern_cities.head()

In [None]:
# Southern-hemisphere subset: rows whose latitude is strictly negative
southern_mask = weather_data["Lat"] < 0
southern_cities = weather_data.loc[southern_mask]
southern_cities.head()

In [None]:
# Northern cities: linear regression of max temperature on latitude
x_values = northern_cities["Lat"]
y_values = northern_cities["Max Temp"]

plt.scatter(x_values, y_values, edgecolors='b')
plt.xlabel("Latitude")
plt.ylabel("Max Temperature °F")
plt.xlim((0, 90))

# Least-squares fit; draw the fitted line over the scatter
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
plt.plot(x_values, intercept + slope*x_values, 'r')

# Annotate the plot with the fitted equation near the lower-left corner
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.annotate(line_eq, (2, min(y_values)), fontsize=13, color="red")
plt.title("Northern Hemisphere Max Temperature vs. Latitude")

print(f"The r-squared is {r_value * r_value}")
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Southern cities: linear regression of max temperature on latitude
x_values = southern_cities["Lat"]
y_values = southern_cities["Max Temp"]

plt.scatter(x_values, y_values, edgecolors='b')
plt.xlabel("Latitude")
plt.ylabel("Max Temperature °F")
plt.xlim((-90, 0))

# Least-squares fit; draw the fitted line over the scatter
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
plt.plot(x_values, intercept + slope*x_values, 'r')

# Annotate the plot with the fitted equation at the left edge
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.annotate(line_eq, (-90, min(y_values)), fontsize=13, color="red")
plt.title("Southern Hemisphere Max Temperature vs. Latitude")
print(f"The r-squared is {r_value * r_value}")
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Northern cities: linear regression of humidity on latitude
x_values = northern_cities["Lat"]
y_values = northern_cities["Humidity"]

plt.scatter(x_values, y_values, edgecolors='b')
plt.xlabel("Latitude")
plt.ylabel("Humidity")
plt.xlim((0, 90))
plt.ylim((0, 100))

# Least-squares fit; draw the fitted line over the scatter
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
plt.plot(x_values, intercept + slope*x_values, 'r')

# Annotate the plot with the fitted equation near the lower-left corner
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.annotate(line_eq, (3, min(y_values)), fontsize=13, color="red")
plt.title("Northern Hemisphere Humidity vs. Latitude")
print(f"The r-squared is {r_value * r_value}")
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Southern cities: linear regression of humidity on latitude
x_values = southern_cities["Lat"]
y_values = southern_cities["Humidity"]

plt.scatter(x_values, y_values, edgecolors='b')
plt.xlabel("Latitude")
plt.ylabel("Humidity")
plt.xlim((-90, 0))
# Ascending limits: (100, 0) would draw the humidity axis upside down
plt.ylim((0, 100))

# Least-squares fit; draw the fitted line over the scatter
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
plt.plot(x_values, intercept + slope*x_values, 'r')

# Anchor the equation at a negative latitude so it falls inside the
# (-90, 0) x-range of this plot
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.annotate(line_eq, (-55, min(y_values)), fontsize=13, color="red")
plt.title("Southern Hemisphere Humidity vs. Latitude")
print(f"The r-squared is {r_value * r_value}")
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Northern cities: linear regression of cloudiness on latitude.
# Latitude goes on the x-axis, matching the plot title and the other
# hemisphere regressions in this notebook.
x_values = northern_cities["Lat"]
y_values = northern_cities["Cloudiness"]

plt.scatter(x_values, y_values, edgecolors='b')
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.xlim((0, 90))
# Small margin around 0..100 so points on the extremes are not drawn on the frame
plt.ylim((-5, 105))

# Least-squares fit; draw the fitted line over the scatter
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
plt.plot(x_values, intercept + slope*x_values, 'r')

# Annotate the plot with the fitted equation near the lower-left corner
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.annotate(line_eq, (5, min(y_values)), fontsize=13, color="red")
plt.title("Northern Hemisphere Cloudiness vs. Latitude")
print(f"The r-squared is {r_value * r_value}")
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Southern cities: linear regression of cloudiness on latitude.
# Latitude goes on the x-axis so the data agrees with the axis labels, the
# plot title and the other hemisphere regressions in this notebook.
x_values = southern_cities["Lat"]
y_values = southern_cities["Cloudiness"]

plt.scatter(x_values, y_values, edgecolors='b')
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.xlim((-90, 0))
# Small margin around 0..100 so points on the extremes are not drawn on the frame
plt.ylim((-5, 105))

# Least-squares fit; draw the fitted line over the scatter
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
plt.plot(x_values, intercept + slope*x_values, 'r')

# Anchor the equation at a negative latitude inside the (-90, 0) x-range
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.annotate(line_eq, (-55, min(y_values)), fontsize=13, color="red")
plt.title("Southern Hemisphere Cloudiness vs. Latitude")
print(f"The r-squared is {r_value * r_value}")
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Northern cities: linear regression of wind speed on latitude
x_values = northern_cities["Lat"]
y_values = northern_cities["Wind Speed"]

plt.scatter(x_values, y_values, edgecolors='b')
# Labels follow the plotted data: latitude on x, wind speed on y
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")
plt.xlim((0, 90))

# Least-squares fit; draw the fitted line over the scatter
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
plt.plot(x_values, intercept + slope*x_values, 'r')

# Annotate the plot with the fitted equation near the lower-left corner
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.annotate(line_eq, (5, min(y_values)), fontsize=13, color="red")
plt.title("Northern Hemisphere Wind Speed vs. Latitude")
print(f"The r-squared is {r_value * r_value}")
# Call show() (a bare `plt.show` only references the function)
plt.show()

In [None]:
# Southern cities: linear regression of wind speed on latitude
x_values = southern_cities["Lat"]
y_values = southern_cities["Wind Speed"]

plt.scatter(x_values, y_values, edgecolors='b')
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")
# Southern latitudes are negative, so the visible x-range must be (-90, 0);
# a (0, 90) range would leave every point outside the axes
plt.xlim((-90, 0))

# Least-squares fit; draw the fitted line over the scatter
slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)
plt.plot(x_values, intercept + slope*x_values, 'r')

# Anchor the equation at a negative latitude inside the (-90, 0) x-range
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.annotate(line_eq, (-55, min(y_values)), fontsize=13, color="red")
plt.title("Southern Hemisphere Wind Speed vs. Latitude")
print(f"The r-squared is {r_value * r_value}")
# Call show() (a bare `plt.show` only references the function)
plt.show()