In [None]:
 # Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination CSV path for the retrieved city weather records.
output_data_file = "output_data/cities.csv"

# (low, high) bounds in degrees handed to the random coordinate sampler.
lat_range, lng_range = (-90, 90), (-180, 180)

In [None]:
 # List for holding lat_lngs and cities
# Draw 1500 random (lat, lng) pairs from the configured ranges.
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Resolve each coordinate pair to its nearest city and keep every city name once.
# dict.fromkeys de-duplicates with constant-time lookups while preserving
# first-seen order, instead of re-scanning a growing list for every candidate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)

In [None]:

# Number of cities requested per batch ("set") of API calls.
cities_per_set = 58

num_cities = len(cities)
# Fractional number of sets; kept for inspection only.
sets = num_cities / cities_per_set
# Ceiling division in pure integer arithmetic: the number of sets needed to cover
# every city. When num_cities is an exact multiple of cities_per_set this yields
# exactly that many sets (no trailing empty set), and it yields 0 for an empty
# city list.
total_sets = -(-num_cities // cities_per_set)
print(f"There will be {total_sets} calls based on {num_cities} cities.")

In [None]:
# Save config information
# HTTPS is used so the API key carried in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "metric"


# Build query URL; it ends with "q=" so a city name can be appended directly.
query_url = f"{url}appid={weather_api_key}&units={units}&q="

In [None]:
# Cities requested per set; must match the value used when total_sets was computed.
set_size = 58
# Minimum wall-clock seconds a set occupies before the next one starts
# (presumably to stay under a per-minute API rate limit -- confirm against the plan in use).
seconds_per_set = 61

# Outcome counters.
fail_count = 0
success_count = 0
citycount = 0

# One accumulator list per output column. A value is appended to every list, or
# to none, for each city, so all lists always have the same length.
city = []
lat = []
temp = []
temp_max = []
temp_min = []
lng = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

# Make a request for each of the cities
print("Beginning Data Retrieval\n-----------------------------")
total_start = time.time()
for set_index in range(total_sets):
    # Slicing never runs past the end of the list, so a short or empty final set
    # cannot raise IndexError.
    set_begin = set_index * set_size
    set_cities = cities[set_begin:set_begin + set_size]
    if not set_cities:
        break

    start = time.time()
    for record_number, city_name in enumerate(set_cities, start=1):
        print(f"Processing Record {record_number} of set {set_index + 1} | {city_name}")
        citycount = citycount + 1

        try:
            post_response = requests.get(query_url + str(city_name), timeout=30)
            request_check = post_response.json()
        except (requests.exceptions.RequestException, ValueError) as error:
            # Network failure, timeout, or a body that is not JSON. Only the
            # exception type is printed because the message can contain the
            # request URL, which includes the API key.
            print(f"Request failed ({type(error).__name__}). Skipping...")
            fail_count = fail_count + 1
            continue

        # 'cod' is compared as text so both "404" and 404 are recognised.
        cod = request_check.get('cod') if isinstance(request_check, dict) else None
        if str(cod) == '404':
            print('Invalid City. Skipping...')
            fail_count = fail_count + 1
            continue

        try:
            # Read every field before appending anything, so a payload missing
            # a field cannot leave the lists with different lengths.
            record = (
                request_check['name'],
                request_check['coord']['lat'],
                request_check['coord']['lon'],
                request_check['main']['temp'],
                request_check['main']['temp_max'],
                request_check['main']['temp_min'],
                request_check['main']['humidity'],
                request_check['clouds']['all'],
                request_check['wind']['speed'],
                request_check['sys']['country'],
                request_check['dt'],
            )
        except (KeyError, TypeError):
            # Any other error payload or malformed response lacks the weather fields.
            print(f"Unexpected response (cod={cod}). Skipping...")
            fail_count = fail_count + 1
            continue

        #Save post's JSON
        city.append(record[0])
        lat.append(record[1])
        lng.append(record[2])
        temp.append(record[3])
        temp_max.append(record[4])
        temp_min.append(record[5])
        humidity.append(record[6])
        cloudiness.append(record[7])
        wind_speed.append(record[8])
        country.append(record[9])
        date.append(record[10])
        success_count = success_count + 1

    # Pause only when another non-empty set follows. max() keeps the duration
    # non-negative (time.sleep rejects negative values) when the set already
    # took longer than seconds_per_set.
    more_cities_remain = set_begin + set_size < len(cities)
    if more_cities_remain and set_index + 1 < total_sets:
        time.sleep(max(0.0, seconds_per_set - (time.time() - start)))

total_end = time.time()
total_elapsed = (total_end - total_start)/60
print(f"----------------------------- \nData Retrieval Complete \n-----------------------------\nData for {success_count} cities was loaded successfully, and there were {fail_count} cities not found. The process took approximately {round(total_elapsed,2)} minutes")

In [None]:
# Pair each output column name with the list of values collected for it.
column_names = [
    'City', 'Lat', 'Lng', 'Temp', 'Max Temp', 'Min Temp',
    'Humidity', 'Cloudiness', 'Wind Speed', 'Country', 'Date',
]
column_values = [
    city, lat, lng, temp, temp_max, temp_min,
    humidity, cloudiness, wind_speed, country, date,
]
weather_data_dict = dict(zip(column_names, column_values))

In [None]:
# Build the DataFrame with one column per dictionary key.
weather_data = pd.DataFrame.from_dict(weather_data_dict)

In [None]:
# Show the assembled DataFrame as this cell's output.
weather_data

In [None]:
# Guard flag for the temperature conversion step: 0 means the temperature
# columns have not been converted yet. Re-running this cell resets the guard.
temp_conversion = 0

In [None]:
# Convert the three temperature columns from Celsius (units = "metric") to
# Fahrenheit, rounded to whole degrees. The flag is bumped afterwards so that
# re-running this cell does not convert a second time.
if temp_conversion == 0:
    for temp_column in ('Temp', 'Max Temp', 'Min Temp'):
        fahrenheit = (weather_data[temp_column] * (9/5)) + 32
        weather_data[temp_column] = fahrenheit.round(0)
    temp_conversion += 1

In [None]:
# Rows whose humidity is above 100, taken as an independent copy of weather_data.
above_100_humidity = weather_data['Humidity'].gt(100)
humidity_outliers = weather_data.loc[above_100_humidity].copy()
humidity_outliers

In [None]:
# Remove the outlier rows themselves, matched by row index rather than by city
# name, so a valid row that merely shares its name with an outlier is kept.
if len(humidity_outliers) > 0:
    is_outlier_row = weather_data.index.isin(humidity_outliers.index)
    weather_data_ex_hum_ol = weather_data[~is_outlier_row].copy()
    weather_data = weather_data_ex_hum_ol.copy()

In [None]:
# Show the DataFrame again after the humidity-outlier step.
weather_data

In [None]:
# Write the data to the CSV path held in output_data_file, without the index column.
weather_data.to_csv(output_data_file, index=False)

In [None]:
# Select the plotting series by column label instead of by position, so the
# plots stay correct if columns are ever added or reordered.
lat_scatter = weather_data['Lat']
temp_scatter = weather_data['Max Temp']  # the temperature plots show the maximum temperature
hum_scatter = weather_data['Humidity']
cloud_scatter = weather_data['Cloudiness']
wind_speed_scatter = weather_data['Wind Speed']

In [None]:
# Scatter of max temperature against latitude for all cities; the figure is
# saved as a PNG before being shown.
fig, ax = plt.subplots()
ax.scatter(lat_scatter, temp_scatter)
ax.set_xlabel('Latitude')
ax.set_ylabel('Temperature')
ax.set_title('City Latitude vs Max Temperature')
fig.savefig('output_data/City Latitude vs Max Temperature.png')
plt.show()

The above plot compares temperature to Latitude. 
The plot shows that Max Temperature increases as you approach the equator and decreases as you move away from it in either direction. 
Temperatures are lower in the Northern hemisphere than the Southern. 

In [None]:
# Scatter of humidity against latitude for all cities; the figure is saved as
# a PNG before being shown.
fig, ax = plt.subplots()
ax.scatter(lat_scatter, hum_scatter)
ax.set_xlabel('Latitude')
ax.set_ylabel('Humidity')
ax.set_title('City Latitude vs Humidity')
fig.savefig('output_data/City Latitude vs Humidity.png')
plt.show()

The above plot compares Humidity to Latitude. 
Humidity appears to be relatively high (>60) in all parts of the globe, and has larger clusters of high humidity in the Northern Hemisphere. 
The equator appears to have fewer instances of low humidity.  

In [None]:
# Scatter of cloudiness against latitude for all cities; the figure is saved
# as a PNG before being shown.
fig, ax = plt.subplots()
ax.scatter(lat_scatter, cloud_scatter)
ax.set_xlabel('Latitude')
ax.set_ylabel('Cloudiness')
ax.set_title('City Latitude vs Cloudiness')
fig.savefig('output_data/City Latitude vs Cloudiness.png')
plt.show()

The above plot compares cloudiness to Latitude. 
Cloudiness does not appear to correlate much with latitude. 
The Equator seems to have fewer near 0 values for cloudiness. 

In [None]:
# Scatter of wind speed against latitude for all cities; the figure is saved
# as a PNG before being shown.
fig, ax = plt.subplots()
ax.scatter(lat_scatter, wind_speed_scatter)
ax.set_xlabel('Latitude')
ax.set_ylabel('Wind Speed')
ax.set_title('City Latitude vs Wind Speed')
fig.savefig('output_data/City Latitude vs Wind Speed.png')
plt.show()

The above plot compares Wind Speed to Latitude. 
Wind Speed does not appear to correlate much with latitude. 
Wind speed appears relatively low in all parts of the globe. 

In [None]:
# Northern Hemisphere subset: rows with latitude of 0 or above, as an independent copy.
in_northern_hemisphere = weather_data['Lat'].ge(0)
weather_data_north = weather_data.loc[in_northern_hemisphere].copy()

In [None]:
# Show the Northern Hemisphere subset as this cell's output.
weather_data_north

In [None]:
# Southern Hemisphere subset: rows with latitude below 0, as an independent copy.
in_southern_hemisphere = weather_data['Lat'].lt(0)
weather_data_south = weather_data.loc[in_southern_hemisphere].copy()

In [None]:
# Show the Southern Hemisphere subset as this cell's output.
weather_data_south

In [None]:

# Select the per-hemisphere plotting series by column label instead of by
# position, so the regressions stay correct if columns are added or reordered.
north_lat_scatter = weather_data_north['Lat']
north_temp_scatter = weather_data_north['Max Temp']
north_hum_scatter = weather_data_north['Humidity']
north_cloud_scatter = weather_data_north['Cloudiness']
north_wind_speed_scatter = weather_data_north['Wind Speed']
south_lat_scatter = weather_data_south['Lat']
south_temp_scatter = weather_data_south['Max Temp']
south_hum_scatter = weather_data_south['Humidity']
south_cloud_scatter = weather_data_south['Cloudiness']
south_wind_speed_scatter = weather_data_south['Wind Speed']

In [None]:
# Least-squares fit of max temperature on latitude for Northern Hemisphere cities.
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_lat_scatter,north_temp_scatter)
regress_values = north_lat_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(north_lat_scatter,north_temp_scatter)
plt.plot(north_lat_scatter,regress_values,"r-")
# Show the fitted equation on the figure. Axes-fraction coordinates place it in
# the lower-left corner regardless of the data range.
plt.annotate(line_eq, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")
plt.xlabel('Latitude')
plt.ylabel('Temperature')
plt.title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')
plt.savefig('output_data/Northern Hemisphere - Max Temp vs. Latitude Linear Regression.png')
plt.show()
print(f'The r-value is {rvalue}')

The above plot compares temperature to Latitude in the northern hemisphere. 
These data points are highly correlated. 
As Latitude increases(away from the equator), temperature decreases. 

In [None]:
# Least-squares fit of max temperature on latitude for Southern Hemisphere cities.
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_lat_scatter,south_temp_scatter)
regress_values = south_lat_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(south_lat_scatter,south_temp_scatter)
plt.plot(south_lat_scatter,regress_values,"r-")
# Show the fitted equation on the figure. Axes-fraction coordinates place it in
# the lower-left corner regardless of the data range.
plt.annotate(line_eq, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")
plt.xlabel('Latitude')
plt.ylabel('Temperature')
plt.title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')
plt.savefig('output_data/Southern Hemisphere - Max Temp vs. Latitude Linear Regression.png')
plt.show()
print(f'The r-value is {rvalue}')

The above plot compares temperature to Latitude in the southern hemisphere. 
These data points appear to be correlated, but the data set is smaller than that for the Northern Hemisphere.  
As Latitude increases(toward the equator), temperature increases. 

In [None]:
# Least-squares fit of humidity on latitude for Northern Hemisphere cities.
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_lat_scatter,north_hum_scatter)
regress_values = north_lat_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(north_lat_scatter,north_hum_scatter)
plt.plot(north_lat_scatter,regress_values,"r-")
# Show the fitted equation on the figure. Axes-fraction coordinates place it in
# the lower-left corner regardless of the data range.
plt.annotate(line_eq, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title('Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
plt.savefig('output_data/Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression.png')
plt.show()
print(f'The r-value is {rvalue}')

The above plot compares Humidity to Latitude in the northern hemisphere. 
Humidity in the northern hemisphere appears to trend high. 
As Latitude increases, humidity increases slowly. 
There are many outliers on the low end. 

In [None]:
# Least-squares fit of humidity on latitude for Southern Hemisphere cities.
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_lat_scatter,south_hum_scatter)
regress_values = south_lat_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(south_lat_scatter,south_hum_scatter)
plt.plot(south_lat_scatter,regress_values,"r-")
# Show the fitted equation on the figure. Axes-fraction coordinates place it in
# the lower-left corner regardless of the data range.
plt.annotate(line_eq, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title('Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
plt.savefig('output_data/Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression.png')
plt.show()
print(f'The r-value is {rvalue}')

#The above plot compares Humidity to Latitude in the southern hemisphere. 
#Humidity in the southern hemisphere appears to be relatively spread out. 
#Humidity appears to trend higher as we approach the equator. 

In [None]:
# Least-squares fit of cloudiness on latitude for Northern Hemisphere cities.
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_lat_scatter,north_cloud_scatter)
regress_values = north_lat_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(north_lat_scatter,north_cloud_scatter)
plt.plot(north_lat_scatter,regress_values,"r-")
# Show the fitted equation on the figure. Axes-fraction coordinates place it in
# the lower-left corner regardless of the data range.
plt.annotate(line_eq, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title('Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
plt.savefig('output_data/Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression.png')
plt.show()
print(f'The r-value is {rvalue}')

#The above plot compares cloudiness to Latitude in the northern hemisphere. 
#Cloudiness does not appear to have a strong linear relationship to latitude. 

In [None]:
# Least-squares fit of cloudiness on latitude for Southern Hemisphere cities.
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_lat_scatter,south_cloud_scatter)
regress_values = south_lat_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(south_lat_scatter,south_cloud_scatter)
plt.plot(south_lat_scatter,regress_values,"r-")
# Show the fitted equation on the figure. Axes-fraction coordinates place it in
# the lower-left corner regardless of the data range.
plt.annotate(line_eq, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title('Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
plt.savefig('output_data/Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression.png')
plt.show()
print(f'The r-value is {rvalue}')

#The above plot compares cloudiness to Latitude in the southern hemisphere. 
#Cloudiness appears to have a weak relationship to latitude, but trends upward toward the equator. 

In [None]:
# Least-squares fit of wind speed on latitude for Northern Hemisphere cities.
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_lat_scatter,north_wind_speed_scatter)
regress_values = north_lat_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(north_lat_scatter,north_wind_speed_scatter)
plt.plot(north_lat_scatter,regress_values,"r-")
# Show the fitted equation on the figure. Axes-fraction coordinates place it in
# the lower-left corner regardless of the data range.
plt.annotate(line_eq, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
# The data is requested with units = "metric", for which the OpenWeatherMap API
# reports wind speed in metres per second, so the title says m/s. The saved
# file name is left unchanged so existing references to the image keep working.
plt.title('Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression')
plt.savefig('output_data/Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression.png')
plt.show()
print(f'The r-value is {rvalue}')

The above plot compares Wind Speed to Latitude in the Northern hemisphere. 
Wind Speed appears to have a weak relationship to latitude, but trends upward away from the equator. 
Wind speed appears to trend lower. 

In [None]:
# Least-squares fit of wind speed on latitude for Southern Hemisphere cities.
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_lat_scatter,south_wind_speed_scatter)
regress_values = south_lat_scatter * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(south_lat_scatter,south_wind_speed_scatter)
plt.plot(south_lat_scatter,regress_values,"r-")
# Show the fitted equation on the figure. Axes-fraction coordinates place it in
# the lower-left corner regardless of the data range.
plt.annotate(line_eq, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
# The data is requested with units = "metric", for which the OpenWeatherMap API
# reports wind speed in metres per second, so the title says m/s. The saved
# file name is left unchanged so existing references to the image keep working.
plt.title('Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression')
plt.savefig('output_data/Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression.png')
plt.show()
print(f'The r-value is {rvalue}')

The above plot compares Wind Speed to Latitude in the Southern hemisphere. 
Wind Speed appears to have a weak relationship to latitude, but trends downward toward the equator. 
Unlike the northern hemisphere, the wind speed in the southern hemisphere appears to be much more spread out, not trending high or low. 
It is worth noting again that the data set for the southern hemisphere is smaller than that for the north. 