In [None]:
# Dependencies and Setup
import json
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy.stats as st

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Import API key
from api_keys import weather_api_key

# Destination path for the collected weather data (CSV)
output_data_file = "output_data/cities.csv"

# (min, max) bounds for latitude and longitude
lat_range, lng_range = (-90, 90), (-180, 180)

In [None]:
# Draw 1500 random latitudes and 1500 random longitudes within the configured
# bounds, then pair them up position by position
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair; dict.fromkeys discards
# repeated names while keeping the order in which cities were first seen
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Show how many distinct cities were collected to confirm a sufficient count
len(cities)

In [None]:
# Initialize lists for data storage (one entry per city the API returned data for)
temps = []
humidity = []
cloudiness = []
wind = []
cities_found = []
lats_found = []
lngs_found = []
city_count = 1

# Current-weather endpoint; HTTPS keeps the API key out of clear-text traffic
weather_url = 'https://api.openweathermap.org/data/2.5/weather'

# Loop through each city in original cities list
for city in cities:

    # Attempt to pull city data from the weather API
    try:
        # Passing `params` lets requests URL-encode the city name (spaces,
        # accents, ...). units=imperial makes the API report Fahrenheit and
        # miles/hour, which is what the 'Temperature (F)' and
        # 'Wind Speed (mph)' column labels promise; the API default is Kelvin
        # and metres/second. The timeout stops one stalled request from
        # hanging the whole loop.
        response = requests.get(
            weather_url,
            params={'q': city, 'appid': weather_api_key, 'units': 'imperial'},
            timeout=10,
        ).json()

        # Read every field BEFORE storing any of them: if a reply lacks one
        # key, nothing is appended and the lists stay the same length.
        temp = response['main']['temp']
        humid = response['main']['humidity']
        clouds = response['clouds']['all']
        wind_speed = response['wind']['speed']
        lat = response['coord']['lat']
        lng = response['coord']['lon']

    # Network-level failure (connection error, timeout, ...)
    except requests.RequestException as err:
        print(f'{city} request failed ({err}). Continuing...')
        continue

    # Reply did not contain the expected weather fields (e.g. an error payload
    # for an unknown city) or was not valid JSON
    except (KeyError, TypeError, ValueError):
        print(f'{city} not found. Continuing...')
        continue

    # All fields present: add data to respective lists
    temps.append(temp)
    humidity.append(humid)
    cloudiness.append(clouds)
    wind.append(wind_speed)
    lats_found.append(lat)
    lngs_found.append(lng)
    cities_found.append(city)

    # Print the city and record number
    print(f'Record {city_count} for | {city}')
    city_count += 1

In [None]:
# Gather the per-city lists into one table; each list becomes a column and
# the dict order fixes the column order
weather_columns = {
    'Cities': cities_found,
    'Latitude': lats_found,
    'Temperature (F)': temps,
    'Humidity (%)': humidity,
    'Cloudiness (%)': cloudiness,
    'Wind Speed (mph)': wind,
}
weather_df = pd.DataFrame(weather_columns)

# Preview the first rows
weather_df.head()

In [None]:
# Measurement columns to chart against latitude; one scatter plot is drawn
# for each entry
cols = ['Temperature (F)','Humidity (%)','Cloudiness (%)','Wind Speed (mph)']

for metric in cols:
    # Collect the plot options first so the call itself stays short
    scatter_opts = dict(
        x='Latitude',
        y=metric,
        c='Blue',
        xlabel='Latitude',
        ylabel=metric,
        title=f'Latitude vs. {metric}',
        rot=45,
    )
    weather_df.plot.scatter(**scatter_opts)


In [None]:
def _plot_latitude_regression(hemi_df, col, hemisphere, color):
    """Scatter `col` against latitude and overlay the least-squares line.

    Parameters
    ----------
    hemi_df : pandas.DataFrame
        Rows to plot; must contain a 'Latitude' column and the `col` column.
    col : str
        Name of the column plotted on the y axis.
    hemisphere : str
        Label appended to the plot title (e.g. 'Northern Hemisphere').
    color : str
        Marker colour for the scatter points.

    Returns
    -------
    The Axes the plot was drawn on, or None when there are fewer than two
    distinct latitudes and no regression line can be fitted.
    """
    x_data = hemi_df['Latitude']
    y_data = hemi_df[col]

    # linregress cannot fit a line to an empty frame or to identical x values
    if x_data.nunique() < 2:
        print(f'Not enough {hemisphere} data to fit {col}. Skipping...')
        return None

    # fit.rvalue is the Pearson correlation coefficient, so a separate
    # pearsonr call is not needed
    fit = st.linregress(x_data, y_data)
    regress_values = x_data * fit.slope + fit.intercept
    line_eq = f'y = {round(fit.slope,2)}x + {round(fit.intercept,2)}'

    # Draw on the Axes returned by pandas instead of relying on pyplot's
    # implicit "current axes"
    ax = hemi_df.plot.scatter(x='Latitude', y=col, c=color,
                              xlabel='Latitude', ylabel=col,
                              title=f'Latitude vs. {col} {hemisphere}', rot=45)
    ax.plot(x_data, regress_values, 'r')

    # Position the labels as a fraction of the axes rather than at fixed data
    # coordinates: a fixed data point such as (65, 20) lies outside the plot
    # for negative latitudes, and the label is then not drawn
    ax.annotate(line_eq, (0.05, 0.92), xycoords='axes fraction',
                fontsize=10, color='red')
    ax.annotate(f"rsquared = {round(fit.rvalue**2,3)}", (0.05, 0.85),
                xycoords='axes fraction', fontsize=10, color='red')
    return ax


# Split by hemisphere; latitude 0 is grouped with the north so that rows on
# the equator are not silently dropped from both halves
north_df = weather_df.loc[weather_df['Latitude'] >= 0,:]
south_df = weather_df.loc[weather_df['Latitude'] < 0,:]

# One regression plot per measurement and hemisphere
for col in cols:
    _plot_latitude_regression(north_df, col, 'Northern Hemisphere', 'Blue')
    _plot_latitude_regression(south_df, col, 'Southern Hemisphere', 'Orange')


In [None]:
# Write all data to csv.
# to_csv does not create missing folders, so make sure the target directory
# exists first (skipped when the path has no directory component)
output_dir = os.path.dirname(output_data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
weather_df.to_csv(output_data_file)