# WeatherPy

In [31]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# Relative path that the cleaned weather table is written to.
# NOTE(review): assumes the ../output_data directory already exists — confirm.
output_data_file = "../output_data/cities.csv"

# Range of latitudes and longitudes
# (min, max) bounds in degrees used when drawing random coordinates.
lat_range = (-90, 90)
lng_range = (-180, 180)

In [78]:
# Draw random latitude/longitude pairs, uniformly within the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=30)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=30)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair and keep each city name
# once, in first-seen order (dict keys are unique and preserve insertion order)
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Show the number of unique cities to confirm the sample is large enough
len(cities)

25

In [79]:
# Start the weather table with one row per sampled city; the weather
# columns are added to it later, one API response at a time
weather_df = pd.DataFrame(data=cities, columns=['City'])

In [71]:
# Import google API key
# NOTE(review): g_key is not used by any visible cell in this notebook — confirm it is still needed.
from api_keys import g_key

# Endpoint and query parameters shared by every weather API call.
# HTTPS is used so the API key is not sent in plaintext.
weather_url = "https://api.openweathermap.org/data/2.5/weather"
params = {'appid':weather_api_key,
         'units':'imperial'}

# Progress counters for the log output: `record` counts cities within the
# current set and `sets` counts the sets. A new set starts after every
# `records_per_set` cities.
records_per_set = 5
request_timeout = 10  # seconds to wait for the API before giving up on a city
record = 1
sets = 1

# Print output of API calls
print('Beginning Data Retrieval')
print('------------------------')

# Store weather data into dataframe for each city
for index, row in weather_df.iterrows():
    params['q'] = row['City']
    print(f"Processing Record {record} of Set {sets} | {row['City']}")

    try:
        response = requests.get(weather_url, params=params, timeout=request_timeout).json()

        # Read every field before writing anything, so a response with a
        # missing key never leaves a half-filled row behind.
        fields = {
            'Lat': response['coord']['lat'],
            'Lng': response['coord']['lon'],
            'Max Temp': response['main']['temp_max'],
            'Humidity': response['main']['humidity'],
            'Cloudiness': response['clouds']['all'],
            'Wind Speed': response['wind']['speed'],
            'Country': response['sys']['country'],
            'Date': response['dt'],
        }
    except (KeyError, TypeError):
        # The payload has no weather fields (e.g. an error response for an unknown city).
        print('City not found.  Skipping...')
    except (requests.exceptions.RequestException, ValueError) as error:
        # Network failure, timeout, or a body that is not valid JSON:
        # skip this city instead of aborting the whole retrieval.
        print(f'Request failed ({error}).  Skipping...')
    else:
        for column, value in fields.items():
            weather_df.loc[index, column] = value

    # Advance the counters; roll over to the next set once this one is full
    if record == records_per_set:
        sets += 1
        record = 1
    else:
        record += 1

print('------------------------')
print('Data Retrieval Complete')
print('------------------------')

Beginning Data Retrieval
------------------------
Processing Record 1 of Set 1 | tsihombe
City not found.  Skipping...
Processing Record 2 of Set 1 | bandarbeyla
Processing Record 3 of Set 1 | glace bay
Processing Record 4 of Set 1 | belushya guba
City not found.  Skipping...
Processing Record 5 of Set 1 | vilcun
Processing Record 1 of Set 2 | hobart
Processing Record 2 of Set 2 | rikitea
Processing Record 3 of Set 2 | labuhan
Processing Record 4 of Set 2 | marawi
Processing Record 5 of Set 2 | vanavara
Processing Record 1 of Set 3 | lincoln
Processing Record 2 of Set 3 | cherskiy
Processing Record 3 of Set 3 | staraya poltavka
Processing Record 4 of Set 3 | khatanga
Processing Record 5 of Set 3 | faanui
Processing Record 1 of Set 4 | port elizabeth
Processing Record 2 of Set 4 | creel
Processing Record 3 of Set 4 | ushuaia
Processing Record 4 of Set 4 | vila franca do campo
Processing Record 5 of Set 4 | kapaa
Processing Record 1 of Set 5 | tuktoyaktuk
Processing Record 2 of Set 5 | s

In [73]:
# Drop every row that has a missing value (cities whose lookup was skipped),
# renumber the index from zero, then write the table to the CSV output file
# with a header row and without the index column
weather_df = weather_df.dropna().reset_index(drop=True)
weather_df.to_csv(output_data_file, header=True, index=False)

## Inspect the data and remove the cities where the humidity > 100%.

In [74]:
# Summary statistics of the weather table; the Humidity max shows whether
# any reading exceeds 100%
weather_df.describe()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,28.0,28.0,28.0,28.0,28.0,28.0,28.0
mean,14.700425,-9.665504,65.432143,69.428571,48.857143,9.004643,1628090000.0
std,41.223185,108.983755,14.371109,16.175951,37.975347,6.275235,61.86421
min,-54.8,-159.319,40.03,38.0,0.0,1.01,1628090000.0
25%,-23.5569,-93.911725,51.4025,60.75,17.5,5.295,1628090000.0
50%,16.73985,-58.05595,69.37,69.0,40.0,7.26,1628090000.0
75%,47.266975,102.374975,78.2925,80.0,90.25,11.79,1628090000.0
max,72.7868,166.1,88.75,94.0,100.0,31.29,1628090000.0


## Latitude vs. Temperature Plot

In [None]:
# Keep only cities whose humidity reading is at most 100%, as this section
# promises; the original cell only aliased weather_df and filtered nothing.
# The index is renumbered so it stays contiguous after any rows are removed.
clean_city_data = weather_df.loc[weather_df['Humidity'] <= 100].reset_index(drop=True)

# Latitude series shared as the x-axis by the scatter plots
latitude = clean_city_data['Lat']

In [None]:
# Plot latitude vs temp
temp = clean_city_data['Max Temp']

# Take the date in the title from the data instead of hard-coding it, so the
# label stays correct whenever the notebook is re-run. 'Date' is filled from
# the API's `dt` field (Unix seconds, UTC per the OpenWeatherMap docs).
obs_date = pd.to_datetime(clean_city_data['Date'].max(), unit='s')
date_label = f'{obs_date.month}/{obs_date.day}/{obs_date.year}'

plt.scatter(latitude,temp)
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.title(f'City Latitude vs. Max Temperature ({date_label})')
plt.grid()
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Plot latitude vs. humidity
humidity = clean_city_data['Humidity']

# Take the date in the title from the data instead of hard-coding it, so the
# label stays correct whenever the notebook is re-run. 'Date' is filled from
# the API's `dt` field (Unix seconds, UTC per the OpenWeatherMap docs).
obs_date = pd.to_datetime(clean_city_data['Date'].max(), unit='s')
date_label = f'{obs_date.month}/{obs_date.day}/{obs_date.year}'

plt.scatter(latitude,humidity)
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title(f'City Latitude vs. Humidity ({date_label})')
plt.grid()
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Plot latitude vs cloudiness
cloudiness = clean_city_data['Cloudiness']

# Take the date in the title from the data instead of hard-coding it, so the
# label stays correct whenever the notebook is re-run. 'Date' is filled from
# the API's `dt` field (Unix seconds, UTC per the OpenWeatherMap docs).
obs_date = pd.to_datetime(clean_city_data['Date'].max(), unit='s')
date_label = f'{obs_date.month}/{obs_date.day}/{obs_date.year}'

plt.scatter(latitude,cloudiness)
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title(f'City Latitude vs. Cloudiness ({date_label})')
plt.grid()
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Plot latitude vs wind speed
wind_speed = clean_city_data['Wind Speed']

# Take the date in the title from the data instead of hard-coding it, so the
# label stays correct whenever the notebook is re-run. 'Date' is filled from
# the API's `dt` field (Unix seconds, UTC per the OpenWeatherMap docs).
obs_date = pd.to_datetime(clean_city_data['Date'].max(), unit='s')
date_label = f'{obs_date.month}/{obs_date.day}/{obs_date.year}'

plt.scatter(latitude,wind_speed)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title(f'City Latitude vs. Wind Speed ({date_label})')
plt.grid()
plt.show()

## Linear Regression

In [None]:
# Split the cities at the equator; a latitude of exactly 0 counts as northern
lat_values = clean_city_data['Lat']
northern_data = clean_city_data[lat_values >= 0]
southern_data = clean_city_data[lat_values < 0]

In [None]:
# Latitude series for each hemisphere, used as the x-axis of the regressions
northern_latitude, southern_latitude = northern_data['Lat'], southern_data['Lat']

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Find max temp for northern latitudes
n_temp = northern_data['Max Temp']

# Plot scatter plot with linear regression line
plt.scatter(northern_latitude,n_temp)
plt.xlabel('Latitude')
plt.ylabel('Max Temp')

# Least-squares fit of max temp against latitude. The intercept is named
# `intercept` rather than `int` so the built-in int() is not shadowed for the
# rest of the kernel session.
slope, intercept, rvalue = linregress(northern_latitude,n_temp)[0:3]
line = slope * northern_latitude + intercept
plt.plot(northern_latitude,line)

# Print R value
print(f'The r-value is: {rvalue}')

# Add text: the fitted equation, positioned in data coordinates
line_eq = f'y = {round(slope,2)}x + {round(intercept,2)}'
plt.annotate(line_eq,(15,55),fontsize=15,color="red")
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Find max temp for southern latitudes
s_temp = southern_data['Max Temp']

# Plot scatter plot with linear regression line
plt.scatter(southern_latitude,s_temp)
plt.xlabel('Latitude')
plt.ylabel('Max Temp')

# Least-squares fit of max temp against latitude. The intercept is named
# `intercept` rather than `int` so the built-in int() is not shadowed for the
# rest of the kernel session.
slope, intercept, rvalue = linregress(southern_latitude,s_temp)[0:3]
line = slope * southern_latitude + intercept
plt.plot(southern_latitude,line,"--")

# Print R value
print(f'The r-value is: {rvalue}')

# Add text: the fitted equation, positioned in data coordinates
line_eq = f'y = {round(slope,2)}x + {round(intercept,2)}'
plt.annotate(line_eq,(-54,67),fontsize=15,color="red")
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Find humidity for northern latitudes
n_humidity = northern_data['Humidity']

# Plot scatter plot with linear regression line
plt.scatter(northern_latitude,n_humidity)
plt.xlabel('Latitude')
plt.ylabel('Humidity')

# Least-squares fit of humidity against latitude. The intercept is named
# `intercept` rather than `int` so the built-in int() is not shadowed for the
# rest of the kernel session.
slope, intercept, rvalue = linregress(northern_latitude,n_humidity)[0:3]
line = slope * northern_latitude + intercept
plt.plot(northern_latitude,line)

# Print R value
print(f'The r-value is: {rvalue}')

# Add text: the fitted equation, positioned in data coordinates
line_eq = f'y = {round(slope,2)}x + {round(intercept,2)}'
plt.annotate(line_eq,(15,25),fontsize=15,color="red")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Find humidity for southern latitudes
s_humidity = southern_data['Humidity']

# Plot scatter plot with linear regression line
plt.scatter(southern_latitude,s_humidity)
plt.xlabel('Latitude')
plt.ylabel('Humidity')

# Least-squares fit of humidity against latitude. The intercept is named
# `intercept` rather than `int` so the built-in int() is not shadowed for the
# rest of the kernel session.
slope, intercept, rvalue = linregress(southern_latitude,s_humidity)[0:3]
line = slope * southern_latitude + intercept
plt.plot(southern_latitude,line,"--")

# Print R value
print(f'The r-value is: {rvalue}')

# Add text: the fitted equation, positioned in data coordinates
line_eq = f'y = {round(slope,2)}x + {round(intercept,2)}'
plt.annotate(line_eq,(-54,67),fontsize=15,color="red")
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Find cloudiness for northern latitudes
n_cloudiness = northern_data['Cloudiness']

# Plot scatter plot with linear regression line
plt.scatter(northern_latitude,n_cloudiness)
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')

# Least-squares fit of cloudiness against latitude. The intercept is named
# `intercept` rather than `int` so the built-in int() is not shadowed for the
# rest of the kernel session.
slope, intercept, rvalue = linregress(northern_latitude,n_cloudiness)[0:3]
line = slope * northern_latitude + intercept
plt.plot(northern_latitude,line)

# Print R value
print(f'The r-value is: {rvalue}')

# Add text: the fitted equation, positioned in data coordinates
line_eq = f'y = {round(slope,2)}x + {round(intercept,2)}'
plt.annotate(line_eq,(10,10),fontsize=15,color="red")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Find cloudiness for southern latitudes
s_cloudiness = southern_data['Cloudiness']

# Plot scatter plot with linear regression line
plt.scatter(southern_latitude,s_cloudiness)
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')

# Least-squares fit of cloudiness against latitude. The intercept is named
# `intercept` rather than `int` so the built-in int() is not shadowed for the
# rest of the kernel session.
slope, intercept, rvalue = linregress(southern_latitude,s_cloudiness)[0:3]
line = slope * southern_latitude + intercept
plt.plot(southern_latitude,line,"--")

# Print R value
print(f'The r-value is: {rvalue}')

# Add text: the fitted equation, positioned in data coordinates
line_eq = f'y = {round(slope,2)}x + {round(intercept,2)}'
plt.annotate(line_eq,(-54,90),fontsize=15,color="red")
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Find wind speed for northern latitudes
n_wind = northern_data['Wind Speed']

# Plot scatter plot with linear regression line
plt.scatter(northern_latitude,n_wind)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')

# Least-squares fit of wind speed against latitude. The intercept is named
# `intercept` rather than `int` so the built-in int() is not shadowed for the
# rest of the kernel session.
slope, intercept, rvalue = linregress(northern_latitude,n_wind)[0:3]
line = slope * northern_latitude + intercept
plt.plot(northern_latitude,line)

# Print R value
print(f'The r-value is: {rvalue}')

# Add text: the fitted equation, positioned in data coordinates
line_eq = f'y = {round(slope,2)}x + {round(intercept,2)}'
plt.annotate(line_eq,(10,16),fontsize=15,color="red")
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Find wind speed for southern latitudes
s_wind = southern_data['Wind Speed']

# Plot scatter plot with linear regression line
plt.scatter(southern_latitude,s_wind)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')

# Least-squares fit of wind speed against latitude. The intercept is named
# `intercept` rather than `int` so the built-in int() is not shadowed for the
# rest of the kernel session.
slope, intercept, rvalue = linregress(southern_latitude,s_wind)[0:3]
line = slope * southern_latitude + intercept
plt.plot(southern_latitude,line,"--")

# Print R value
print(f'The r-value is: {rvalue}')

# Add text: the fitted equation, positioned in data coordinates
line_eq = f'y = {round(slope,2)}x + {round(intercept,2)}'
plt.annotate(line_eq,(-55,20),fontsize=15,color="red")
plt.show()