# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
%matplotlib notebook
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import time
from scipy.stats import linregress
!pip install citipy

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)



## Generate Cities List

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
print(len(cities))
print(cities)

609
['kodiak', 'georgetown', 'cosala', 'rikitea', 'namatanai', 'nuuk', 'east london', 'cassia', 'villagomez', 'albany', 'kawana waters', 'kaitangata', 'vaini', 'ushuaia', 'souillac', 'illoqqortoormiut', 'entebbe', 'asyut', 'kapaa', 'hobart', 'severo-kurilsk', 'luan', 'victoria', 'port lincoln', 'barrow', 'castro', 'taolanaro', 'bredasdorp', 'fortuna', 'olafsvik', 'mar del plata', 'belushya guba', 'lebu', 'busselton', 'naze', 'mataura', 'vaitupu', 'nizhniy tsasuchey', 'khatanga', 'norman wells', 'hamilton', 'strezhevoy', 'mattru', 'maputo', 'asau', 'avarua', 'beidao', 'broken hill', 'henties bay', 'saint-philippe', 'nanortalik', 'maragogi', 'cape town', 'krasnoselkup', 'poum', 'hermanus', 'tomatlan', 'kununurra', 'hearst', 'aksarka', 'laguna', 'punta arenas', 'bougouni', 'machinga', 'saleaula', 'salalah', 'guerrero negro', 'caravelas', 'tilichiki', 'tuktoyaktuk', 'wenatchee', 'bengkulu', 'road town', 'ancud', 'cuenca', 'quelimane', 'znamenskoye', 'marsa matruh', 'taybad', 'codrington', 

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [3]:
# base_url = f"http://api.openweathermap.org/data/2.5/weather?q={city_id}&appid={weather_api_key}"
# f"http://api.openweathermap.org/data/2.5/weather?q=london&appid={weather_api_key}"

url = "http://api.openweathermap.org/data/2.5/weather?"

city_id = ""

city_names = []
city_lats = []
city_lngs = []
city_max_temps = []
city_humidity = []
city_cloudiness = []
city_wind = []
city_country = []
city_date = []

responses = []

specific_url = f"{url}q={city_id}&appid={weather_api_key}"

for city in cities:
    
    city_id = str(city)
    
    specific_url = f"{url}q={city_id}&appid={weather_api_key}"
    
    response = requests.get(specific_url).json()
    
    print(f"Data acquired for {len(city_names) + 1}/{len(cities)} cities.")
    
    # Append data to empty lists
    try:
        city_names.append(response['name'])
        city_lats.append(response['coord']['lat'])
        city_lngs.append(response['coord']['lon'])
        city_max_temps.append((((response['main']['temp_max']) - 273.15)* 1.8000 )+ 32.00)
        city_humidity.append(response['main']['humidity'])
        city_cloudiness.append(response['clouds']['all'])
        city_wind.append(response['wind']['speed'])
        city_country.append(response['sys']['country'])
        city_date.append(response['dt'])
    
    except:
        pass
        print("Not found")

Data acquired for 1/609 cities.
Data acquired for 2/609 cities.
Data acquired for 3/609 cities.
Data acquired for 4/609 cities.
Data acquired for 5/609 cities.
Data acquired for 6/609 cities.
Data acquired for 7/609 cities.
Data acquired for 8/609 cities.
Data acquired for 9/609 cities.
Data acquired for 10/609 cities.
Data acquired for 11/609 cities.
Not found
Data acquired for 11/609 cities.
Data acquired for 12/609 cities.
Data acquired for 13/609 cities.
Data acquired for 14/609 cities.
Data acquired for 15/609 cities.
Not found
Data acquired for 15/609 cities.
Data acquired for 16/609 cities.
Data acquired for 17/609 cities.
Data acquired for 18/609 cities.
Data acquired for 19/609 cities.
Data acquired for 20/609 cities.
Data acquired for 21/609 cities.
Data acquired for 22/609 cities.
Data acquired for 23/609 cities.
Data acquired for 24/609 cities.
Data acquired for 25/609 cities.
Not found
Data acquired for 25/609 cities.
Data acquired for 26/609 cities.
Data acquired for 27/6

Data acquired for 221/609 cities.
Data acquired for 222/609 cities.
Data acquired for 223/609 cities.
Data acquired for 224/609 cities.
Data acquired for 225/609 cities.
Data acquired for 226/609 cities.
Data acquired for 227/609 cities.
Data acquired for 228/609 cities.
Data acquired for 229/609 cities.
Data acquired for 230/609 cities.
Data acquired for 231/609 cities.
Data acquired for 232/609 cities.
Data acquired for 233/609 cities.
Data acquired for 234/609 cities.
Data acquired for 235/609 cities.
Data acquired for 236/609 cities.
Not found
Data acquired for 236/609 cities.
Data acquired for 237/609 cities.
Not found
Data acquired for 237/609 cities.
Not found
Data acquired for 237/609 cities.
Data acquired for 238/609 cities.
Data acquired for 239/609 cities.
Data acquired for 240/609 cities.
Data acquired for 241/609 cities.
Data acquired for 242/609 cities.
Data acquired for 243/609 cities.
Not found
Data acquired for 243/609 cities.
Data acquired for 244/609 cities.
Data acq

Data acquired for 430/609 cities.
Data acquired for 431/609 cities.
Data acquired for 432/609 cities.
Data acquired for 433/609 cities.
Data acquired for 434/609 cities.
Data acquired for 435/609 cities.
Data acquired for 436/609 cities.
Data acquired for 437/609 cities.
Data acquired for 438/609 cities.
Data acquired for 439/609 cities.
Data acquired for 440/609 cities.
Data acquired for 441/609 cities.
Data acquired for 442/609 cities.
Data acquired for 443/609 cities.
Data acquired for 444/609 cities.
Data acquired for 445/609 cities.
Data acquired for 446/609 cities.
Data acquired for 447/609 cities.
Data acquired for 448/609 cities.
Not found
Data acquired for 448/609 cities.
Data acquired for 449/609 cities.
Data acquired for 450/609 cities.
Data acquired for 451/609 cities.
Data acquired for 452/609 cities.
Data acquired for 453/609 cities.
Data acquired for 454/609 cities.
Data acquired for 455/609 cities.
Data acquired for 456/609 cities.
Data acquired for 457/609 cities.
Data

In [4]:
# city_id = 'Yanliang'

# london_response = requests.get(base_url).json()
# print(json.dumps(london_response, indent = 4, sort_keys = True))

# print(london_response)

# print(city_names)

london_url = f"http://api.openweathermap.org/data/2.5/weather?q=london&appid={weather_api_key}"
london_response = requests.get(london_url).json()
print(json.dumps(london_response, indent = 4, sort_keys = True))

print(london_response["name"])

{
    "base": "stations",
    "clouds": {
        "all": 90
    },
    "cod": 200,
    "coord": {
        "lat": 51.5085,
        "lon": -0.1257
    },
    "dt": 1611978651,
    "id": 2643743,
    "main": {
        "feels_like": 278.15,
        "humidity": 81,
        "pressure": 999,
        "temp": 280.49,
        "temp_max": 281.15,
        "temp_min": 279.82
    },
    "name": "London",
    "rain": {
        "1h": 0.37
    },
    "sys": {
        "country": "GB",
        "id": 1414,
        "sunrise": 1611992502,
        "sunset": 1612025146,
        "type": 1
    },
    "timezone": 0,
    "visibility": 10000,
    "weather": [
        {
            "description": "light rain",
            "icon": "10n",
            "id": 500,
            "main": "Rain"
        }
    ],
    "wind": {
        "deg": 80,
        "speed": 1.54
    }
}
London


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [5]:
weather_dict = {
    'City': city_names,
    'Lat': city_lats,
    'Lng': city_lngs,
    'Max Temp': city_max_temps,
    'Humidity': city_humidity,
    'Cloudiness': city_cloudiness,
    'Wind Speed': city_wind,
    'Country': city_country,
    'Date': city_date
}

weather_df = pd.DataFrame(weather_dict)

weather_df

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Saint Paul Harbor,57.7900,-152.4072,35.600,55,1,7.20,US,1611978482
1,George Town,5.4112,100.3354,86.000,61,20,3.60,MY,1611977851
2,Cosalá,24.4125,-106.6908,62.996,87,1,1.09,MX,1611978482
3,Rikitea,-23.1203,-134.9692,80.186,72,24,4.39,PF,1611977983
4,Namatanai,-3.6667,152.4333,84.218,76,70,6.28,PG,1611978094
...,...,...,...,...,...,...,...,...,...
550,Pangnirtung,66.1451,-65.7125,-5.800,65,1,1.03,CA,1611978679
551,Gariāband,20.6350,82.0614,72.644,28,14,2.33,IN,1611978680
552,Haileybury,47.4490,-79.6373,6.800,78,90,4.12,CA,1611978680
553,Waipawa,-41.4122,175.5153,64.994,55,21,8.75,NZ,1611978680


In [7]:
weather_stats_df = weather_df[['Lat', 'Lng', 'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed', 'Date']]
weather_stats_df



Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
0,57.7900,-152.4072,35.600,55,1,7.20,1611978482
1,5.4112,100.3354,86.000,61,20,3.60,1611977851
2,24.4125,-106.6908,62.996,87,1,1.09,1611978482
3,-23.1203,-134.9692,80.186,72,24,4.39,1611977983
4,-3.6667,152.4333,84.218,76,70,6.28,1611978094
...,...,...,...,...,...,...,...
550,66.1451,-65.7125,-5.800,65,1,1.03,1611978679
551,20.6350,82.0614,72.644,28,14,2.33,1611978680
552,47.4490,-79.6373,6.800,78,90,4.12,1611978680
553,-41.4122,175.5153,64.994,55,21,8.75,1611978680


In [16]:
# weather_stats_df = weather_df['Lat', 'Lng', 'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed', 'Date']
# weather_stats_df.head()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,599.0,599.0,599.0,599.0,599.0,599.0,599.0
mean,19.877028,19.568998,69.794808,69.353923,51.569282,7.979416,1593605000.0
std,33.262944,91.418472,15.678452,22.704698,35.405602,5.660694,57.38815
min,-54.8,-179.17,12.2,7.0,0.0,0.36,1593605000.0
25%,-8.54,-61.21,59.0,55.0,20.0,3.62,1593605000.0
50%,24.37,24.67,73.11,75.0,59.0,6.73,1593605000.0
75%,46.585,102.255,80.6,87.0,85.0,11.05,1593605000.0
max,78.22,179.32,116.6,100.0,100.0,42.5,1593605000.0


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [9]:
# check the stats to see if there are any cities with humidity over 100%

weather_stats_df.describe()

# and there are not, so we're all good!

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,555.0,555.0,555.0,555.0,555.0,555.0,555.0
mean,20.937052,19.817959,46.746411,73.893694,53.390991,3.716595,1611978000.0
std,33.928243,90.369373,34.799387,19.450795,40.445938,2.531199,271.9156
min,-54.8,-175.2,-43.6,12.0,0.0,0.09,1611978000.0
25%,-8.3274,-57.0621,26.285,64.0,3.0,1.76,1611978000.0
50%,23.6667,25.678,58.244,79.0,63.0,3.09,1611979000.0
75%,50.75825,97.84065,75.047,88.0,93.0,5.14,1611979000.0
max,78.2186,178.4167,105.8,100.0,100.0,12.35,1611979000.0


In [10]:
#  Get the indices of cities that have humidity over 100%.

dirty_city_data = weather_stats_df[(weather_stats_df["Humidity"] > 100)].index

dirty_city_data

# none

Int64Index([], dtype='int64')

In [12]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".

clean_city_data = weather_stats_df.drop(dirty_city_data, inplace = False)

clean_city_data.head()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
0,57.79,-152.4072,35.6,55,1,7.2,1611978482
1,5.4112,100.3354,86.0,61,20,3.6,1611977851
2,24.4125,-106.6908,62.996,87,1,1.09,1611978482
3,-23.1203,-134.9692,80.186,72,24,4.39,1611977983
4,-3.6667,152.4333,84.218,76,70,6.28,1611978094


In [15]:
# Extract relevant data fields
lats = weather_df['Lat']
max_temps = weather_df['Max Temp']
humidity = weather_df['Humidity']
cloudiness = weather_df['Cloudiness']
wind_speed = weather_df['Wind Speed']

clean_city_data.to_csv('cityweather.csv', index = False)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [25]:
# Temperature (F) vs. Latitude

# scatterplot and properties
plt.scatter(lats, max_temps, edgecolor = "black", marker = "o", linewidths = 1, alpha = 0.5, label = "Cities")
plt.grid(True)

# labels 

plt.title('City Latitude vs. Max Temp (F) (01/29/2021)' )
plt.xlabel('Latitude')
plt.ylabel('Max Temp (F)')

# save and show
plt.savefig("lat_vs_temp.png")
plt.show()

<IPython.core.display.Javascript object>

## Latitude vs. Humidity Plot

In [26]:
# Latitude vs. Humidity (%)

# scatterplot and properties
plt.scatter(lats, humidity, edgecolor = "black", marker = "o", linewidths = 1, alpha = 0.5, label = "Cities")
plt.grid(True)

# labels 

plt.title('City Latitude vs. Humidity (%) (01/29/2021)' )
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')

# save and show
plt.savefig("lat_vs_humidity.png")
plt.show()

<IPython.core.display.Javascript object>

## Latitude vs. Cloudiness Plot

In [100]:
# Latitude vs. Cloudiness

# scatterplot and properties
plt.scatter(lats, max_temps, edgecolor = "black", marker = "o", linewidths = 1, alpha = 0.5, label = "Cities")
plt.grid(True)

# labels 

plt.title('City Latitude vs. Cloudiness (%) (01/29/2021)' )
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')

# save and show
plt.savefig("lat_vs_cloudiness.png")
plt.show()

<IPython.core.display.Javascript object>

## Latitude vs. Wind Speed Plot

In [28]:
# Latitude vs. Wind Speed 

# scatterplot and properties
plt.scatter(lats, max_temps, edgecolor = "black", marker = "o", linewidths = 1, alpha = 0.5, label = "Cities")
plt.grid(True)

# labels 

plt.title('City Latitude vs. Wind Speed (mph) (01/29/2021)' )
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')

# save and show
plt.savefig("lat_vs_windspeed.png")
plt.show()

<IPython.core.display.Javascript object>

## Linear Regression

In [49]:
def plot_linear_regression(x_values, y_values, title, text_coordinates):
    # Plot regression line for both hemispheres
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    regress_values = x_values * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    
    # Plot scatterplot & linear regression line
    plt.scatter(x_values, y_values, edgecolor = "black", marker = "o", linewidths = 1, alpha = 0.5, label = "Cities")
    plt.grid(True)
    plt.annotate(line_eq, text_coordinates, fontsize = 15, color = "red")
    plt.xlabel('Latitude')
    plt.ylabel(title)
    plt.plot(x_values, regress_values, color = "red")
    print(f"The r-value is: {rvalue**2}")
    plt.show()
    
    

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [50]:
northern_hemi_df = weather_df.loc[(weather_df["Lat"] >= 0)]
southern_hemi_df = weather_df.loc[(weather_df["Lat"] < 0)]

In [75]:
# Northern Hemisphere: Latitude vs. Max Temp (F)
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]
plot_linear_regression(x_values, y_values, "Max Temp (F)", (5,10))

plt.title("N. Hemisphere: Latitude vs. Max Temp (F)")
plt.show()

plt.savefig("northern_hemi_temp.png")

<IPython.core.display.Javascript object>

The r-value is: 0.7379560088978061


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [76]:
# Southern Hemisphere: Latitude vs. Max Temp (F)
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]
plot_linear_regression(x_values, y_values, "Max Temp (F)", (-25,48))

plt.title("S. Hemisphere: Latitude vs. Max Temp (F)")
plt.show()

plt.savefig("southern_hemi_temp.png")

<IPython.core.display.Javascript object>

The r-value is: 0.20166623669083503


####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [77]:
# Northern Hemisphere: Latitude vs. Humidity (%)
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]
plot_linear_regression(x_values, y_values, "Humidity (%)", (45,10))

plt.title("N. Hemisphere: Latitude vs. Humidity (%)")
plt.show()

plt.savefig("northern_hemi_humidity.png")

<IPython.core.display.Javascript object>

The r-value is: 0.18649464165454843


####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [78]:
# Southern Hemisphere: Latitude vs. Humidity (%)
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]
plot_linear_regression(x_values, y_values, "Humidity (%)", (-20,50))

plt.title("S. Hemisphere: Latitude vs. Humidity (%)")
plt.show()

plt.savefig("southern_hemi_humidity.png")

<IPython.core.display.Javascript object>

The r-value is: 0.05398599259636088


####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [86]:
# Northern Hemisphere: Latitude vs. Cloudiness (%)
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]
plot_linear_regression(x_values, y_values, "Cloudiness (%)", (25,30))

plt.title("N. Hemisphere: Latitude vs. Cloudiness (%)")
plt.show()

plt.savefig("northern_hemi_cloudiness.png")

<IPython.core.display.Javascript object>

The r-value is: 0.11566025817912785


####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [87]:
# Southern Hemisphere: Latitude vs. Cloudiness (%)
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]
plot_linear_regression(x_values, y_values, "Cloudiness (%)", (-25,45))

plt.title("S. Hemisphere: Latitude vs. Cloudiness (%)")
plt.show()

plt.savefig("southern_hemi_cloudiness.png")

<IPython.core.display.Javascript object>

The r-value is: 0.23600453829421394


####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [97]:
# Northern Hemisphere: Latitude vs. Wind Speed (mph)
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]
plot_linear_regression(x_values, y_values, "Wind Speed (mph)", (15,10))

plt.title("N. Hemisphere: Latitude vs. Wind Speed (mph)")
plt.show()

plt.savefig("northern_hemi_windspeed.png")

<IPython.core.display.Javascript object>

The r-value is: 5.104340357744179e-05


####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [99]:
# Southern Hemisphere: Latitude vs. Wind Speed (mph)
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]
plot_linear_regression(x_values, y_values, "Wind Speed (mph)", (-50,3))

plt.title("S. Hemisphere: Latitude vs. Wind Speed (mph)")
plt.show()

plt.savefig("southern_hemi_windspeed.png")

<IPython.core.display.Javascript object>

The r-value is: 0.016220875744720635
