# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint
import json
import scipy.stats as st

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination CSV for the retrieved city weather records
output_data_file = "output_data/cities.csv"

# Coordinate bounds as (min, max) pairs, used when sampling random locations
lat_range, lng_range = (-90, 90), (-180, 180)


## Generate Cities List

In [2]:
# Build the list of distinct cities nearest to randomly sampled coordinates.
# `cities` keeps first-seen order; `seen_cities` mirrors it as a set so the
# uniqueness check does not rescan the whole list for every sample.
cities = []
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count.
# A bare `len(cities)` is only displayed when it is the last expression of a
# cell, so the count has to be printed explicitly here.
print(f"Number of unique cities: {len(cities)}")
pprint(cities)

['thompson',
 'caohe',
 'qaanaaq',
 'tasiilaq',
 'poykovskiy',
 'ushuaia',
 'komsomolskiy',
 'sakyla',
 'longyearbyen',
 'kodiak',
 'albany',
 'tazovskiy',
 'takoradi',
 'barcelona',
 'tilichiki',
 'hobart',
 'khatanga',
 'upernavik',
 'hithadhoo',
 'atuona',
 'rikitea',
 'koregaon',
 'yellowknife',
 'punta arenas',
 'dikson',
 'saint-louis',
 'bluff',
 'hermanus',
 'tuktoyaktuk',
 'mabaruma',
 'taolanaro',
 'kaffrine',
 'sentyabrskiy',
 'kaitangata',
 'sinegorsk',
 'dauphin',
 'fukue',
 'vaini',
 'kruisfontein',
 'namibe',
 'cabo san lucas',
 'morant bay',
 'valparaiso',
 'longkou',
 'shimoda',
 'jamestown',
 'susehri',
 'bitkine',
 'busselton',
 'doha',
 'hilo',
 'liaocheng',
 'belushya guba',
 'haikou',
 'atasu',
 'havre-saint-pierre',
 'fairlie',
 'contamana',
 'pangnirtung',
 'mbandaka',
 'hambantota',
 'winslow',
 'bredasdorp',
 'texarkana',
 'east london',
 'maragogi',
 'mataura',
 'kapaa',
 'ahipara',
 'saleaula',
 'mareeba',
 'mishkino',
 'leningradskiy',
 'mount isa',
 'tabas

In [3]:
# Fresh, independent accumulators for the weather fields (one list per field)
city, lat, lng = [], [], []
max_temp, humidity, cloudiness, wind_speed = [], [], [], []
country, get_date = [], []
city_id, city_name = [], []

In [4]:
# Save config information
# The API key is sent as a query parameter, so use HTTPS to keep it out of
# plaintext traffic.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"
query_url = f"{url}appid={weather_api_key}&units={units}&q="


# Preview the response structure for one city only.
# Looping over every city just to keep the final response spends one API call
# per city for nothing; requesting the last city directly shows the same sample.
sample_city = cities[-1]
response = requests.get(query_url + sample_city, timeout=10)

weather_json = response.json()
pprint(weather_json)

{'base': 'stations',
 'clouds': {'all': 0},
 'cod': 200,
 'coord': {'lat': 64.23, 'lon': 27.73},
 'dt': 1604689602,
 'id': 654899,
 'main': {'feels_like': 30.99,
          'humidity': 75,
          'pressure': 1003,
          'temp': 46.4,
          'temp_max': 46.4,
          'temp_min': 46.4},
 'name': 'Kajaani',
 'sys': {'country': 'FI',
         'id': 1344,
         'sunrise': 1604642984,
         'sunset': 1604669743,
         'type': 1},
 'timezone': 7200,
 'visibility': 10000,
 'weather': [{'description': 'clear sky',
              'icon': '01n',
              'id': 800,
              'main': 'Clear'}],
 'wind': {'deg': 280, 'gust': 40.26, 'speed': 23.04}}


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Create empty lists to store weather data.
# Each list receives exactly one entry per successfully retrieved city, so all
# of them stay the same length and can be combined into a DataFrame later.
lat = []
lng = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
get_date = []
city_id = []
city_name = []


print("Beginning Data Retrieval")
print("-----------------------------")

# Log records in sets of 50, numbered 1-50 within each set
record_count = 0
set_count = 1

for city in cities:
    try:
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field before touching the lists: if any key is missing,
        # nothing has been appended yet and the lists cannot drift out of step.
        curr_name = response['name']
        curr_lat = response['coord']['lat']
        curr_lng = response['coord']['lon']
        curr_max_temp = response['main']['temp_max']
        curr_humidity = response['main']['humidity']
        curr_cloudiness = response['clouds']['all']
        curr_wind_speed = response['wind']['speed']
        curr_country = response['sys']['country']
        curr_date = response['dt']
        curr_city_id = response['id']

    except KeyError:
        # The response lacks the expected weather fields (e.g. an error payload)
        print("City not found. Skipping.....")
        continue

    except (requests.exceptions.RequestException, ValueError) as err:
        # Network failure, timeout or a non-JSON body. Only the exception type is
        # printed because the full message can contain the request URL and key.
        print(f"Request failed for {city} ({type(err).__name__}). Skipping.....")
        continue

    city_name.append(curr_name)
    lat.append(curr_lat)
    lng.append(curr_lng)
    max_temp.append(curr_max_temp)
    humidity.append(curr_humidity)
    cloudiness.append(curr_cloudiness)
    wind_speed.append(curr_wind_speed)
    country.append(curr_country)
    get_date.append(curr_date)
    city_id.append(curr_city_id)

    # Advance the counter first, then roll over to record 1 of the next set
    # once the current set already holds 50 records.
    record_count += 1
    if record_count > 50:
        record_count = 1
        set_count += 1

    print(f"Processing Record {record_count} of Set {set_count} | {city} {curr_city_id}")


print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")

Beginning Data Retrieval
-----------------------------
Processing Record 1 of Set 1 | thompson 6165406
Processing Record 2 of Set 1 | caohe 1816026
Processing Record 3 of Set 1 | qaanaaq 3831208
Processing Record 4 of Set 1 | tasiilaq 3424607
Processing Record 5 of Set 1 | poykovskiy 1494276
Processing Record 6 of Set 1 | ushuaia 3833367
Processing Record 7 of Set 1 | komsomolskiy 1513491
Processing Record 8 of Set 1 | sakyla 638104
Processing Record 9 of Set 1 | longyearbyen 2729907
Processing Record 10 of Set 1 | kodiak 5866583
Processing Record 11 of Set 1 | albany 5106841
Processing Record 12 of Set 1 | tazovskiy 1489853
Processing Record 13 of Set 1 | takoradi 2294915
Processing Record 14 of Set 1 | barcelona 3128760
Processing Record 15 of Set 1 | tilichiki 2120591
Processing Record 16 of Set 1 | hobart 2163355
Processing Record 17 of Set 1 | khatanga 2022572
Processing Record 18 of Set 1 | upernavik 3418910
Processing Record 19 of Set 1 | hithadhoo 1282256
Processing Record 20 o

City not found. Skipping.....
Processing Record 9 of Set 4 | nhulunbuy 2064735
Processing Record 10 of Set 4 | lorengau 2092164
Processing Record 11 of Set 4 | northam 2641434
Processing Record 12 of Set 4 | jumla 1283285
City not found. Skipping.....
Processing Record 13 of Set 4 | te anau 2181625
Processing Record 14 of Set 4 | haines junction 5969025
Processing Record 15 of Set 4 | riyadh 108410
Processing Record 16 of Set 4 | hopkinsville 4295251
Processing Record 17 of Set 4 | clarence town 2171465
Processing Record 18 of Set 4 | nerchinskiy zavod 2019323
Processing Record 19 of Set 4 | humberto de campos 3398428
Processing Record 20 of Set 4 | santa lucia 2511150
Processing Record 21 of Set 4 | port elizabeth 964420
Processing Record 22 of Set 4 | jaisalmer 1269507
Processing Record 23 of Set 4 | sfantu gheorghe 667303
Processing Record 24 of Set 4 | saint-joseph 6690296
Processing Record 25 of Set 4 | sabang 1214026
Processing Record 26 of Set 4 | banda aceh 1215502
Processing R

Processing Record 8 of Set 7 | yako 2353688
City not found. Skipping.....
Processing Record 9 of Set 7 | arlit 2447513
Processing Record 10 of Set 7 | santiago de cao 3692073
Processing Record 11 of Set 7 | katsuura 2112309
Processing Record 12 of Set 7 | morehead 4301317
Processing Record 13 of Set 7 | plon 2853162
Processing Record 14 of Set 7 | floresta 3399518
Processing Record 15 of Set 7 | sarai 498711
Processing Record 16 of Set 7 | shwebo 1296736
Processing Record 17 of Set 7 | peterhead 2640351
Processing Record 18 of Set 7 | cockburn town 3576994
Processing Record 19 of Set 7 | tongliao 2034400
Processing Record 20 of Set 7 | rio gallegos 3838859
Processing Record 21 of Set 7 | coari 3664539
Processing Record 22 of Set 7 | guerrero negro 4021858
City not found. Skipping.....
Processing Record 23 of Set 7 | warrington 2634739
Processing Record 24 of Set 7 | horasan 745527
Processing Record 25 of Set 7 | san vicente 3428068
Processing Record 26 of Set 7 | puerto carreno 3671519

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Combine the per-city lists into one table; column labels are paired with
# their source lists in display order.
weather_data = pd.DataFrame(
    dict(
        zip(
            ["City", "Lat", "Lng", "Max Temp", "Humidity",
             "Cloudiness", "Wind Speed", "Country", "Date"],
            [city_name, lat, lng, max_temp, humidity,
             cloudiness, wind_speed, country, get_date],
        )
    )
)
weather_data

In [None]:
# Save data to csv file.
# Use the `output_data_file` path configured in the setup cell instead of
# repeating the literal, so the destination is defined in one place.
weather_data.to_csv(output_data_file, index=False)

In [None]:
# Number of non-null entries per column (quick completeness check)
weather_data.count()

In [None]:
# Summary statistics for the columns of the weather table
weather_data.describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Build a new DataFrame without the humidity outliers: keep every row whose
# humidity is not above 100. `weather_data` itself is left unchanged.

clean_city_data = weather_data.loc[~(weather_data['Humidity'] > 100)]
clean_city_data

In [None]:
# Save clean dataframe.
# This writes to the `output_data_file` path from the setup cell, the same
# file as the earlier raw export, so the cleaned data replaces it.
clean_city_data.to_csv(output_data_file, index=False)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot city latitude against max temp.
# The title date is taken from the newest value in the 'Date' column (the API's
# `dt` field, a Unix timestamp per the OpenWeatherMap docs) instead of a
# hard-coded string, so it stays correct when the data is re-fetched.
analysis_date = pd.to_datetime(clean_city_data['Date'].max(), unit='s').strftime('%m/%d/%y')

plt.scatter(clean_city_data['Lat'], clean_city_data['Max Temp'], marker='o', color= 'steelblue', edgecolor="black")
plt.grid()
plt.title(f"City Latitude vs Max Temperature ({analysis_date})")
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")

plt.savefig("../Graphs/Temp vs Latitude.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot city latitude against humidity.
# The title date is taken from the newest value in the 'Date' column (the API's
# `dt` field, a Unix timestamp per the OpenWeatherMap docs) instead of a
# hard-coded string, so it stays correct when the data is re-fetched.
analysis_date = pd.to_datetime(clean_city_data['Date'].max(), unit='s').strftime('%m/%d/%y')

plt.scatter(clean_city_data['Lat'], clean_city_data['Humidity'], marker='o', color= 'steelblue', edgecolor="black")
plt.grid()
plt.title(f"City Latitude vs Humidity ({analysis_date})")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")

plt.savefig("../Graphs/Humidity vs Latitude.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot city latitude against cloudiness.
# The title date is taken from the newest value in the 'Date' column (the API's
# `dt` field, a Unix timestamp per the OpenWeatherMap docs) instead of a
# hard-coded string, so it stays correct when the data is re-fetched.
analysis_date = pd.to_datetime(clean_city_data['Date'].max(), unit='s').strftime('%m/%d/%y')

plt.scatter(clean_city_data['Lat'], clean_city_data['Cloudiness'], marker='o', color= 'steelblue', edgecolor="black")
plt.grid()
plt.title(f"City Latitude vs Cloudiness ({analysis_date})")
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")

plt.savefig("../Graphs/Cloudiness vs Latitude.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot city latitude against wind speed.
# The title date is taken from the newest value in the 'Date' column (the API's
# `dt` field, a Unix timestamp per the OpenWeatherMap docs) instead of a
# hard-coded string, so it stays correct when the data is re-fetched.
analysis_date = pd.to_datetime(clean_city_data['Date'].max(), unit='s').strftime('%m/%d/%y')

plt.scatter(clean_city_data['Lat'], clean_city_data['Wind Speed'], marker='o', color= 'steelblue', edgecolor="black")
plt.grid()
plt.title(f"City Latitude vs Wind Speed ({analysis_date})")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")

plt.savefig("../Graphs/Wind Speed vs Latitude.png")
plt.show()

## Linear Regression

In [None]:
# Partition the cleaned cities by the sign of their latitude.
# Rows with a latitude of exactly 0 end up in neither frame.
south_hemisphere = clean_city_data.loc[lambda frame: frame["Lat"] < 0]
north_hemisphere = clean_city_data.loc[lambda frame: frame["Lat"] > 0]
south_hemisphere.head()


####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Do linear regression in N. hemisphere of max temp vs latitude
x_values = north_hemisphere['Lat']
y_values = north_hemisphere['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"The r-squared value is: {rvalue**2}")

# Using scatter plot, show linear regression line
plt.scatter(x_values,y_values, color= "steelblue", edgecolor= "black")
plt.plot(x_values,regress_values,"r-")
# Place the equation in axes-fraction coordinates (0-1 across the plot area) so
# it is always inside the axes; a fixed data-coordinate position can fall
# outside the plotted range for a given random sample and then is not drawn.
plt.annotate(line_eq,(0.05,0.1),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Northern Hemisphere - Max Temp vs Latitude Linear Regression')
plt.xlabel('Latitude')
# Units match the imperial units requested from the API
plt.ylabel('Max Temperature (F)')

plt.savefig("../Graphs/Linregress Northern Hem - Temp vs Lat.png")
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Do linear regression in S. hemisphere of max temp vs latitude
x_values = south_hemisphere['Lat']
y_values = south_hemisphere['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"The r-squared value is: {rvalue**2}")

# Using scatter plot, show linear regression line
plt.scatter(x_values,y_values, color= "steelblue", edgecolor= "black")
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are all negative, so a data-coordinate anchor at x=6 lies
# outside the axes and the equation is not drawn. Axes-fraction coordinates
# (0-1 across the plot area) keep it visible regardless of the data range.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Southern Hemisphere - Max Temp vs Latitude Linear Regression')
plt.xlabel('Latitude')
# Units match the imperial units requested from the API
plt.ylabel('Max Temperature (F)')

plt.savefig("../Graphs/Linregress Southern Hem - Temp vs Lat.png")
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Do linear regression in N. hemisphere of humidity vs latitude
x_values = north_hemisphere['Lat']
y_values = north_hemisphere['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"The r-squared value is: {rvalue**2}")

# Using scatter plot, show linear regression line
plt.scatter(x_values,y_values, color= "steelblue", edgecolor= "black")
plt.plot(x_values,regress_values,"r-")
# Place the equation in axes-fraction coordinates (0-1 across the plot area) so
# it is always inside the axes; a fixed data-coordinate position can fall
# outside the plotted range for a given random sample and then is not drawn.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Northern Hemisphere - Humidity vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')

plt.savefig("../Graphs/Linregress Northern Hem - Humidity vs Lat.png")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Do linear regression in S. hemisphere of humidity vs latitude
x_values = south_hemisphere['Lat']
y_values = south_hemisphere['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"The r-squared value is: {rvalue**2}")

# Using scatter plot, show linear regression line
plt.scatter(x_values,y_values, color= "steelblue", edgecolor= "black")
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are all negative, so a data-coordinate anchor at x=6 lies
# outside the axes and the equation is not drawn. Axes-fraction coordinates
# (0-1 across the plot area) keep it visible regardless of the data range.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Southern Hemisphere - Humidity vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')

plt.savefig("../Graphs/Linregress Southern Hem - Humidity vs Lat.png")
plt.show()

In [None]:
#Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Do linear regression in N. hemisphere of cloudiness vs latitude
x_values = north_hemisphere['Lat']
y_values = north_hemisphere['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"The r-squared value is: {rvalue**2}")

# Using scatter plot, show linear regression line
plt.scatter(x_values,y_values, color= "steelblue", edgecolor= "black")
plt.plot(x_values,regress_values,"r-")
# Place the equation in axes-fraction coordinates (0-1 across the plot area) so
# it is always inside the axes; a fixed data-coordinate position can fall
# outside the plotted range for a given random sample and then is not drawn.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Northern Hemisphere - Cloudiness (%) vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')

plt.savefig("../Graphs/Linregress Northern Hem - Cloudiness vs Lat.png")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Do linear regression in S. hemisphere of cloudiness vs latitude
x_values = south_hemisphere['Lat']
y_values = south_hemisphere['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"The r-squared value is: {rvalue**2}")

# Using scatter plot, show linear regression line
plt.scatter(x_values,y_values, color= "steelblue", edgecolor= "black")
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are all negative, so a data-coordinate anchor at x=10 lies
# outside the axes and the equation is not drawn. Axes-fraction coordinates
# (0-1 across the plot area) keep it visible regardless of the data range.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Southern Hemisphere - Cloudiness (%) vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')

plt.savefig("../Graphs/Linregress Southern Hem - Cloudiness vs Lat.png")
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Do linear regression in N. hemisphere of wind speed vs latitude
x_values = north_hemisphere['Lat']
y_values = north_hemisphere['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"The r-squared value is: {rvalue**2}")

# Using scatter plot, show linear regression line
plt.scatter(x_values,y_values, color= "steelblue", edgecolor= "black")
plt.plot(x_values,regress_values,"r-")
# Place the equation in axes-fraction coordinates (0-1 across the plot area) so
# it is always inside the axes; the previous data-coordinate anchor (40, 40) is
# only drawn when the sampled wind speeds happen to reach 40.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Northern Hemisphere - Wind Speed vs Latitude Linear Regression')
plt.xlabel('Latitude')
# Units match the imperial units requested from the API
plt.ylabel('Wind Speed (mph)')

plt.savefig("../Graphs/Linregress Northern Hem - Wind Speed vs Lat.png")
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Do linear regression in S. hemisphere of wind speed vs latitude
x_values = south_hemisphere['Lat']
y_values = south_hemisphere['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"The r-squared value is: {rvalue**2}")

# Using scatter plot, show linear regression line
plt.scatter(x_values,y_values, color= "steelblue", edgecolor= "black")
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are all negative, so a data-coordinate anchor at x=10 lies
# outside the axes and the equation is not drawn. Axes-fraction coordinates
# (0-1 across the plot area) keep it visible regardless of the data range.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Southern Hemisphere - Wind Speed vs Latitude Linear Regression')
plt.xlabel('Latitude')
# Units match the imperial units requested from the API
plt.ylabel('Wind Speed (mph)')

plt.savefig("../Graphs/Linregress Southern Hem - Wind Speed vs Lat.png")
plt.show()