# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [90]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import csv
from scipy import stats
from scipy.stats import linregress
from datetime import date

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): this path is not referenced again in the visible cells; the CSV
# export further down writes to a different, hard-coded file name. Confirm
# which of the two is intended.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# (lower, upper) bounds handed to np.random.uniform when sampling coordinates.
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [91]:
# Collect the distinct city names nearest to a batch of random coordinates.
# NOTE: no random seed is set, so every run samples a different set of cities.
lat_lngs = []
cities = []
seen_cities = set()  # O(1) membership test; `cities` keeps first-seen order

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # Only the first occurrence of each city name is kept
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

print(cities)

# Print the city count to confirm sufficient count
len(cities)

['chagda', 'chuy', 'namibe', 'hilo', 'cabo san lucas', 'vaini', 'punta arenas', 'padang', 'kruisfontein', 'bonfim', 'chokurdakh', 'woodward', 'hobart', 'cape town', 'urumqi', 'albany', 'hermanus', 'leninsk', 'kapaa', 'arinos', 'georgetown', 'ushuaia', 'aksarka', 'saint-philippe', 'bethel', 'rikitea', 'puerto ayora', 'tiksi', 'half moon bay', 'adre', 'longyearbyen', 'atuona', 'vila do maio', 'lavrentiya', 'hithadhoo', 'porto novo', 'bredasdorp', 'illoqqortoormiut', 'cayenne', 'butterworth', 'miri', 'itarema', 'tasiilaq', 'nanortalik', 'bluff', 'harper', 'taolanaro', 'granville', 'wagga wagga', 'damietta', 'belmonte', 'tarauaca', 'oskarshamn', 'kaitangata', 'galitsy', 'mahebourg', 'cherskiy', 'keti bandar', 'bengkulu', 'samusu', 'aykhal', 'avarua', 'baker city', 'king city', 'kahului', 'attawapiskat', 'bandundu', 'labutta', 'castro', 'lewistown', 'portland', 'mataura', 'bambanglipuro', 'daru', 'tsihombe', 'korla', 'lerwick', 'palabuhanratu', 'port alfred', 'udachnyy', 'beitbridge', 'qaan

629

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [92]:
# Base endpoint of the OpenWeatherMap current-weather API. HTTPS is used so the
# API key carried in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"  # temperatures in °F and wind speed in mph, as labelled on the plots

# Partial query URL; the city name is appended after "q=" for each request
query_url = f"{url}appid={weather_api_key}&units={units}&q="

In [93]:
# Per-city weather fields, stored as parallel lists (one entry per city found).
name = []
lat = []
lon = []
temp_max = []
humidity = []
clouds = []
speed = []
country = []
dt = []

counter = 0

# loop through cities to get weather data
for city in cities:

    counter += 1

    try:
        # get weather data
        response = requests.get(query_url + city).json()

        # Read every field BEFORE appending anything. If a key is missing the
        # error is raised here, so the parallel lists can never end up with
        # different lengths (which would break the DataFrame construction).
        record = (
            response['name'],
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp_max'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
            response['dt'],
        )
    except (KeyError, TypeError, ValueError, requests.exceptions.RequestException):
        # KeyError / TypeError: payload lacks the expected weather fields
        # ValueError: response body was not valid JSON
        # RequestException: network-level failure
        # (KeyboardInterrupt is deliberately NOT caught, so the loop can be stopped.)
        print(f"City not found: {city}  Skipping ...")
        continue

    columns = (name, lat, lon, temp_max, humidity, clouds, speed, country, dt)
    for column, value in zip(columns, record):
        column.append(value)

    print(f"Processing Record {counter} | The city name is {city}")
    
    

City not found: chagda  Skipping ...
Processing Record 2 | The city name is chuy
Processing Record 3 | The city name is namibe
Processing Record 4 | The city name is hilo
Processing Record 5 | The city name is cabo san lucas
Processing Record 6 | The city name is vaini
Processing Record 7 | The city name is punta arenas
Processing Record 8 | The city name is padang
Processing Record 9 | The city name is kruisfontein
Processing Record 10 | The city name is bonfim
Processing Record 11 | The city name is chokurdakh
Processing Record 12 | The city name is woodward
Processing Record 13 | The city name is hobart
Processing Record 14 | The city name is cape town
Processing Record 15 | The city name is urumqi
Processing Record 16 | The city name is albany
Processing Record 17 | The city name is hermanus
Processing Record 18 | The city name is leninsk
Processing Record 19 | The city name is kapaa
Processing Record 20 | The city name is arinos
Processing Record 21 | The city name is georgetown
P

Processing Record 170 | The city name is khatanga
Processing Record 171 | The city name is port augusta
Processing Record 172 | The city name is constantine
City not found: tumannyy  Skipping ...
Processing Record 174 | The city name is new norfolk
Processing Record 175 | The city name is nyagan
Processing Record 176 | The city name is boguchany
Processing Record 177 | The city name is chiredzi
Processing Record 178 | The city name is paciran
Processing Record 179 | The city name is rio grande
Processing Record 180 | The city name is teahupoo
City not found: louisbourg  Skipping ...
Processing Record 182 | The city name is civita castellana
Processing Record 183 | The city name is chapada dos guimaraes
Processing Record 184 | The city name is dali
Processing Record 185 | The city name is san juan de uraba
Processing Record 186 | The city name is ballina
Processing Record 187 | The city name is otane
Processing Record 188 | The city name is talnakh
Processing Record 189 | The city name 

Processing Record 334 | The city name is duku
Processing Record 335 | The city name is bulgan
Processing Record 336 | The city name is saint-francois
Processing Record 337 | The city name is kinshasa
Processing Record 338 | The city name is naze
Processing Record 339 | The city name is moindou
Processing Record 340 | The city name is cabedelo
Processing Record 341 | The city name is nueva loja
Processing Record 342 | The city name is dillon
Processing Record 343 | The city name is morondava
Processing Record 344 | The city name is akyab
Processing Record 345 | The city name is carbonia
Processing Record 346 | The city name is wawa
Processing Record 347 | The city name is port elizabeth
Processing Record 348 | The city name is gusau
Processing Record 349 | The city name is souillac
Processing Record 350 | The city name is vanderhoof
Processing Record 351 | The city name is calama
Processing Record 352 | The city name is shimoda
Processing Record 353 | The city name is narasannapeta
Proc

Processing Record 501 | The city name is poum
Processing Record 502 | The city name is dingle
Processing Record 503 | The city name is luderitz
Processing Record 504 | The city name is coquimbo
Processing Record 505 | The city name is bilibino
Processing Record 506 | The city name is maraa
Processing Record 507 | The city name is awbari
Processing Record 508 | The city name is mikuni
Processing Record 509 | The city name is opuwo
Processing Record 510 | The city name is thenzawl
Processing Record 511 | The city name is flinders
City not found: azimur  Skipping ...
Processing Record 513 | The city name is dovers
City not found: lolua  Skipping ...
City not found: geresk  Skipping ...
Processing Record 516 | The city name is apopka
Processing Record 517 | The city name is kualakapuas
Processing Record 518 | The city name is salisbury
Processing Record 519 | The city name is makakilo city
Processing Record 520 | The city name is gizo
Processing Record 521 | The city name is khartoum
Proce

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [94]:
# Pair each output column name with the list collected for it during the API loop
column_names = ("City", "Lat", "Lng", "Max_Temp", "Humidity",
                "Cloudiness", "Wind_Speed", "Country", "Date")
column_values = (name, lat, lon, temp_max, humidity,
                 clouds, speed, country, dt)
weather_dict = dict(zip(column_names, column_values))

In [95]:
# Build the weather table: one column per dictionary key, one row per city
weather_data = pd.DataFrame(data=weather_dict)
weather_data

Unnamed: 0,City,Lat,Lng,Max_Temp,Humidity,Cloudiness,Wind_Speed,Country,Date
0,Chui,-33.70,-53.46,47.01,91,99,9.19,UY,1595641523
1,Mossamedes,-15.20,12.15,65.95,81,0,4.79,AO,1595641523
2,Hilo,19.73,-155.09,78.80,83,90,3.36,US,1595641523
3,Cabo San Lucas,22.89,-109.91,89.01,73,5,17.22,MX,1595641524
4,Vaini,-21.20,-175.20,75.20,83,75,12.75,TO,1595641524
...,...,...,...,...,...,...,...,...,...
575,Pakokku,21.33,95.10,88.79,53,46,5.73,MM,1595641673
576,Pahlgām,34.03,75.33,63.79,64,52,1.36,IN,1595641673
577,Bosaso,11.28,49.18,88.70,42,99,21.36,SO,1595641673
578,Makkah al Mukarramah,21.43,39.83,86.92,45,86,9.80,SA,1595641594


In [96]:
# Export the weather table to CSV without the index column.
# A forward slash is used because a backslash is a path separator only on
# Windows; on other systems it would become part of the file name.
weather_data.to_csv('output_data/raw_weather_data.csv', index=False)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [97]:
# Summary statistics of the numeric columns; the "max" entry for Humidity
# shows whether any city reports a humidity above 100.
weather_data.describe()

Unnamed: 0,Lat,Lng,Max_Temp,Humidity,Cloudiness,Wind_Speed,Date
count,580.0,580.0,580.0,580.0,580.0,580.0,580.0
mean,19.615362,20.999069,69.140448,70.82069,47.856897,7.647121,1595642000.0
std,32.929404,90.319633,13.039101,19.638677,39.518456,5.180945,86.09062
min,-54.8,-175.2,32.0,10.0,0.0,0.13,1595641000.0
25%,-7.505,-55.925,59.0,62.0,1.0,3.685,1595642000.0
50%,22.83,24.7,70.905,76.0,44.0,6.555,1595642000.0
75%,47.6775,100.3525,78.8,85.0,90.0,10.29,1595642000.0
max,78.22,179.32,101.52,100.0,100.0,32.21,1595642000.0


In [98]:
# Keep only the rows whose humidity does not exceed 100
humidity_ok = weather_data['Humidity'].le(100)
clean_city_data = weather_data[humidity_ok]
clean_city_data.head()

Unnamed: 0,City,Lat,Lng,Max_Temp,Humidity,Cloudiness,Wind_Speed,Country,Date
0,Chui,-33.7,-53.46,47.01,91,99,9.19,UY,1595641523
1,Mossamedes,-15.2,12.15,65.95,81,0,4.79,AO,1595641523
2,Hilo,19.73,-155.09,78.8,83,90,3.36,US,1595641523
3,Cabo San Lucas,22.89,-109.91,89.01,73,5,17.22,MX,1595641524
4,Vaini,-21.2,-175.2,75.2,83,75,12.75,TO,1595641524


In [99]:
# Sanity check: non-null value count per column of the humidity-filtered frame.
# Scratch cell that the author marked for deletion.
clean_city_data.count()  #delete

City          580
Lat           580
Lng           580
Max_Temp      580
Humidity      580
Cloudiness    580
Wind_Speed    580
Country       580
Date          580
dtype: int64

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [100]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


In [101]:
# Extract relevant fields from the data frame


# Export the City_Data into a csv


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

In [102]:
# Today's date as MM/DD/YYYY, used as a stamp in the plot titles
plot_date = f"{date.today():%m/%d/%Y}"

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of max temperature against latitude for every city
x_values = clean_city_data['Lat']
y_values = clean_city_data['Max_Temp']
# `grid` is not a scatter() keyword: unknown keywords are forwarded to the
# marker collection and rejected, so the grid is switched on with plt.grid().
plt.scatter(x_values,y_values, marker="o", facecolors="blue", edgecolors="black")
plt.grid(True)
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.title(f"City Latitude vs Max Temperature {plot_date}")
plt.xlim(-65,85) # set the upper and lower limits of our x axis
plt.ylim(20,110) # set the upper and lower limits of our y axis
plt.savefig("Images/Latitude_vs_Temperature.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Humidity against latitude, drawn on the current axes and saved as a PNG
x_values = clean_city_data['Lat']
y_values = clean_city_data['Humidity']

ax = plt.gca()
ax.scatter(x_values, y_values, marker="o", facecolors="blue", edgecolors="black")

# Labels and title (the title carries the analysis date)
ax.set_xlabel('Latitude')
ax.set_ylabel('Humidity (%)')
ax.set_title(f"City Latitude vs Humidity {plot_date}")

# Fixed axis limits
ax.set_xlim(-85, 85)
ax.set_ylim(0, 100)

plt.savefig("Images/Latitude_vs_Humidity.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Cloudiness against latitude, drawn on the current axes and saved as a PNG
x_values = clean_city_data['Lat']
y_values = clean_city_data['Cloudiness']

ax = plt.gca()
ax.scatter(x_values, y_values, marker="o", facecolors="blue", edgecolors="black")

# Labels and title (the title carries the analysis date)
ax.set_xlabel('Latitude')
ax.set_ylabel('Cloudiness (%)')
ax.set_title(f"City Latitude vs Cloudiness {plot_date}")

# Fixed axis limits
ax.set_xlim(-85, 85)
ax.set_ylim(0, 100)

plt.savefig("Images/Latitude_vs_Cloudiness.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Wind speed against latitude, drawn on the current axes and saved as a PNG
x_values = clean_city_data['Lat']
y_values = clean_city_data['Wind_Speed']

ax = plt.gca()
ax.scatter(x_values, y_values, marker="o", facecolors="blue", edgecolors="black")

# Labels and title (the title carries the analysis date)
ax.set_xlabel('Latitude')
ax.set_ylabel('Wind Speed (mph)')
ax.set_title(f"City Latitude vs Wind Speed {plot_date}")

# Fixed axis limits
ax.set_xlim(-85, 85)
ax.set_ylim(0, 35)

plt.savefig("Images/Latitude_vs_Wind_Speed.png")
plt.show()

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots

In [None]:
# Create Northern and Southern Hemisphere DataFrames

In [None]:
# Northern hemisphere: cities on or above the equator (latitude >= 0)
is_north = clean_city_data['Lat'].ge(0)
north_data = clean_city_data[is_north]
north_data.head()

In [None]:
# Southern hemisphere: cities strictly below the equator (latitude < 0)
is_south = clean_city_data['Lat'].lt(0)
south_data = clean_city_data[is_south]
south_data.head()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: correlation and linear fit of max temperature on latitude
x_values = north_data['Lat']
y_values = north_data['Max_Temp']

# Pearson correlation coefficient (first element of the pearsonr result)
correlation = stats.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],4)}")

# create scatter plot with linear regression equation and line
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates: a fixed data-space point
# such as (6, 10) can lie outside the plotted range, and an annotation whose
# data-space anchor is outside the axes is not drawn.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Northern Hemisphere - Max Temp vs. Latitude')
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: correlation and linear fit of max temperature on latitude
x_values = south_data['Lat']
y_values = south_data['Max_Temp']

# Pearson correlation coefficient (first element of the pearsonr result)
correlation = stats.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],4)}")

# create scatter plot with linear regression equation and line
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are all negative, so a data-space anchor at x = 6 is
# outside the axes and the text would not be drawn; use axes-fraction
# coordinates instead.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Southern Hemisphere - Max Temp vs. Latitude')
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: correlation and linear fit of humidity on latitude
x_values = north_data['Lat']
y_values = north_data['Humidity']

# Pearson correlation coefficient (first element of the pearsonr result)
correlation = stats.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],4)}")

# create scatter plot with linear regression equation and line
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates so it stays visible
# whatever range the sampled data happens to cover.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Northern Hemisphere - Humidity (%) vs. Latitude')
plt.xlabel('Latitude')
plt.ylabel('Humidity %')
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: correlation and linear fit of humidity on latitude
x_values = south_data['Lat']
y_values = south_data['Humidity']

# Pearson correlation coefficient (first element of the pearsonr result)
correlation = stats.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],4)}")

# create scatter plot with linear regression equation and line
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are all negative, so a data-space anchor at x = 6 is
# outside the axes and the text would not be drawn; use axes-fraction
# coordinates instead.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Southern Hemisphere - Humidity (%) vs. Latitude')
plt.xlabel('Latitude')
plt.ylabel('Humidity %')
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: correlation and linear fit of cloudiness on latitude
x_values = north_data['Lat']
y_values = north_data['Cloudiness']

# Pearson correlation coefficient (first element of the pearsonr result)
correlation = stats.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],4)}")

# create scatter plot with linear regression equation and line
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates so it stays visible
# whatever range the sampled data happens to cover.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Northern Hemisphere - Cloudiness (%) vs. Latitude')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness %')
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: correlation and linear fit of cloudiness on latitude
x_values = south_data['Lat']
y_values = south_data['Cloudiness']

# Pearson correlation coefficient (first element of the pearsonr result)
correlation = stats.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],4)}")

# create scatter plot with linear regression equation and line
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are all negative, so a data-space anchor at x = 6 is
# outside the axes and the text would not be drawn; use axes-fraction
# coordinates instead.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Southern Hemisphere - Cloudiness (%) vs. Latitude')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness %')
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: correlation and linear fit of wind speed on latitude
x_values = north_data['Lat']
y_values = north_data['Wind_Speed']

# Pearson correlation coefficient (first element of the pearsonr result)
correlation = stats.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],4)}")

# create scatter plot with linear regression equation and line
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Anchor the equation in axes-fraction coordinates so it stays visible
# whatever range the sampled data happens to cover.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Northern Hemisphere - Wind Speed (mph) vs. Latitude')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: correlation and linear fit of wind speed on latitude
x_values = south_data['Lat']
# Read the wind-speed column: the section heading and the y-axis label are
# both about wind speed, but the cell previously regressed 'Humidity'.
y_values = south_data['Wind_Speed']

# Pearson correlation coefficient (first element of the pearsonr result)
correlation = stats.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],4)}")

# create scatter plot with linear regression equation and line
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are all negative, so a data-space anchor at x = 6 is
# outside the axes and the text would not be drawn; use axes-fraction
# coordinates instead.
plt.annotate(line_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.title('Southern Hemisphere - Wind Speed (mph) vs. Latitude')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.show()

In [None]:
# The End