# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): this name is not referenced by any cell visible in this notebook;
# the export cell writes to the literal path '../output_data/citydata.csv' instead.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes, as (min, max) pairs, used as the bounds
# when drawing random coordinates below
lat_range = (-90, 90)
lng_range = (-180, 180)


## Generate Cities List

In [2]:
# Draw 1500 random latitudes and longitudes within the configured ranges
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)

# Pair them up as (lat, lng) coordinates
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair. dict.fromkeys keeps only
# the first occurrence of each city name and preserves the order of discovery.
cities = list(dict.fromkeys(
    citipy.nearest_city(point_lat, point_lng).city_name
    for point_lat, point_lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)

617

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [3]:

# Base endpoint and unit system for the OpenWeatherMap current-weather API.
# HTTPS is used so the API key is not sent in plaintext.
url = "https://api.openweathermap.org/data/2.5/weather"
units = 'metric'

# Seconds to wait for a single response before giving up on that city
request_timeout = 10

# Parallel lists: index k of every list describes the same retrieved city
lat = []
long = []
temp = []
humidity = []
cloud = []
wind = []
country = []
date = []
place = []

# Counters used only for the progress log (records are grouped in sets of 50)
recordcount = 1
setcount = 1

# Start of Print
print('Beginning Data Retrieval')
print('-----------------------------')

# Run through the cities in our list
for i, city in enumerate(cities):

    # Every 50 cities start a new set and restart the record numbering
    if (i % 50 == 0 and i >= 50):
        setcount += 1
        recordcount = 1

    print(f"Processing Record {recordcount} of Set {setcount} | {city}")
    recordcount += 1

    try:
        # Passing the query as params lets requests URL-encode the city name
        # (spaces, '&', '#', accents) instead of splicing it into the URL raw.
        response = requests.get(
            url,
            params={'appid': weather_api_key, 'units': units, 'q': city},
            timeout=request_timeout,
        )
        payload = response.json()

        # Read every field BEFORE touching the lists: if any key is missing the
        # whole city is skipped, so the lists can never end up with different
        # lengths (which would make the later pd.DataFrame(...) call fail).
        city_temp = payload['main']['temp_max']  # feeds the 'Max Temp' column
        city_lat = payload['coord']['lat']
        city_lng = payload['coord']['lon']
        city_country = payload['sys']['country']
        city_humidity = payload['main']['humidity']
        city_cloud = payload['clouds']['all']
        city_wind = payload['wind']['speed']
        city_date = payload['dt']
        city_name = payload['name']

    # The API answered, but without the expected fields (e.g. unknown city)
    except (KeyError, IndexError):
        print('City not found. Skipping...')

    # Network problem, timeout, or a body that is not valid JSON. Only the
    # exception type is printed: requests' messages can contain the full URL,
    # which would leak the API key into the notebook output.
    except (requests.exceptions.RequestException, ValueError) as err:
        print(f'Request failed ({type(err).__name__}). Skipping...')

    else:
        temp.append(city_temp)
        lat.append(city_lat)
        long.append(city_lng)
        country.append(city_country)
        humidity.append(city_humidity)
        cloud.append(city_cloud)
        wind.append(city_wind)
        date.append(city_date)
        place.append(city_name)

    finally:
        # Pause after every request (failed lookups count toward the API rate
        # limit too), not only after successful ones.
        time.sleep(1)

# Print the end
print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")
    

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [4]:
# Create dataframe of new information.
# Each list was filled in lockstep by the retrieval loop, so row k of the frame
# describes a single city.
pl = pd.DataFrame({
    'City': place,
    'Lat': lat,
    'Lng': long,
    'Max Temp': temp,
    'Humidity': humidity,
    'Cloudiness': cloud,
    'Wind Speed': wind,
    'Country': country,
    'Date': date
})

# Store in csv file (index=False keeps the row index out of the file).
# NOTE(review): to_csv does not create missing folders, so '../output_data'
# must already exist.
pl.to_csv('../output_data/citydata.csv', index=False)

# Show Dataframe
pl

NameError: name 'place' is not defined

In [8]:
# Reload the city data from the CSV written by the export cell, so the cells
# below can run from the saved file, then preview the first five rows.
citydata_csv = '../output_data/citydata.csv'
pl = pd.read_csv(citydata_csv)
pl.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Ribeira Grande,38.5167,-28.7,24.22,88,75,2.57,PT,1658761328
1,Albany,42.6001,-73.9662,23.08,93,100,3.09,US,1658761202
2,Ugoofaaru,5.6667,73.0,27.76,78,80,6.96,MV,1658761356
3,Rikitea,-23.1203,-134.9692,23.92,73,23,8.62,PF,1658761358
4,Lebu,-37.6167,-73.65,12.22,82,100,10.87,CL,1658761359


In [9]:
# Describe the data: summary statistics (count, mean, std, min, quartiles, max)
# for the numeric columns of the city DataFrame
pl.describe()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,546.0,546.0,546.0,546.0,546.0,546.0,546.0
mean,21.662368,15.422582,21.880971,68.57326,57.855311,3.681007,1658762000.0
std,32.96768,88.908891,8.198183,21.780623,38.945494,2.360283,275.976
min,-54.8,-175.2,-0.55,5.0,0.0,0.0,1658761000.0
25%,-4.987175,-61.814,15.79,58.0,20.0,1.865,1658761000.0
50%,27.85765,21.24075,22.92,74.0,69.0,3.16,1658762000.0
75%,50.1751,86.10185,27.75,85.0,99.0,5.14,1658762000.0
max,78.2186,179.3167,43.17,100.0,100.0,13.87,1658762000.0


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Get the indices of cities that have humidity over 100%.
# The comparison is strictly greater-than: a reading of exactly 100% is a valid
# value and must not be treated as an outlier.
hum = pl.loc[pl['Humidity'] > 100].index
hum

In [None]:
# Build the cleaned DataFrame `update` by removing the rows whose index labels
# are listed in `hum`. drop() returns a new DataFrame by default, so the
# original `pl` is left unchanged.
update = pl.drop(index=hum)
update


In [None]:
# Check the new data: summary statistics of the DataFrame that remains after
# the humidity outliers were dropped
update.describe()

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of maximum temperature against latitude, one point per city
plt.scatter(update["Lat"], update["Max Temp"], facecolors="blue", edgecolors='black')
plt.ylabel("Max Temperature (C)")
plt.xlabel('Latitude')
plt.title("City Latitude Vs Max Temperature")
plt.grid()

# Save the figure before showing it: show() may leave an empty canvas behind
plt.savefig('../output_data/Fig1.png')

# show() has to be CALLED; a bare `plt.show` only echoes the function object
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against latitude, one point per city
plt.scatter(update["Lat"], update["Humidity"], facecolors="blue", edgecolors='black')
plt.ylabel("Humidity (%)")
plt.xlabel('Latitude')
plt.title("City Latitude Vs Humidity")
plt.grid()

# Save the figure before showing it: show() may leave an empty canvas behind
plt.savefig('../output_data/Fig2.png')

# show() has to be CALLED; a bare `plt.show` only echoes the function object
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against latitude, one point per city
plt.scatter(update["Lat"], update["Cloudiness"], facecolors="blue", edgecolors='black')
plt.ylabel("Cloudiness (%)")
plt.xlabel('Latitude')
plt.title("City Latitude Vs Cloudiness")
plt.grid()

# Save the figure before showing it: show() may leave an empty canvas behind
plt.savefig('../output_data/Fig3.png')

# show() has to be CALLED; a bare `plt.show` only echoes the function object
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude, one point per city
plt.scatter(update["Lat"], update["Wind Speed"], facecolors="blue", edgecolors='black')
# The data was requested with units='metric', for which OpenWeatherMap reports
# wind speed in metres per second (not mph)
plt.ylabel("Wind Speed (m/s)")
plt.xlabel('Latitude')
plt.title("City Latitude Vs Wind Speed")
plt.grid()

# Save the figure before showing it: show() may leave an empty canvas behind
plt.savefig('../output_data/Fig4.png')

# show() has to be CALLED; a bare `plt.show` only echoes the function object
plt.show()

## Linear Regression

In [None]:
# Split the cities by hemisphere. A city exactly on the equator (Lat == 0) is
# assigned to the northern set only, so no row appears in both regressions.
north = update.loc[update['Lat'] >= 0, :]
south = update.loc[update['Lat'] < 0, :]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Scatter plot of the northern-hemisphere cities
plt.scatter(north["Lat"], north["Max Temp"], facecolors="blue", edgecolors='black')

# Fit a least-squares line and draw it, with its equation, over the scatter
(slope, yIntercept, rvalue, pvalue, sterr) = linregress(north["Lat"], north["Max Temp"])
values = (north["Lat"] * slope) + yIntercept
plt.plot(north["Lat"], values, 'r')
line = f'y = {slope:.2f}x + {yIntercept:.2f}'
plt.annotate(line,(10,5), fontsize=12, color='r')

# Label
plt.title("Northern Hemisphere - Max Temp vs. Latitude Linear Regression")
plt.ylabel("Max Temperature (C)")
plt.xlabel('Latitude')

# Show and print r-value. show() has to be CALLED; a bare `plt.show` is a no-op.
plt.show()
print(f'The r-value is: {rvalue}')

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Scatter plot of the southern-hemisphere cities
plt.scatter(south["Lat"], south["Max Temp"], facecolors="blue", edgecolors='black')

# Fit a least-squares line and draw it, with its equation, over the scatter
(slope, yIntercept, rvalue, pvalue, sterr) = linregress(south["Lat"], south["Max Temp"])
values = (south["Lat"] * slope) + yIntercept
plt.plot(south["Lat"], values, 'r')
line = f'y = {slope:.2f}x + {yIntercept:.2f}'
plt.annotate(line,(-50,30), fontsize=12, color='r')

# Label
plt.title("Southern Hemisphere - Max Temp vs. Latitude Linear Regression")
plt.ylabel("Max Temperature (C)")
plt.xlabel('Latitude')

# Show and print r-value. show() has to be CALLED; a bare `plt.show` is a no-op.
plt.show()
print(f'The r-value is: {rvalue}')

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Scatter plot of the northern-hemisphere cities
plt.scatter(north["Lat"], north["Humidity"], facecolors="blue", edgecolors='black')

# Fit a least-squares line and draw it, with its equation, over the scatter
(slope, yIntercept, rvalue, pvalue, sterr) = linregress(north["Lat"], north["Humidity"])
values = (north["Lat"] * slope) + yIntercept
plt.plot(north["Lat"], values, 'r')
line = f'y = {slope:.2f}x + {yIntercept:.2f}'
plt.annotate(line,(50,5), fontsize=12, color='r')

# Label
plt.title("Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression")
plt.ylabel("Humidity (%)")
plt.xlabel('Latitude')

# Show and print r-value. show() has to be CALLED; a bare `plt.show` is a no-op.
plt.show()
print(f'The r-value is: {rvalue}')

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Scatter plot of the southern-hemisphere cities
plt.scatter(south["Lat"], south["Humidity"], facecolors="blue", edgecolors='black')

# Fit a least-squares line and draw it, with its equation, over the scatter
(slope, yIntercept, rvalue, pvalue, sterr) = linregress(south["Lat"], south["Humidity"])
values = (south["Lat"] * slope) + yIntercept
plt.plot(south["Lat"], values, 'r')
line = f'y = {slope:.2f}x + {yIntercept:.2f}'
plt.annotate(line,(-50,20), fontsize=12, color='r')

# Label
plt.title("Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression")
plt.ylabel("Humidity (%)")
plt.xlabel('Latitude')

# Show and print r-value. show() has to be CALLED; a bare `plt.show` is a no-op.
plt.show()
print(f'The r-value is: {rvalue}')

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Scatter plot of the northern-hemisphere cities
plt.scatter(north["Lat"], north["Cloudiness"], facecolors="blue", edgecolors='black')

# Fit a least-squares line and draw it, with its equation, over the scatter
(slope, yIntercept, rvalue, pvalue, sterr) = linregress(north["Lat"], north["Cloudiness"])
values = (north["Lat"] * slope) + yIntercept
plt.plot(north["Lat"], values, 'r')
line = f'y = {slope:.2f}x + {yIntercept:.2f}'
plt.annotate(line,(15,50), fontsize=12, color='r')

# Label
plt.title("Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression")
plt.ylabel("Cloudiness (%)")
plt.xlabel('Latitude')

# Show and print r-value. show() has to be CALLED; a bare `plt.show` is a no-op.
plt.show()
print(f'The r-value is: {rvalue}')

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Scatter plot of the southern-hemisphere cities
plt.scatter(south["Lat"], south["Cloudiness"], facecolors="blue", edgecolors='black')

# Fit a least-squares line and draw it, with its equation, over the scatter
(slope, yIntercept, rvalue, pvalue, sterr) = linregress(south["Lat"], south["Cloudiness"])
values = (south["Lat"] * slope) + yIntercept
plt.plot(south["Lat"], values, 'r')
line = f'y = {slope:.2f}x + {yIntercept:.2f}'
plt.annotate(line,(-55,20), fontsize=12, color='r')

# Label
plt.title("Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression")
plt.ylabel("Cloudiness (%)")
plt.xlabel('Latitude')

# Show and print r-value. show() has to be CALLED; a bare `plt.show` is a no-op.
plt.show()
print(f'The r-value is: {rvalue}')

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Scatter plot of the northern-hemisphere cities
plt.scatter(north["Lat"], north["Wind Speed"], facecolors="blue", edgecolors='black')

# Fit a least-squares line and draw it, with its equation, over the scatter.
# The slope is printed with four decimals, unlike the other regression cells.
(slope, yIntercept, rvalue, pvalue, sterr) = linregress(north["Lat"], north["Wind Speed"])
values = (north["Lat"] * slope) + yIntercept
plt.plot(north["Lat"], values, 'r')
line = f'y = {slope:.4f}x + {yIntercept:.2f}'
plt.annotate(line,(15,12), fontsize=12, color='r')

# Label. The data was requested with units='metric', for which OpenWeatherMap
# reports wind speed in metres per second (not mph).
plt.title("Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression")
plt.ylabel("Wind Speed (m/s)")
plt.xlabel('Latitude')

# Show and print r-value. show() has to be CALLED; a bare `plt.show` is a no-op.
plt.show()
print(f'The r-value is: {rvalue}')

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Scatter plot of the southern-hemisphere cities
plt.scatter(south["Lat"], south["Wind Speed"], facecolors="blue", edgecolors='black')

# Fit a least-squares line and draw it, with its equation, over the scatter
(slope, yIntercept, rvalue, pvalue, sterr) = linregress(south["Lat"], south["Wind Speed"])
values = (south["Lat"] * slope) + yIntercept
plt.plot(south["Lat"], values, 'r')
line = f'y = {slope:.2f}x + {yIntercept:.2f}'
plt.annotate(line,(-25,11), fontsize=12, color='r')

# Label. The data was requested with units='metric', for which OpenWeatherMap
# reports wind speed in metres per second (not mph).
plt.title("Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression")
plt.ylabel("Wind Speed (m/s)")
plt.xlabel('Latitude')

# Show and print r-value. show() has to be CALLED; a bare `plt.show` is a no-op.
plt.show()
print(f'The r-value is: {rvalue}')

## Analysis

1. Latitude has its strongest effect on maximum temperature and very little effect on the other variables, such as wind speed and cloudiness.

2. Cloudiness is the variable least associated with latitude, as shown by its very low r-values.