# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import json
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy.stats as st
from scipy.stats import linregress

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Import API key
from api_keys import weather_api_key

# Where the collected city weather table is written (CSV)
output_data_file = "../output_data/cities.csv"

# Bounds, in degrees, of the box random coordinates are drawn from:
# (min, max) for latitude and longitude respectively
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [2]:
# Seed the generator so "Restart Kernel -> Run All" samples the same
# coordinates (and therefore the same city list) on every run.
# Change RANDOM_SEED to draw a different sample.
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

# Create a set of random lat and lng combinations
lats = rng.uniform(lat_range[0], lat_range[1], size=1500)
lngs = rng.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each city
# name once. dict.fromkeys de-duplicates in first-seen order with constant-time
# lookups, replacing a linear `city not in cities` scan per coordinate.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(sample_lat, sample_lng).city_name
        for sample_lat, sample_lng in lat_lngs
    )
)

# Print the city count to confirm sufficient count
len(cities)

610

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [3]:
# Base endpoint for the current-weather API. HTTPS is used because the API key
# travels in the query string and must not be sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"

# Unit system passed to the API as the `units` query parameter
units = "imperial"

# Partial query URL: everything except the city name, which is appended
# per request (the string deliberately ends with "q=").
query_url = f"{url}appid={weather_api_key}&units={units}&q="


In [4]:
# Parallel result lists: position i in every list describes the same city, so
# all of them must always have the same length.
city_name = []
lat = []
temp = []
lon = []
humidity = []
cloudiness = []
wind_speed = []
country = []
index_counter = 0

# Loop through cities and request data
for city in cities:
    try:
        # A timeout keeps one stalled connection from hanging the whole run
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field BEFORE appending anything. If a later key is
        # missing, nothing has been stored yet, so the lists cannot drift to
        # different lengths (which would break the DataFrame construction).
        record = {
            "name": response["name"],
            "lat": response["coord"]["lat"],
            "temp": response["main"]["temp_max"],
            "lon": response["coord"]["lon"],
            "humidity": response["main"]["humidity"],
            "cloudiness": response["clouds"]["all"],
            "wind_speed": response["wind"]["speed"],
            "country": response["sys"]["country"],
        }
    except (KeyError, IndexError):
        # The response lacked an expected field; treat it as an unknown city
        print('City not found.')
        continue
    except (requests.exceptions.RequestException, ValueError) as error:
        # Transport failure, timeout, or a body that is not valid JSON:
        # skip this city rather than aborting the whole collection run
        print(f"Request failed for {city}: {error}")
        continue

    city_name.append(record["name"])
    lat.append(record["lat"])
    temp.append(record["temp"])
    lon.append(record["lon"])
    humidity.append(record["humidity"])
    cloudiness.append(record["cloudiness"])
    wind_speed.append(record["wind_speed"])
    country.append(record["country"])

    # Keep track of successfully stored records
    index_counter = index_counter + 1

    print(f"Record= {index_counter}: {city}")
        

        


City not found.
City not found.
Record= 1: talnakh
Record= 2: asau
Record= 3: bundaberg
Record= 4: punta arenas
Record= 5: maltahohe
Record= 6: hami
Record= 7: busselton
Record= 8: odlabari
Record= 9: yellowknife
Record= 10: atuona
Record= 11: vaini
Record= 12: qom
Record= 13: port alfred
City not found.
Record= 14: lubao
Record= 15: cabo san lucas
Record= 16: bambous virieux
Record= 17: cape town
Record= 18: rikitea
Record= 19: souillac
Record= 20: mataura
Record= 21: dejen
Record= 22: farafangana
Record= 23: marawi
Record= 24: la ronge
Record= 25: kodiak
Record= 26: ruteng
Record= 27: seoul
Record= 28: sao miguel do araguaia
Record= 29: ayan
Record= 30: hithadhoo
Record= 31: jamestown
Record= 32: san patricio
Record= 33: yerbogachen
Record= 34: srednekolymsk
Record= 35: kaele
Record= 36: nikolskoye
Record= 37: manokwari
Record= 38: ponta do sol
Record= 39: kapaa
Record= 40: fort-shevchenko
Record= 41: tuatapere
Record= 42: hervey bay
Record= 43: new norfolk
Record= 44: katsuura
City 

Record= 348: maniitsoq
Record= 349: yakovlevka
Record= 350: rundu
Record= 351: chapais
Record= 352: waipawa
Record= 353: port moresby
Record= 354: tateyama
Record= 355: lata
Record= 356: port-gentil
Record= 357: eureka
Record= 358: pangai
Record= 359: alekseyevsk
Record= 360: ixtapa
Record= 361: birao
Record= 362: aden
Record= 363: bengkulu
Record= 364: pennadam
City not found.
Record= 365: salalah
Record= 366: arkhipo-osipovka
Record= 367: husavik
Record= 368: posadas
Record= 369: slave lake
Record= 370: san quintin
City not found.
Record= 371: paris
Record= 372: mosquera
Record= 373: vejle
Record= 374: mount isa
Record= 375: carnarvon
Record= 376: alugan
Record= 377: maarianhamina
Record= 378: severobaykalsk
Record= 379: nouadhibou
Record= 380: kedrovyy
Record= 381: hasaki
Record= 382: mao
Record= 383: corn island
Record= 384: inongo
Record= 385: goderich
Record= 386: beyneu
City not found.
Record= 387: cerrik
Record= 388: makakilo city
Record= 389: kahului
Record= 390: wewak
Record=

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [6]:
# Assemble the per-city result lists into one table (column order follows the
# dict) and cast the two percentage columns to float in the same expression.
weather_df = pd.DataFrame(
    {
        "City": city_name,
        "Country": country,
        "Latitude": lat,
        "Temperature": temp,
        "Longitude": lon,
        "Humidity": humidity,
        "Cloudiness": cloudiness,
        "Wind Speed": wind_speed,
    }
).astype({"Humidity": float, "Cloudiness": float})

# Show the resulting column types
weather_df.dtypes



City            object
Country         object
Latitude       float64
Temperature    float64
Longitude      float64
Humidity       float64
Cloudiness     float64
Wind Speed     float64
dtype: object

In [7]:
# Save city data into a csv file.
# Create the destination folder first: to_csv does not create missing parent
# directories and would otherwise fail on a fresh checkout.
Path(output_data_file).parent.mkdir(parents=True, exist_ok=True)
weather_df.to_csv(output_data_file, index=False)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of temperature against city latitude
fig, ax = plt.subplots()
ax.scatter(weather_df["Latitude"], weather_df["Temperature"])

# Title includes the date of analysis so the figure stands on its own
ax.set_title(f"City Latitude vs. Temperature ({time.strftime('%Y-%m-%d')})")

# Axis labels
ax.set_xlabel("Latitude")
ax.set_ylabel("Temperature (F)")

plt.show()


## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against city latitude
fig, ax = plt.subplots()
ax.scatter(weather_df["Latitude"], weather_df["Humidity"])

# Title includes the date of analysis so the figure stands on its own
ax.set_title(f"City Latitude vs. Humidity ({time.strftime('%Y-%m-%d')})")

# Axis labels, with the unit on the measured quantity
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity (%)")

plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against city latitude
fig, ax = plt.subplots()
ax.scatter(weather_df["Latitude"], weather_df["Cloudiness"])

# Title includes the date of analysis so the figure stands on its own
ax.set_title(f"City Latitude vs. Cloudiness ({time.strftime('%Y-%m-%d')})")

# Axis labels, with the unit on the measured quantity
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")

plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against city latitude
fig, ax = plt.subplots()
ax.scatter(weather_df["Latitude"], weather_df["Wind Speed"])

# Title includes the date of analysis so the figure stands on its own
ax.set_title(f"City Latitude vs. Wind Speed ({time.strftime('%Y-%m-%d')})")

# Axis labels; wind speed is in mph because the API is queried in imperial units
ax.set_xlabel("Latitude")
ax.set_ylabel("Wind Speed (mph)")

plt.show()

## Linear Regression

In [None]:
# Partition the cities by hemisphere: the equator (latitude 0) counts as
# northern, strictly negative latitudes are southern.
latitudes = weather_df["Latitude"]
northern_hem = weather_df[latitudes.ge(0)]
southern_hem = weather_df[latitudes.lt(0)]


####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Latitude is the predictor (x) and temperature the response (y), matching the
# Latitude-on-x orientation of the scatter plots earlier in the notebook.
x_values = northern_hem['Latitude']
y_values = northern_hem['Temperature']

# Calculate and print the Pearson correlation coefficient
correlation = st.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Least-squares linear regression of y on x
(slope, intercept, rvalue, pvalue, stderr)=linregress(x_values,y_values)

# Fitted y for every observed x
regress_values = x_values * slope + intercept

line_eq = f"y = {round(slope,2)} x + {round(intercept,2)}"

# Print line equation
print(line_eq)

# Scatter of the observations with the fitted line; the equation is shown in
# the legend so the figure is readable without the printed output
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"g-",label=line_eq)
plt.title("Northern Hemisphere - Max Temp vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.legend()
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Latitude is the predictor (x) and temperature the response (y), matching the
# Latitude-on-x orientation of the scatter plots earlier in the notebook.
x_values = southern_hem['Latitude']
y_values = southern_hem['Temperature']

# Calculate and print the Pearson correlation coefficient
correlation = st.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Least-squares linear regression of y on x
(slope, intercept, rvalue, pvalue, stderr)=linregress(x_values,y_values)

# Fitted y for every observed x
regress_values = x_values * slope + intercept

line_eq = f"y = {round(slope,2)} x + {round(intercept,2)}"

# Print line equation
print(line_eq)

# Scatter of the observations with the fitted line; the equation is shown in
# the legend so the figure is readable without the printed output
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"g-",label=line_eq)
plt.title("Southern Hemisphere - Max Temp vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.legend()
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Latitude is the predictor (x) and humidity the response (y), matching the
# Latitude-on-x orientation of the scatter plots earlier in the notebook.
x_values = northern_hem['Latitude']
y_values = northern_hem['Humidity']

# Calculate and print the Pearson correlation coefficient
correlation = st.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Least-squares linear regression of y on x
(slope, intercept, rvalue, pvalue, stderr)=linregress(x_values,y_values)

# Fitted y for every observed x
regress_values = x_values * slope + intercept

line_eq = f"y = {round(slope,2)} x + {round(intercept,2)}"

# Print line equation
print(line_eq)

# Scatter of the observations with the fitted line; the equation is shown in
# the legend so the figure is readable without the printed output
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"g-",label=line_eq)
plt.title("Northern Hemisphere - Humidity (%) vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.legend()
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Latitude is the predictor (x) and humidity the response (y), matching the
# Latitude-on-x orientation of the scatter plots earlier in the notebook.
x_values = southern_hem['Latitude']
y_values = southern_hem['Humidity']

# Calculate and print the Pearson correlation coefficient
correlation = st.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Least-squares linear regression of y on x
(slope, intercept, rvalue, pvalue, stderr)=linregress(x_values,y_values)

# Fitted y for every observed x
regress_values = x_values * slope + intercept

line_eq = f"y = {round(slope,2)} x + {round(intercept,2)}"

# Print line equation
print(line_eq)

# Scatter of the observations with the fitted line; the equation is shown in
# the legend so the figure is readable without the printed output
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"g-",label=line_eq)
plt.title("Southern Hemisphere - Humidity (%) vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.legend()
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Latitude is the predictor (x) and cloudiness the response (y), matching the
# Latitude-on-x orientation of the scatter plots earlier in the notebook.
x_values = northern_hem['Latitude']
y_values = northern_hem['Cloudiness']

# Calculate and print the Pearson correlation coefficient
correlation = st.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Least-squares linear regression of y on x
(slope, intercept, rvalue, pvalue, stderr)=linregress(x_values,y_values)

# Fitted y for every observed x
regress_values = x_values * slope + intercept

line_eq = f"y = {round(slope,2)} x + {round(intercept,2)}"

# Print line equation
print(line_eq)

# Scatter of the observations with the fitted line; the equation is shown in
# the legend so the figure is readable without the printed output
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"g-",label=line_eq)
plt.title("Northern Hemisphere - Cloudiness (%) vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.legend()
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Latitude is the predictor (x) and cloudiness the response (y), matching the
# Latitude-on-x orientation of the scatter plots earlier in the notebook.
x_values = southern_hem['Latitude']
y_values = southern_hem['Cloudiness']

# Calculate and print the Pearson correlation coefficient
correlation = st.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Least-squares linear regression of y on x
(slope, intercept, rvalue, pvalue, stderr)=linregress(x_values,y_values)

# Fitted y for every observed x
regress_values = x_values * slope + intercept

line_eq = f"y = {round(slope,2)} x + {round(intercept,2)}"

# Print line equation
print(line_eq)

# Scatter of the observations with the fitted line; the equation is shown in
# the legend so the figure is readable without the printed output
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"g-",label=line_eq)
plt.title("Southern Hemisphere - Cloudiness (%) vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.legend()
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Latitude is the predictor (x) and wind speed the response (y), matching the
# Latitude-on-x orientation of the scatter plots earlier in the notebook.
x_values = northern_hem['Latitude']
y_values = northern_hem['Wind Speed']

# Calculate and print the Pearson correlation coefficient
correlation = st.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Least-squares linear regression of y on x
(slope, intercept, rvalue, pvalue, stderr)=linregress(x_values,y_values)

# Fitted y for every observed x
regress_values = x_values * slope + intercept

line_eq = f"y = {round(slope,2)} x + {round(intercept,2)}"

# Print line equation
print(line_eq)

# Scatter of the observations with the fitted line; the equation is shown in
# the legend so the figure is readable without the printed output
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"g-",label=line_eq)
plt.title("Northern Hemisphere - Wind Speed (mph) vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.legend()
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Latitude is the predictor (x) and wind speed the response (y), matching the
# Latitude-on-x orientation of the scatter plots earlier in the notebook.
x_values = southern_hem['Latitude']
y_values = southern_hem['Wind Speed']

# Calculate and print the Pearson correlation coefficient
correlation = st.pearsonr(x_values,y_values)
print(f"The correlation between both factors is {round(correlation[0],2)}")

# Least-squares linear regression of y on x
(slope, intercept, rvalue, pvalue, stderr)=linregress(x_values,y_values)

# Fitted y for every observed x
regress_values = x_values * slope + intercept

line_eq = f"y = {round(slope,2)} x + {round(intercept,2)}"

# Print line equation
print(line_eq)

# Scatter of the observations with the fitted line; the equation is shown in
# the legend so the figure is readable without the printed output
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"g-",label=line_eq)
plt.title("Southern Hemisphere - Wind Speed (mph) vs. Latitude")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.legend()
plt.show()