# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies
import pandas as pd
import numpy as np
import requests
import json
import matplotlib.pyplot as plt
from scipy.stats import linregress
import datetime

#this function will allow me to get a list of cities which are nearest to a set of lat/longs i create
from citipy import citipy

# Google and weather API Keys
from api_keys import g_key, weather_api_key

## Generate Cities List

In [2]:
# first create a "random" set of lat/longs

lats = np.random.uniform(low = -90, high = 90, size = 750)
lngs = np.random.uniform(low = -180, high = 180, size = 750)

city = []
country = []

# now those lat/longs have to be in pairs to use the citipy function
lat_lng = zip(lats, lngs)

# run a loop calling citipy to get the nearest city to the lat/long created
for lat in lat_lng:
    city.append(citipy.nearest_city(lat[0], lat[1]).city_name)
    country.append(citipy.nearest_city(lat[0], lat[1]).country_code)

# create a list of all the data
cities_to_truncate = [city, country]   #, city_lat, city_lng

# make sure the cities are unique then cut the list to 500  
np.unique(cities_to_truncate)

cities = cities_to_truncate[0][:500]
countries = cities_to_truncate[1][:500]

# i have a list of "random" cities to use

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [3]:
# create the base url and the lists which i'll be using to store data

base_url = 'http://api.openweathermap.org/data/2.5/weather?units=imperial'
city_lats = []
city_lngs = []
temps = []
humidities = []
cloudiness = []
wind_speeds = []
dates = []
count = 0

# loop through the data, call it through the api, then create lists of the needed data
# there will be cities which will not be in the weather app, so use try/except
for city in cities:
    try:
        url_string = (base_url + '&appid=' + weather_api_key + '&q=' + city)
        city_lat_lng = requests.get(url_string)
        weather_data = city_lat_lng.json()
        city_lats.append(weather_data['coord']['lat'])
        city_lngs.append(weather_data['coord']['lon'])
        temps.append(weather_data['main']['temp_max'])
        humidities.append(weather_data['main']['humidity'])
        cloudiness.append(weather_data['clouds']['all'])
        wind_speeds.append(weather_data['wind']['speed'])
        dates.append(weather_data['dt'])
        city_num = (weather_data['id'])
        print(f"City: {city}  City ID: {city_num}")

    except:
        print("City not found")
        city_lats.append(0)
        city_lngs.append(0)
        temps.append(0)
        humidities.append(101)  #make the humidity over 100 to remove the city later
        cloudiness.append(0)
        wind_speeds.append(0)
        dates.append(0)
        
# the list of cities now has all the weather details, with the cities not found marked

City not found
City: bredasdorp  City ID: 1015776
City: cape town  City ID: 3369157
City: hermanus  City ID: 3366880
City: bambous virieux  City ID: 1106677
City: filadelfia  City ID: 4560349
City not found
City: anadyr  City ID: 2127202
City: xuddur  City ID: 49747
City: leningradskiy  City ID: 2123814
City: mataura  City ID: 6201424
City not found
City: punta arenas  City ID: 3874787
City: wilmington  City ID: 4499379
City: beloha  City ID: 1067565
City: atuona  City ID: 4020109
City: ofunato  City ID: 2111530
City: hilo  City ID: 5855927
City: juneau  City ID: 5554072
City: aquiraz  City ID: 3407407
City: san cristobal  City ID: 3628473
City: porto novo  City ID: 2392087
City: mabaruma  City ID: 3377301
City: caravelas  City ID: 3466980
City: cape town  City ID: 3369157
City: saint-denis  City ID: 2980915
City: tautira  City ID: 4033557
City: constitucion  City ID: 3893726
City: nome  City ID: 5870133
City: nikolskoye  City ID: 546105
City: rosario  City ID: 3838583
City: hurricane 

City: la ronge  City ID: 6050066
City: yulara  City ID: 6355222
City: busselton  City ID: 2075265
City: ilulissat  City ID: 3423146
City: punta arenas  City ID: 3874787
City: souillac  City ID: 933995
City: sioux lookout  City ID: 6148373
City: lamar  City ID: 4705086
City: ushuaia  City ID: 3833367
City: tasiilaq  City ID: 3424607
City not found
City: atuona  City ID: 4020109
City: avera  City ID: 4231997
City: bluff  City ID: 2206939
City: albany  City ID: 5106841
City: bengkulu  City ID: 1649150
City: chernyshevskiy  City ID: 2025456
City: new norfolk  City ID: 2155415
City: mayo  City ID: 4362001
City: yellowknife  City ID: 6185377
City: port alfred  City ID: 964432
City: ushuaia  City ID: 3833367
City not found
City: flinders  City ID: 6255012
City: ewa beach  City ID: 5855051
City: carnarvon  City ID: 2074865
City: kota kinabalu  City ID: 1733432
City: albany  City ID: 5106841
City: bubaque  City ID: 2374583
City: lebu  City ID: 3883457
City: sao gabriel da cachoeira  City ID: 36

In [4]:
# keep this code if it's needed to see the structure of the json data coming back from weather
# print(json.dumps(weather_data, indent=2, sort_keys=True))

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [5]:
# create a weather data frame
#first convert unix date_time to standard

from datetime import date
date_time = []

# change the unix times to mon/day/yr format
for row in dates:
    date_time.append(date.fromtimestamp(row).strftime('%m-%d-%Y'))

city_weather = {"City": cities,
                "Lat": city_lats,
                "Lng": city_lngs,
                "High Temp": temps,
                "Humidity": humidities,
                "Cloudiness": cloudiness,
                "Wind Speed": wind_speeds,
                "Country": countries,
                "Date Time": date_time
               }
city_weather_df = pd.DataFrame(city_weather)
city_weather_df.head()


Unnamed: 0,City,Lat,Lng,High Temp,Humidity,Cloudiness,Wind Speed,Country,Date Time
0,mahaicony,0.0,0.0,0.0,101,0,0.0,gy,12-31-1969
1,bredasdorp,-34.53,20.04,57.2,76,21,9.17,za,10-12-2020
2,cape town,-33.93,18.42,60.01,87,40,3.36,za,10-12-2020
3,hermanus,-34.42,19.23,57.0,82,3,4.0,za,10-12-2020
4,bambous virieux,-20.34,57.76,71.6,88,40,10.29,mu,10-12-2020


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [6]:
#  Get the indices of cities that have humidity over 100%.
cities_more_100 = city_weather_df.loc[city_weather_df['Humidity']>100]
cities_more_100

Unnamed: 0,City,Lat,Lng,High Temp,Humidity,Cloudiness,Wind Speed,Country,Date Time
0,mahaicony,0.0,0.0,0.0,101,0,0.0,gy,12-31-1969
6,illoqqortoormiut,0.0,0.0,0.0,101,0,0.0,gl,12-31-1969
11,belushya guba,0.0,0.0,0.0,101,0,0.0,ru,12-31-1969
39,sentyabrskiy,0.0,0.0,0.0,101,0,0.0,ru,12-31-1969
58,mys shmidta,0.0,0.0,0.0,101,0,0.0,ru,12-31-1969
86,chagda,0.0,0.0,0.0,101,0,0.0,ru,12-31-1969
111,mys shmidta,0.0,0.0,0.0,101,0,0.0,ru,12-31-1969
130,illoqqortoormiut,0.0,0.0,0.0,101,0,0.0,gl,12-31-1969
135,illoqqortoormiut,0.0,0.0,0.0,101,0,0.0,gl,12-31-1969
142,illoqqortoormiut,0.0,0.0,0.0,101,0,0.0,gl,12-31-1969


In [7]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".

clean_city_data = city_weather_df.loc[city_weather_df['Humidity']<=100]
clean_city_data.head()

Unnamed: 0,City,Lat,Lng,High Temp,Humidity,Cloudiness,Wind Speed,Country,Date Time
1,bredasdorp,-34.53,20.04,57.2,76,21,9.17,za,10-12-2020
2,cape town,-33.93,18.42,60.01,87,40,3.36,za,10-12-2020
3,hermanus,-34.42,19.23,57.0,82,3,4.0,za,10-12-2020
4,bambous virieux,-20.34,57.76,71.6,88,40,10.29,mu,10-12-2020
5,filadelfia,39.95,-75.16,57.99,100,90,12.75,py,10-12-2020


In [8]:
# output the csv file of all the cities which weather can find
clean_city_data.to_csv('../output_data/cities.csv')


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [9]:
# create the date of the data pull to use in the title
current_date = clean_city_data.at[0,'Date Time']

# scatter plot to observe trends
plt.scatter(clean_city_data['Lat'],clean_city_data['High Temp'],marker="o",color="navy")
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.title(f"Latitude vs. Max Temperature ({current_date})")

# save the pic
plt.savefig('../output_data/lat_v_temp.png')

plt.show()

KeyError: 0

## Latitude vs. Humidity Plot

In [None]:
# scatter plot to observe trends

plt.scatter(clean_city_data['Lat'],clean_city_data['Humidity'],marker="^",color="blue")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title(f"Latitude vs. Humidity ({current_date})")

# save the pic
plt.savefig('../output_data/lat_v_humid.png')

plt.show()


## Latitude vs. Cloudiness Plot

In [None]:
# scatter plot to observe trends
plt.scatter(clean_city_data['Lat'],clean_city_data['Cloudiness'],marker="1",color="forestgreen")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title(f"Latitude vs. Cloudiness ({current_date})")

# save the pic
plt.savefig('../output_data/lat_v_cloudiness.png')

plt.show()


## Latitude vs. Wind Speed Plot

In [None]:
# scatter plot to observe trends

plt.scatter(clean_city_data['Lat'],clean_city_data['Wind Speed'],marker="2",color="darkorange")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title(f"Latitude vs. Wind Speed ({current_date})")

# save the pic
plt.savefig('../output_data/lat_v_wind_speed.png')

plt.show()


## Linear Regression

In [None]:
# start with dividing the data into northern and southern hemisphere

northern_data = clean_city_data.loc[clean_city_data['Lat'] >= 0]
southern_data = clean_city_data.loc[clean_city_data['Lat'] <= 0]
southern_data

In [None]:
# create a linear regression function which creates the plots with the call

def lin_regression(x_values, y_values, mrkr, clr, ann_x, ann_y):
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    regress_values = x_values * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    plt.scatter(x_values,y_values, marker=mrkr,color=clr)
    plt.plot(x_values,regress_values,"r-")
    plt.annotate(line_eq,(ann_x,ann_y),fontsize=15,color="black")

In [None]:
# create an r-squared function which prints out the value
def r_value(x_r, y_r):
    correlation_matrix = np.corrcoef(x_r, y_r)
    correlation_xy = correlation_matrix[0,1]
    r_squared = correlation_xy**2
    print(f"r-squared value: {r_squared}")

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# calculate r-squared and create linear regression plot using the r_value and lin_regression 
# function which i've created.  Create labeling below
r_value(northern_data['High Temp'],northern_data['Lat'])
lin_regression(northern_data['High Temp'],northern_data['Lat'], 'o', 'navy', 20, 10)

plt.xlabel('Max Temp')
plt.ylabel('Latitude')
plt.title(f"Northern Hemisphere Latitude vs. Max Temp ({current_date})")

plt.show()
#correlation betw latitude and temp is very high

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# calculate r-squared and create linear regression plot using the r_value and lin_regression 
# function which i've created.  Create labeling below
r_value(southern_data['High Temp'],southern_data['Lat'])
lin_regression(southern_data['High Temp'],southern_data['Lat'], 'v', 'forestgreen', 70, -45)

plt.xlabel(Max Temp'')
plt.ylabel('Latitude')
plt.title(f'Southern Hemisphere Latitude vs. Max Temp ({current_date})')

plt.show()
#correlation betw latitude and temp is very high

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# calculate r-squared and create linear regression plot using the r_value and lin_regression 
# function which i've created.  Create labeling below
r_value(northern_data['Humidity'],northern_data['Lat'])
lin_regression(northern_data['Humidity'],northern_data['Lat'], 'p', 'darkorange', 20, 75)

plt.xlabel('Humidity')
plt.ylabel('Latitude')
plt.title(f'Northern Hemisphere Latitude vs. Humidity ({current_date})')

plt.show()
#correlation betw latitude and humidity is very, very low

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# calculate r-squared and create linear regression plot using the r_value and lin_regression 
# function which i've created.  Create labeling below
r_value(southern_data['Humidity'],southern_data['Lat'])
lin_regression(southern_data['Humidity'],southern_data['Lat'], '+', 'lightseagreen', 20, -5)

plt.xlabel('Humidity')
plt.ylabel('Latitude')
plt.title(f'Southern Hemisphere Latitude vs. Humidity ({current_date})')

plt.show()
#correlation betw latitude and humidity is very, very, very low

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# calculate r-squared and create linear regression plot using the r_value and lin_regression 
# function which i've created.  Create labeling below
r_value(northern_data['Cloudiness'],northern_data['Lat'])
lin_regression(northern_data['Cloudiness'],northern_data['Lat'], 'd', 'navy', 0, 75)

plt.xlabel('Cloudiness')
plt.ylabel('Latitude')
plt.title(f'Northern Hemisphere Latitude vs. Cloudiness ({current_date})')

plt.show()
#correlation betw latitude and cloudiness is very, very low

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# calculate r-squared and create linear regression plot using the r_value and lin_regression 
# function which i've created.  Create labeling below
r_value(southern_data['Cloudiness'],southern_data['Lat'])
lin_regression(southern_data['Cloudiness'],southern_data['Lat'], '^', 'forestgreen', 55, -50)

plt.xlabel('Cloudiness')
plt.ylabel('Latitude')
plt.title(f'Southern Hemisphere Latitude vs. Cloudiness ({current_date})')

plt.show()
#correlation betw latitude and cloudiness is very, very, very low

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# calculate r-squared and create linear regression plot using the r_value and lin_regression 
# function which i've created.  Create labeling below
r_value(northern_data['Wind Speed'],northern_data['Lat'])
lin_regression(northern_data['Wind Speed'],northern_data['Lat'], 'p', 'darkorange', 30, 20)

plt.xlabel('Wind Speed')
plt.ylabel('Latitude')
plt.title(f'Northern Hemisphere Latitude vs. Wind Speed ({current_date})')

plt.show()
#correlation betw latitude and cloudiness is low

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# calculate r-squared and create linear regression plot using the r_value and lin_regression 
# function which i've created.  Create labeling below
r_value(southern_data['Wind Speed'],southern_data['Lat'])
lin_regression(southern_data['Wind Speed'],southern_data['Lat'], '+', 'lightseagreen', 20, -5)

plt.xlabel('Wind Speed')
plt.ylabel('Latitude')
plt.title(f'Southern Hemisphere Latitude vs. Wind Speed ({current_date})')

plt.show()
#correlation betw latitude and cloudiness is very, very, very low

In [None]:
# Observations:1.) Not shocking, but temp is tightly correllated with latitude.
# 2.) I was surprised that humidity has very little corellation with latitude. I would 
# have thought the closer to the equator, the more humid things are.