# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Import dependencies
import requests
import pandas as pd
import numpy as np
from citipy import citipy
from api_keys import weather_api_key
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

%reload_ext lab_black

# Output File (CSV)

## Generate Cities List

In [2]:
# Draw random (latitude, longitude) pairs. Each pair is later mapped to its
# nearest city, so oversampling is what yields 500+ unique cities.
RANDOM_SEED = 42  # fixed seed so Restart & Run All reproduces the same sample
NUM_COORDS = 1500  # number of random coordinate pairs to draw

rng = np.random.default_rng(RANDOM_SEED)
lat_list = rng.uniform(-90, 90, NUM_COORDS)
lng_list = rng.uniform(-180, 180, NUM_COORDS)

# Combine the two arrays into a list of (lat, lng) coordinate tuples
coords_list = list(zip(lat_list, lng_list))

In [3]:
# Map every random coordinate to its nearest city, keeping each city name once.
# Cities are de-duplicated by name only, so same-named cities in different
# countries collapse into a single entry (the first one encountered).
cities = []
countries = []  # country code for each entry in `cities`, index-aligned
seen_cities = set()  # constant-time membership test instead of scanning `cities`

for lat, lng in coords_list:
    # Single lookup per coordinate; both attributes come from the same result.
    nearest = citipy.nearest_city(lat, lng)
    city = nearest.city_name

    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)
        countries.append(nearest.country_code)

# Displayed as the cell output: number of unique city names collected
len(cities)

603

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
def extract_data(data):
    """Flatten one decoded weather response into a single-level record.

    Args:
        data: Mapping that provides "name", "coord" (with "lat" and "lon"),
            "main" (with "temp_max" and "humidity"), "clouds" (with "all"),
            "wind" (with "speed"), "sys" (with "country") and "dt".

    Returns:
        dict with the keys "name", "lat", "lng", "max_temp_f", "humidity",
        "cloudiness", "windspeed", "country" and "date", in that order.

    Raises:
        KeyError: if any of the expected keys is missing from ``data``.
    """
    record = {"name": data["name"]}

    # Coordinates: the source key "lon" is exposed as "lng"
    coord = data["coord"]
    record["lat"] = coord["lat"]
    record["lng"] = coord["lon"]

    # Readings nested under "main"
    main = data["main"]
    record["max_temp_f"] = main["temp_max"]
    record["humidity"] = main["humidity"]

    # Remaining single-value lookups
    record["cloudiness"] = data["clouds"]["all"]
    record["windspeed"] = data["wind"]["speed"]
    record["country"] = data["sys"]["country"]
    record["date"] = data["dt"]

    return record

In [None]:
# Query the weather endpoint once per city and collect one flattened record
# per successful lookup.
WEATHER_URL = "https://api.openweathermap.org/data/2.5/weather"  # HTTPS so the API key is not sent in clear text
REQUEST_TIMEOUT_S = 10  # a stalled request must not hang the whole loop

# create empty list for appending extracted data
city_results = []

# set beginning print statements for output
print("Beginning Data Retrieval")
print("-" * 25)

# record_counter is the 1-based position of the city in `cities`, so the log
# numbering is sequential for the entire run
for record_counter, city in enumerate(cities, start=1):
    print(f"Processing Record {record_counter} | {city}")

    try:
        response = requests.get(
            url=WEATHER_URL,
            params={"q": city, "appid": weather_api_key, "units": "imperial"},
            timeout=REQUEST_TIMEOUT_S,
        )
        city_results.append(extract_data(response.json()))

    except KeyError:
        # A payload without the expected fields makes extract_data raise
        # KeyError; this notebook treats that case as an unknown city.
        print("City Not Found. Skipping ...")

    except (requests.RequestException, ValueError) as err:
        # Network failure, timeout, or a body that is not valid JSON: skip this
        # city instead of aborting the run. Only the exception type is printed
        # because requests error messages can include the request URL, which
        # carries the API key.
        print(f"Request failed ({type(err).__name__}). Skipping ...")

print("-" * 25)
print("Data Retrieval Complete")
print("-" * 25)

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [11]:
# Build a DataFrame with one row per successfully retrieved city record
city_df = pd.DataFrame(data=city_results)

# Persist the table to disk, leaving out the positional index column
# NOTE(review): if this file is reloaded with pd.read_csv defaults, the country
# code "NA" (Namibia) is presumably parsed as missing — confirm and pass
# keep_default_na=False when reading it back.
city_df.to_csv(path_or_buf="cities.csv", index=False)

# Preview the first rows as the cell output
city_df.head(n=20)

Unnamed: 0,name,lat,lng,max_temp_f,humidity,cloudiness,windspeed,country,date
0,Hambantota,6.12,81.12,80.46,80,100,14.27,LK,1600983904
1,Inírida,3.87,-67.92,81.39,79,95,1.68,CO,1600983368
2,Nanortalik,60.14,-45.24,38.77,72,100,27.47,GL,1600983696
3,Saldanha,-33.01,17.94,57.2,76,27,10.29,ZA,1600983905
4,Ushuaia,-54.8,-68.3,50.0,46,40,21.92,AR,1600983662
5,Cape Town,-33.93,18.42,60.01,82,75,14.99,ZA,1600983519
6,Dingle,11.0,122.67,75.06,89,100,3.33,PH,1600983550
7,Butaritari,3.07,172.79,81.9,74,95,16.78,KI,1600983905
8,Lüderitz,-26.65,15.16,58.15,83,5,3.65,,1600983142
9,Albany,42.6,-73.97,73.0,59,100,2.21,US,1600983881


In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns — a quick way to spot out-of-range values before plotting.
city_df.describe()

Unnamed: 0,lat,lng,max_temp_f,humidity,cloudiness,windspeed,date
count,551.0,551.0,551.0,551.0,551.0,551.0,551.0
mean,19.04804,12.171742,65.386425,71.560799,47.705989,9.576897,1600984000.0
std,33.218618,88.598774,16.239951,20.063704,36.918642,32.897113,218.3301
min,-54.8,-179.17,17.11,8.0,0.0,0.13,1600983000.0
25%,-8.46,-62.375,53.6,61.5,10.5,3.89,1600984000.0
50%,21.12,16.83,68.0,77.0,41.0,6.93,1600984000.0
75%,45.86,80.405,77.215,87.0,86.5,10.625,1600984000.0
max,78.22,179.32,106.0,100.0,100.0,767.49,1600984000.0


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [7]:
#  Get the indices of cities that have humidity over 100%.

In [8]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression