# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json

from citipy import citipy


# Import API key
from api_keys import weather_api_key
# NOTE: keep the key only in api_keys.py (excluded from version control); never paste the literal key into this notebook.

## Generate Cities List

In [None]:
# Fix the seed so the same coordinates are drawn on every run
np.random.seed(100)

# Draw whole-degree latitudes first, then longitudes
# (randint excludes the upper bound, so 90 and 180 themselves are never drawn)
lats = np.random.randint(low=-90, high=90, size=5)
longs = np.random.randint(low=-180, high=180, size=5)

# Combine both arrays into a two-column DataFrame
coords = pd.DataFrame(dict(Latitude=lats, Longitude=longs))

# Preview the first rows
coords.head()

In [None]:
# Unique city names, kept in the order they are first encountered
cities = []

# Walk the latitude and longitude columns in lockstep, one coordinate pair at a time
for lat, lon in zip(coords['Latitude'], coords['Longitude']):

    # citipy returns a city object; only its name is kept
    city = citipy.nearest_city(lat, lon).city_name

    # Skip names that were already collected so the list stays duplicate-free
    if city in cities:
        continue
    cities.append(city)

# Number of unique cities retrieved
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Save config information
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Seconds to pause after each request
time_between = 1

# One list per DataFrame column. They are only ever appended to together,
# so they always stay the same length.
final_city = []
lat = []
long = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

# Log counters: records are numbered 1-50 within each set
record = 0
set_number = 1  # not called `set`, which would shadow the built-in type

# Print log of each city
print("Beginning Data Retrieval")
print("-----------------------------")

# Query the API once per city and store its weather fields, pausing after every request
for city in cities:

    response = requests.get(query_url + city).json()

    try:
        # Read every field BEFORE appending anything: if the response lacks
        # any key, nothing is stored for this city and the column lists stay
        # aligned (a partial append would make the later DataFrame
        # construction fail on unequal lengths).
        city_lat = response['coord']['lat']
        city_long = response['coord']['lon']
        city_max_temp = response['main']['temp_max']
        city_humidity = response['main']['humidity']
        city_cloudiness = response['clouds']['all']
        city_wind_speed = response['wind']['speed']
        city_country = response['sys']['country']
        city_date = response['dt']
    except KeyError:
        print("City not found. Skipping...")
    else:
        final_city.append(city)
        lat.append(city_lat)
        long.append(city_long)
        max_temp.append(city_max_temp)
        humidity.append(city_humidity)
        cloudiness.append(city_cloudiness)
        wind_speed.append(city_wind_speed)
        country.append(city_country)
        date.append(city_date)

        # Advance the log counter, rolling over to a new set after 50 records
        if record >= 50:
            record = 1
            set_number += 1
        else:
            record += 1

        print(f"Processing Record {record} of Set {set_number} | {city}")

    time.sleep(time_between)

print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Pair each column name with the list collected for it during retrieval
weather_dict = dict(zip(
    ["City", "Lat", "Lng", "Max. Temp", "Humidity",
     "Cloudiness", "Wind Speed", "Country", "Date"],
    [final_city, lat, long, max_temp, humidity,
     cloudiness, wind_speed, country, date],
))

# Build the weather DataFrame from that mapping
weather_data_df = pd.DataFrame(data=weather_dict)

# Preview the first rows
weather_data_df.head()

In [None]:
# Count the non-null entries in each column of the weather DataFrame
weather_data_df.count()

In [None]:
# Summary statistics table for the numeric columns of the weather DataFrame
numeric_columns = ['Lat', 'Lng', 'Max. Temp', 'Humidity', 'Cloudiness', 'Wind Speed', 'Date']
weather_data_df[numeric_columns].describe()

In [None]:
# Save weather dataframe as a csv
# The path is built with pathlib so it works on every OS (a backslash is only a
# directory separator on Windows), and the output folder is created if missing.
from pathlib import Path

output_path = Path("output_data") / "weather_data.csv"
output_path.parent.mkdir(parents=True, exist_ok=True)
weather_data_df.to_csv(output_path, encoding="utf-8", index=False)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Keep every city whose humidity is at most 100%. A reading of exactly 100% is
# valid, so only values above 100% are excluded.
cleaned_weather_data_df = weather_data_df.query("Humidity <= 100")
cleaned_weather_data_df.head()

In [None]:
# Cities reporting humidity above 100%. Exactly 100% is a valid reading and is
# not flagged.
high_humidity_df = weather_data_df.query("Humidity > 100")
high_humidity_df.head()

In [None]:
# Get the indices of cities that have humidity over 100%
# (strictly greater: a reading of exactly 100% is valid and is kept)
indices = weather_data_df[weather_data_df['Humidity'] > 100].index.tolist()
indices


In [None]:
# Remove the flagged rows by index label. drop() returns a new DataFrame by
# default, so weather_data_df itself is left unchanged.
clean_city_data_df = weather_data_df.drop(index=indices)
clean_city_data_df

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Set the x and y axes for the scatter plot of Latitude vs. Max Temperature
x_axis = clean_city_data_df["Lat"]
y_axis = clean_city_data_df["Max. Temp"]

# Create scatter plot
plt.scatter(x_axis, y_axis)

# Add title and axis labels (the title previously misspelled "Temperature")
plt.title('City Latitude vs. Max Temperature (19/04/2021)')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (C)')

# Show scatter plot
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Latitude goes on the x axis, humidity on the y axis
x_axis = clean_city_data_df.loc[:, "Lat"]
y_axis = clean_city_data_df.loc[:, "Humidity"]

# Draw one point per city
plt.scatter(x=x_axis, y=y_axis)

# Label both axes, then title the plot
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title('City Latitude vs. Humidity (19/04/2021)')

# Render the figure
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Latitude goes on the x axis, cloudiness on the y axis
x_axis = clean_city_data_df.loc[:, "Lat"]
y_axis = clean_city_data_df.loc[:, "Cloudiness"]

# Draw one point per city
plt.scatter(x=x_axis, y=y_axis)

# Label both axes, then title the plot
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title('City Latitude vs. Cloudiness (19/04/2021)')

# Render the figure
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Latitude goes on the x axis, wind speed on the y axis
x_axis = clean_city_data_df.loc[:, "Lat"]
y_axis = clean_city_data_df.loc[:, "Wind Speed"]

# Draw one point per city
plt.scatter(x=x_axis, y=y_axis)

# Label both axes, then title the plot
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title('City Latitude vs. Wind Speed (19/04/2021)')

# Render the figure
plt.show()

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression