# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Show the current working directory. NOTE(review): presumably relies on
# IPython's `pwd` automagic; as plain Python this bare name raises NameError.
pwd

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import datetime
from datetime import date, datetime
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination CSV for the retrieved weather records
output_data_file = "Weather_File/weather_API_data.csv"

# Sampling bounds as (min, max) pairs: longitude first, then latitude
lng_range = -180, 180
lat_range = -90, 90

## Generate Cities List

In [None]:
# Draw 1500 random latitude/longitude pairs spanning the configured ranges
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Resolve each coordinate pair to its nearest city. Dict keys keep first-seen
# order and discard repeats, so every city ends up in the list exactly once.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
    )
)

# Show how many distinct cities were found to confirm the count is sufficient
len(cities)

In [None]:
# Analysis date as MM/DD/YY; reused as the date stamp in plot titles and in the
# retrieval log for documentation purposes
today = f"{date.today():%m/%d/%y}"
print("Today's date:", today)

In [None]:
# Number of unique cities, i.e. how many API queries are about to be issued;
# reported in the header line of the retrieval run.
equivalent = len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Base endpoint of the OpenWeatherMap current-weather API. HTTPS is used so the
# API key carried in the query string is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"

# Query prefix; the city name is appended per request, so a full call looks like
# https://api.openweathermap.org/data/2.5/weather?appid={your api key}&units=imperial&q={city name}
query_url = url + "appid=" + weather_api_key + "&units=imperial" + "&q="

# Text log of every processed city. The encoding is explicit so that writing a
# city name with non-ASCII characters does not depend on the platform default.
printlog = open('Weather_File/weather_data_log.txt', 'w', encoding="utf-8")

In [None]:
# # Check for unambiguous data, as recommended by the Weather Map API, and display it in JSON format
# city_id = input("id: ") # for various id's for unambigous data

# # Looks like api.openweathermap.org/data/2.5/weather?id={city id}&appid={your api key}
# id_url= url + "id=" + city_id + "&appid="+ weather_api_key

# json_data = requests.get(id_url).json()
# json_data

In [None]:
# # Save for later use when restarting kernels
# wid = int(json_data["id"])
# wid

In [None]:
# Progress state for the retrieval run: record number within the current set,
# the set number, the last city id seen, and the container for the results
counter, set_num, cid = 0, 1, 0
weather_list = list()

# # city_ids = [] 
# clouds = []
# country = []
# wdate = []
# humidity = []
# latitude = []
# longitude = []
# max_temp = []
# wind_speed = []
# city_list=[]


In [None]:
# Retrieve the current weather for every city in `cities`, logging progress to
# the console and to `printlog`, and collect one record per city that is found.
print(f"Start Data Retrieval on {today} estimated to be {equivalent}")
# The header line is terminated so the first record is not glued onto it.
printlog.write(f"Start Data Retrieval on {today} estimated to be {equivalent}\n")
print('-'*40 + "\n")

# Start from an empty list so re-running this cell does not duplicate records.
weather_list = []

try:
    for city in cities:
        # Full request URL for this city
        call = query_url + city
        response = requests.get(call).json()

        # NOTE(review): a one-second pause per request used to sit here but was
        # disabled; if many cities get skipped, API rate limiting may be the
        # cause - confirm and re-enable `time.sleep(1)` if so.

        # Record numbers run 1..50 within a set, then roll over into the next
        # set. The counter advances for every city queried, including the ones
        # that end up skipped below.
        if counter >= 50:
            counter = 1
            set_num += 1
        else:
            counter += 1

        try:
            # A response lacking any of these fields (e.g. the error payload
            # returned for an unknown city) raises here and the city is skipped.
            record = {
                "City": response["name"],
                "Country": response['sys']['country'],
                "Latitude": response['coord']['lat'],
                "Longitude": response['coord']['lon'],
                "Date": response['dt'],
                "Cloudiness": response['clouds']['all'],
                "Humidity": response['main']['humidity'],
                "Max Tempature": response['main']['temp_max'],
                "Wind Speed": response['wind']['speed'],
            }
            # The city id is an integer value in the API response
            cid = int(response['id'])
        except (KeyError, TypeError, ValueError):
            print("City not found. Skipping... \n")
            # Keep a trace of the cities that are not in the weather map API.
            printlog.write("City not found. Skipping... \n")
            continue

        # Append (rather than overwrite) so the DataFrame gets one row per city.
        weather_list.append(record)

        print(f"Currently Processing record number {counter} of Set {set_num} | {city} with City ID: {cid}")

        # The request URL is deliberately left out of the log: it embeds the API key.
        printlog.write(f"Currently Processing record number {counter} of Set {set_num} | {city} with City ID: {cid}\n")
finally:
    # Close the log even if a request fails part-way through the run.
    printlog.close()

print('-'*40 + '\n')
print("Process complete ... \n")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Build a DataFrame from the collected weather data, then show the non-null
# count of every column as a sanity check (compare against len(cities)).
city_df = pd.DataFrame(weather_list)
city_df.count()

In [None]:
# Non-null value count per column (same check as in the previous cell)
city_df.count()

In [None]:
# Drop every row that contains a missing value so only complete records remain.
city_df = city_df.dropna()

In [None]:
# Non-null count per column after the drop; unchanged if nothing was removed
city_df.count()

In [None]:
# Preview the first rows
city_df.head()
# "Date" holds the API's `dt` field (a Unix timestamp in seconds). A Series has
# no .strftime of its own, so convert to datetimes first and format through .dt
pd.to_datetime(city_df["Date"], unit="s").dt.strftime('%m/%d/%y')

In [None]:
# Persist the data to CSV so later work has a stable copy to fall back on,
# e.g. if the retrieval takes too long to repeat.
city_df.to_csv(output_data_file)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Look at the highest recorded humidity; only when it exceeds 100 do the
# offending rows need to be located and removed in the cells below.
h = city_df['Humidity']
h.max() # a value greater than 100 means the clean-up code below must be written

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".

In [None]:
# Extract relevant fields from the data frame


# Export the City_Data into a csv if humidity above 100
# city_df.to_csv("Weather_File/clean_weather.csv")

In [None]:
# Draft regression helper. The "Linear Regression" section further down defines
# a function of the same name that also takes axis labels; in a notebook run
# that later definition replaces this one.
def linear_regression(x, y, title):
    """Scatter-plot *y* against *x* and overlay the least-squares line.

    Args:
        x: Numeric x values (list, array or Series).
        y: Numeric y values, same length as *x*.
        title: Title placed on the plot.

    Returns:
        The result object of ``scipy.stats.linregress(x, y)``.
    """
    fit = linregress(x, y)
    # Work on an array so the fitted line can be computed element-wise
    x_values = np.asarray(x, dtype=float)
    plt.scatter(x_values, y, edgecolors="black", facecolors="steelblue")
    plt.plot(x_values, fit.slope * x_values + fit.intercept, "r")
    plt.title(title)
    return fit

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter of max temperature against latitude for every retrieved city
plt.scatter(city_df["Latitude"], city_df["Max Tempature"], edgecolors="black", facecolors="steelblue")
plt.title(f"City Latitude vs. Max Temperature {today}")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
# Grid visibility is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is rejected by newer releases.
plt.grid(True, which="major", axis="both", linestyle="-", color="lightgrey")

In [None]:
# Save the current figure; the file name carries no extension, so Matplotlib
# picks the output format itself.
# NOTE(review): with the inline notebook backend the figure drawn in the
# previous cell is presumably already closed here, which would save an empty
# image - confirm, or move this call into the plotting cell.
plt.savefig('Weather_File/Latitude_vs_Temperature Plot')

## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude for every retrieved city
plt.scatter(city_df["Latitude"], city_df["Humidity"], edgecolors="black", facecolors="steelblue")
plt.title(f"City Latitude vs. Humidity {today}")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
# Grid visibility is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is rejected by newer releases.
plt.grid(True, which="major", axis="both", linestyle="-", color="lightgrey")

In [None]:
# Save the current figure as a PNG.
# NOTE(review): with the inline notebook backend the figure drawn in the
# previous cell is presumably already closed here, which would save an empty
# image - confirm, or move this call into the plotting cell.
plt.savefig("Weather_File/Lat_vs_Humidity.png")

## Latitude vs. Cloudiness Plot

In [None]:

# Scatter of cloudiness against latitude for every retrieved city
plt.scatter(city_df["Latitude"], city_df["Cloudiness"], edgecolors="black", facecolors="steelblue")
plt.title(f"City Latitude vs. Cloudiness {today}")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
# Grid visibility is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is rejected by newer releases.
plt.grid(True, which="major", axis="both", linestyle="-", color="lightgrey")
# Saved in the same cell as the plot, while the figure is still current
plt.savefig("Weather_File/LatvsCloudiness.png")


In [None]:
# Save the current figure under a second file name (the plotting cell above
# already writes LatvsCloudiness.png).
# NOTE(review): with the inline notebook backend the figure drawn in the
# previous cell is presumably already closed here, which would save an empty
# image - confirm whether this extra copy is needed at all.
plt.savefig("Weather_File/LatitudeVsCloudiness.png")

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude for every retrieved city
plt.scatter(city_df["Latitude"], city_df["Wind Speed"], edgecolors="black", facecolors="steelblue")
plt.title(f"City Latitude vs. Wind Speed {today}")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
# Grid visibility is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is rejected by newer releases.
plt.grid(True, which="major", axis="both", linestyle="-", color="lightgrey")

In [None]:
# Save the current figure as a PNG.
# NOTE(review): with the inline notebook backend the figure drawn in the
# previous cell is presumably already closed here, which would save an empty
# image - confirm, or move this call into the plotting cell.
plt.savefig("Weather_File/LatvsWindSpeed.png")

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots
def linear_regression(X, Y, title, x_label, y_label):
    """Plot *Y* against *X* with its least-squares line and save the figure.

    The scatter and the fitted line are drawn on the current Matplotlib figure,
    which is then written to ``Weather_File/{title}.png``.

    Args:
        X: Numeric x values (list, array or Series).
        Y: Numeric y values, same length as *X*.
        title: Plot title; also used as the output file name.
        x_label: Label for the x axis.
        y_label: Label for the y axis.
    """
    # linregress takes only the data; it has no train/test split options
    slope, intercept, r_value, _, _ = linregress(X, Y)

    # Work on an array so the fitted line can be computed element-wise
    x_values = np.asarray(X, dtype=float)
    fitted = slope * x_values + intercept
    line_label = f"y = {slope:.2f}x + {intercept:.2f} (r\u00b2 = {r_value ** 2:.2f})"

    plt.scatter(x_values, Y, edgecolors="black", facecolors="steelblue", label="Original Data")
    plt.plot(x_values, fitted, "r", label=line_label)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    # Grid visibility is passed positionally: the `b=` keyword was renamed to
    # `visible=` in Matplotlib 3.5 and is rejected by newer releases.
    plt.grid(True, which="major", axis="both", linestyle="-", color="lightgrey")
    # Without a legend the labels given to scatter/plot are never displayed
    plt.legend()
    plt.savefig(f"Weather_File/{title}.png")

In [None]:
# Create Northern and Southern Hemisphere DataFrames.
# Latitude is measured from the equator (0): positive values lie north of it,
# negative values south of it.
# Horizontal mapping lines on Earth are lines of latitude. They are known as
# "parallels" of latitude because they run parallel to the equator. One way to
# picture them is as imaginary horizontal "hula hoops" around the Earth, the
# biggest one at the equator and progressively smaller ones stacked above and
# below it up to the North and South Poles.
# Rows whose Latitude is exactly 0 end up in neither DataFrame.
north = city_df[city_df["Latitude"] > 0]
south = city_df[city_df["Latitude"] < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# The x and y axis labels are separate arguments; the original passed them
# fused into one string, leaving y_label missing.
linear_regression(
    north["Latitude"],
    north["Max Tempature"],
    "Northern Hemisphere - Max Temp vs. Latitude Linear Regression",
    "Latitude (North)",
    "Max Temperature (F)",
)

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression