# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

#Weather URL
weather_url = "http://api.openweathermap.org/data/2.5/weather?"

# Output File (CSV)
output_data_file = "../output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [2]:
class Keys:
    is_fetch_ok = "Is_Fetch_Ok"
    city_id     = "City_ID"
    city        = "City"
    cloudiness  = "Cloudiness"
    country     = "Country"
    date        = "Date"
    humidity    = "Humidity"
    lat         = "Lat"
    lng         = "Lng"
    max_temp    = "Max Temp"
    wind_speed  = "Wind Speed"   

## Generate Cities List

In [3]:

# List for holding lat_lngs and cities
lat_lngs     = []

#Keeping track of processed cities
cities       = []

#Stores all weather-relevant data for a given set of coordinates
weather_container = []

#************************************#
#*            Test Vars             *#
#************************************#
geographical_span = 15 #was 1500 TODO change back
is_test = True     #TODO remove
test_humidity = 70 #TODO remove
#************************************#
#*            Test Vars             *#
#************************************#

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=geographical_span)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=geographical_span)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    
    city_name    = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    country_code = citipy.nearest_city(lat_lng[0], lat_lng[1]).country_code

    # If the city is unique, then add it to 'cities'
    if city_name not in cities:
        
        weather_hash  = {
                             Keys.is_fetch_ok : False,
                             Keys.city_id     : None, 
                             Keys.city        : city_name, 
                             Keys.cloudiness  : None, 
                             Keys.country     : country_code.upper(),
                             Keys.date        : None,
                             Keys.humidity    : None,
                             Keys.lat         : lat_lng[0],
                             Keys.lng         : lat_lng[1],
                             Keys.max_temp    : None,
                             Keys.wind_speed  : None
                        }        
        
        weather_container.append(weather_hash)
        cities.append(city_name)
        
# Print the city count to confirm (sufficient) count
print(f"The pseudo-random number of unique cities in this unit of execution is {len(cities)}")

The pseudo-random number of unique cities in this unit of execution is 15


In [4]:
def print_city_log(status_code : int, 
                   city_name   : str       = None, 
                   rec_set     : int       = None, 
                   success_fetch_cntr :int = None) -> None:
    """
    Prints running log of city number and name, if and only if, if the city contains 500+ people, otherwise
    the city name will not be printed

    :param int status_code: The status code of the Response
    :param str city_name: The city name for a given set of coordinates
    :param int rec_set: The count of the processing record set
    :param int success_fetch_cntr: The current record fetch count
    """        
    if(status_code == 200):
        print(f"Processing Record {success_fetch_cntr} of Set {rec_set} | {city_name}")
    else:
        print("City not found. Skipping...")

In [5]:
def print_line_sep() -> None:
    
    """
    Prints a line delimiter
    """       
    
    print("----------------------------")

In [6]:
def display_dataframe(data_frame,
                      reset_index_flag = False, 
                      copy_flag        = False, 
                      in_place_flag    = True) -> pd.DataFrame:
    
    """
    Returns formatted copy of given DataFrame

    :param DataFrame data_frame: The DataFrame to display
    :param Bool reset_index_flag: The recipient of the message
    :param Bool copy_flag: The body of the message
    :param Bool in_place_flag: The priority of the message, can be a number 1-5
    """  
    
    #try:
        
    df = data_frame.copy(deep = True) #Not in situ

    if reset_index_flag:
        df.reset_index(inplace=in_place_flag)

    #df.astype({Keys.city_id:     int}, copy=copy_flag) #.set_index(city_id)
    #df.astype({Keys.humidity:    int}, copy=copy_flag) 
    #df.astype({Keys.cloudiness:  int}, copy=copy_flag) 
    
    df = df.astype({Keys.city_id:     int, 
                    Keys.humidity:    int,
                    Keys.cloudiness:  int,
                    Keys.date:        str
                   })
    
    df = df.astype({Keys.lat: float}).round(2)
    df = df.astype({Keys.lng: float}).round(2)

    df = df[Keys.date].replace('.0', '') 
    
    df.head(20)

    #Source: https://stackoverflow.com/questions/19738169/convert-column-of-date-objects-in-pandas-dataframe-to-strings  
    #Source: https://stackoverflow.com/questions/1759455/how-can-i-account-for-period-am-pm-using-strftime
    #https://pandas.pydata.org/docs/reference/api/pandas.to_datetime.html

    #try:
        #df[Keys.date] = pd.to_datetime(df[Keys.date], unit='s')

    #except Exception as e:
        #print(f"Exception occured in display_dataframe on line and the exception is {e.__str__()}")

#except Exception as e:
    #print(f"EXCEPTION    {e.__str__()}")
             
    #return df[[Keys.city,
               #Keys.lat,
               #Keys.lng,
              # Keys.max_temp,
              # Keys.humidity,
              # Keys.cloudiness,
              # Keys.wind_speed,
               #Keys.country,
               #Keys.date]]  

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [7]:
# Build query URL
  
#out_cols = ["City_ID","Cloudiness","Country","Date","Humidity","Lat","Lng","Max Temp","Wind Speed"]

total_success_fetch_cntr = 0
rec_set_cntr = 0
set_cntr = 1

print("Beginning Data Retrieval")
print_line_sep()

for i in range(len(weather_container)):
    
    weather_hash = weather_container[i]
    city_name    = weather_hash[Keys.city]
    weather_query_url = weather_url + "appid=" + weather_api_key + "&q=" + city_name
    
    if (i + 1) % 60 != 0:
        
        weather_response  = requests.get(weather_query_url)
        status_code       = weather_response.status_code

        if status_code == 200 :

            weather_json = weather_response.json()
            
            rec_set_cntr += 1
            
            weather_hash[Keys.is_fetch_ok] = True
            weather_hash[Keys.city_id]     = total_success_fetch_cntr #id starts at 0 not 1
            total_success_fetch_cntr += 1
            
            try:
                
                #weather_hash[date]        = "" if weather_json       == None  or \
                                                  #weather_json["dt"] == ""    or \
                                                  #weather_json["dt"] == None else str(weather_json["dt"])
                
                weather_hash[Keys.date]        = weather_json["dt"]
                
                weather_hash[Keys.max_temp]    = weather_json["main"]["temp_max"]
                weather_hash[Keys.humidity]    = weather_json["main"]["humidity"]      
                weather_hash[Keys.wind_speed]  = weather_json["wind"]["speed"]
                weather_hash[Keys.cloudiness]  = weather_json["clouds"]["all"]
                
                if is_test:
                    time.sleep(1)              
            
            except IndexError as ie:
                print(f"Index Exception occured {ie}")
                break
                
            except Exception as e:
                print(f"Index Exception occured {e}")
                break                
                
            else:
                      
                print_city_log(status_code,
                               city_name,
                               set_cntr,
                               rec_set_cntr)
                
        else:
            print_city_log(status_code)
    
    else:
        set_cntr    += 1
        rec_set_cntr = 0
        time.sleep(60)

print_line_sep()
print("Data Retrieval Complete")
print_line_sep()


Beginning Data Retrieval
----------------------------
Processing Record 1 of Set 1 | marzuq
City not found. Skipping...
Processing Record 2 of Set 1 | mataura
Processing Record 3 of Set 1 | yellowknife
Processing Record 4 of Set 1 | albany
Processing Record 5 of Set 1 | mirabad
Processing Record 6 of Set 1 | tshikapa
Processing Record 7 of Set 1 | hobart
Processing Record 8 of Set 1 | new norfolk
City not found. Skipping...
Processing Record 9 of Set 1 | narsaq
Processing Record 10 of Set 1 | puerto ayora
Processing Record 11 of Set 1 | camacha
Processing Record 12 of Set 1 | sayyan
Processing Record 13 of Set 1 | rajpipla
----------------------------
Data Retrieval Complete
----------------------------


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [8]:
# Put data into DataFrame
raw_results = pd.DataFrame(weather_container)
fetched_results_df = raw_results.loc[raw_results[Keys.is_fetch_ok] == True][[Keys.city_id,
                                                                             Keys.city,
                                                                             Keys.cloudiness,
                                                                             Keys.country,
                                                                             Keys.date,
                                                                             Keys.humidity,
                                                                             Keys.lat,
                                                                             Keys.lng,
                                                                             Keys.max_temp,
                                                                             Keys.wind_speed]].set_index(Keys.city_id)

#Export the DataFrame to CSV
fetched_results_df.to_csv(output_data_file,
                          header=True,
                          mode='w',
                          index=True)

#Display the DataFrame
df = display_dataframe(fetched_results_df,
                       reset_index_flag = True,
                       copy_flag        = False,
                       in_place_flag    = True)

df.head(10)


#display_dataframe = fetched_results_df.copy(deep = True) #Not in situ

#display_dataframe = display_dataframe.reset_index()


#display_dataframe = display_dataframe.astype({Keys.city_id: int})  #.set_index(city_id)
#df.astype({Keys.humidity:   'int'},   copy=False) 
#df.astype({Keys.cloudiness: 'int'},   copy=False) 

#display_dataframe =display_dataframe.astype({Keys.lat:float}).round(2)
#display_dataframe =display_dataframe.astype({Keys.lng:float}).round(2)

#df.astype({Keys.date: 'str'},   copy = False)
#df[Keys.date].replace('.0', '', inplace = True) 


#display_dataframe[[Keys.city_id,
                   #Keys.city,
                  # Keys.lat,
                   #Keys.lng,
                   #Keys.max_temp,
                   #Keys.humidity,
                   #Keys.cloudiness,
                   #Keys.wind_speed,
                   #Keys.country,
                   #Keys.date]].head(10) 


KeyError: "None of [Index(['City', 'Lat', 'Lng', 'Max Temp', 'Humidity', 'Cloudiness',\n       'Wind Speed', 'Country', 'Date'],\n      dtype='object')] are in the [index]"

In [None]:
df = None #Clean up

In [None]:
#Display discriptive fundamental statistics visualization of successfully fetched results

fetched_results_df[[Keys.city,
                    Keys.lat,
                    Keys.lng,
                    Keys.max_temp,
                    Keys.humidity,
                    Keys.cloudiness,
                    Keys.wind_speed,
                    Keys.country,
                    Keys.date]].describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.

humidity_limit = test_humidity  ##Test Humidity [Test Variable] set above
print(f"\nIdentifying records with humidity over {humidity_limit}% to remove")

excessive_humidity_df = fetched_results_df.loc[fetched_results_df[humidity] > humidity_limit].reset_index()[[city_id,country,city,humidity]]  \
                                                                                             .astype({city_id: 'int'})                                                                       
excessive_humidity_df.head(15)

In [None]:
#Re-configure City ID type from float to int to match visualization requirement
excessive_humidity_df = excessive_humidity_df.astype({city_id: 'int'}) #.set_index(city_id)


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
idxs_to_drop = list((x for x in list(excessive_humidity_df[city_id])))

# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
clean_city_data = fetched_results_df.drop(fetched_results_df.index[idxs_to_drop], inplace = False )

#Display copy of dataset in a manner consistent other visualizations 
df = format_dataframe_for_display(clean_city_data,
                                  reset_index_flag = True,
                                  copy_flag        = False,
                                  in_place_flag    = True)

df.head(15)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression