# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [163]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

#Weather URL
weather_url = "http://api.openweathermap.org/data/2.5/weather?"

# Output File (CSV)
output_data_file = "../output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

In [164]:
class Keys:
    is_fetch_ok = "Is_Fetch_Ok"
    city_id     = "City_ID"
    city        = "City"
    cloudiness  = "Cloudiness"
    country     = "Country"
    date        = "Date"
    humidity    = "Humidity"
    lat         = "Lat"
    lng         = "Lng"
    max_temp    = "Max Temp"
    wind_speed  = "Wind Speed"   

## Generate Cities List

In [165]:

# List for holding lat_lngs and cities
lat_lngs     = []

#Keeping track of processed cities
cities       = []

#Stores all weather-relevant data for a given set of coordinates
weather_container = []

#************************************#
#*            Test Vars             *#
#************************************#
geographical_span = 15 #was 1500 TODO change back
is_test = True     #TODO remove
test_humidity = 70 #TODO remove
#************************************#
#*            Test Vars             *#
#************************************#

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=geographical_span)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=geographical_span)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    
    city_name    = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    country_code = citipy.nearest_city(lat_lng[0], lat_lng[1]).country_code

    # If the city is unique, then add it to 'cities'
    if city_name not in cities:
        
        weather_hash  = {
                             Keys.is_fetch_ok : False,
                             Keys.city_id     : None, 
                             Keys.city        : city_name, 
                             Keys.cloudiness  : None, 
                             Keys.country     : country_code.upper(),
                             Keys.date        : None,
                             Keys.humidity    : None,
                             Keys.lat         : lat_lng[0],
                             Keys.lng         : lat_lng[1],
                             Keys.max_temp    : None,
                             Keys.wind_speed  : None
                        }        
        
        weather_container.append(weather_hash)
        cities.append(city_name)
        
# Print the city count to confirm (sufficient) count
print(f"The pseudo-random number of unique cities in this unit of execution is {len(cities)}")

The pseudo-random number of unique cities in this unit of execution is 15


In [166]:
#Function to print running log of city number and name.  Will print city name if the city contains 500+ people, otherwise
#the city name will not be printed

def print_city_log(status_code, 
                   city_name          = None, 
                   rec_set            = None, 
                   success_fetch_cntr = None):
    
    if(status_code == 200):
        print(f"Processing Record {success_fetch_cntr} of Set {rec_set} | {city_name}")
    else:
        print("City not found. Skipping...")
        
def print_line_sep():
    print("----------------------------")

In [167]:
def display_dataframe(data_frame,
                      reset_index_flag = False, 
                      copy_flag        = False, 
                      in_place_flag    = True) -> pd.DataFrame:
    
    """
    Returns formatted copy of given DataFrame

    :param DataFrame data_frame: The DataFrame to display
    :param Bool reset_index_flag: The recipient of the message
    :param Bool copy_flag: The body of the message
    :param Bool in_place_flag: The priority of the message, can be a number 1-5
    """    
    df = data_frame.copy(deep = True) #Not in situ
    
    if reset_index_flag:
        df.reset_index(inplace=in_place_flag)
    
    df.astype({Keys.city_id:    'int'},   copy=copy_flag) #.set_index(city_id)
    df.astype({Keys.humidity:   'int'},   copy=copy_flag) 
    df.astype({Keys.cloudiness: 'int'},   copy=copy_flag) 
    df.astype({Keys.lat:        'float'}, copy=copy_flag).round(2)
    df.astype({Keys.lng:        'float'}, copy=copy_flag).round(2)
    df.astype({Keys.date:       'str'})
    df[date].replace('.0', '', inplace=in_place_flag) 
    
    return df[[Keys.city,
               Keys.lat,
               Keys.lng,
               Keys.max_temp,
               Keys.humidity,
               Keys.cloudiness,
               Keys.wind_speed,
               Keys.country,
               Keys.date]]  

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [168]:
# Build query URL
  
#out_cols = ["City_ID","Cloudiness","Country","Date","Humidity","Lat","Lng","Max Temp","Wind Speed"]

total_success_fetch_cntr = 0
rec_set_cntr = 0
set_cntr = 1

print("Beginning Data Retrieval")
print_line_sep()

for i in range(len(weather_container)):
    
    weather_hash = weather_container[i]
    city_name    = weather_hash[city]
    weather_query_url = weather_url + "appid=" + weather_api_key + "&q=" + city_name
    
    if (i + 1) % 60 != 0:
        
        weather_response  = requests.get(weather_query_url)
        status_code       = weather_response.status_code

        if status_code == 200 :

            weather_json = weather_response.json()
            
            rec_set_cntr += 1
            
            weather_hash[is_fetch_ok] = True
            weather_hash[city_id]     = total_success_fetch_cntr #id starts at 0 not 1
            total_success_fetch_cntr += 1
            
            try:
                
                #weather_hash[date]        = "" if weather_json       == None  or \
                                                  #weather_json["dt"] == ""    or \
                                                  #weather_json["dt"] == None else str(weather_json["dt"])
                
                weather_hash[date]        = weather_json["dt"]
                
                weather_hash[max_temp]    = weather_json["main"]["temp_max"]
                weather_hash[humidity]    = weather_json["main"]["humidity"]      
                weather_hash[wind_speed]  = weather_json["wind"]["speed"]
                weather_hash[cloudiness]  = weather_json["clouds"]["all"]
                
                if is_test:
                    time.sleep(1)              
            
            except IndexError as ie:
                print(f"Index Exception occured {ie}")
                break
                
            except Exception as e:
                print(f"Index Exception occured {e}")
                break                
                
            else:
                      
                print_city_log(status_code,
                               city_name,
                               set_cntr,
                               rec_set_cntr)
                
        else:
            print_city_log(status_code)
    
    else:
        set_cntr    += 1
        rec_set_cntr = 0
        time.sleep(60)

print_line_sep()
print("Data Retrieval Complete")
print_line_sep()


Beginning Data Retrieval
----------------------------
Processing Record 1 of Set 1 | pitimbu
Processing Record 2 of Set 1 | palana
Processing Record 3 of Set 1 | jining
Processing Record 4 of Set 1 | jacksonville beach
Processing Record 5 of Set 1 | bathsheba
Processing Record 6 of Set 1 | chokurdakh
Processing Record 7 of Set 1 | masterton
Processing Record 8 of Set 1 | port alfred
Processing Record 9 of Set 1 | kaitangata
Processing Record 10 of Set 1 | fukue
Processing Record 11 of Set 1 | shingu
Processing Record 12 of Set 1 | umm lajj
Processing Record 13 of Set 1 | cape town
Processing Record 14 of Set 1 | sept-iles
Processing Record 15 of Set 1 | faanui
----------------------------
Data Retrieval Complete
----------------------------


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [169]:
# Put data into DataFrame
raw_results = pd.DataFrame(weather_container)
fetched_results_df = raw_results.loc[raw_results["Is_Fetch_Ok"] == True][[city_id,
                                                                       city,
                                                                       cloudiness,
                                                                       country,
                                                                       date,
                                                                       humidity,
                                                                       lat,
                                                                       lng,
                                                                       max_temp,
                                                                       wind_speed]].set_index(city_id)

#Export the DataFrame to CSV
fetched_results_df.to_csv(output_data_file,header=True,mode='w',index=True)

#Display the DataFrame
df = format_dataframe_for_display(display_df_1,
                                  reset_index_flag = True,
                                  copy_flag        = False,
                                  in_place_flag    = True)
df.head(10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,omboue,-7.23764,2.00953,297.7,89,93,3.46,GA,1635289234
1,kaeo,-28.743817,172.852828,292.77,69,100,2.64,NZ,1635289236
2,chokurdakh,80.156437,142.638173,252.63,97,97,2.21,RU,1635288900
3,vila,-14.919762,174.951315,283.11,87,36,1.37,VU,1635289238
4,qaanaaq,81.148517,-83.861294,265.88,95,100,6.34,GL,1635289239
5,mahibadhoo,3.575889,73.008867,300.72,77,97,5.07,MV,1635289241
6,andra,63.88943,66.498042,269.33,60,50,5.56,RU,1635289242
7,labuhan,-18.288568,95.978345,299.68,80,93,4.13,ID,1635289217
8,upernavik,82.540469,-48.016259,263.99,48,34,2.31,GL,1635288972
9,hermanus,-87.056943,-13.067097,292.54,82,100,7.44,ZA,1635289245


In [170]:
df = None #Clean up

In [171]:
#Display discriptive fundamental statistics visualization of successfully fetched results

fetched_results_df[[city,
                    lat,
                    lng,
                    max_temp,
                    humidity,
                    cloudiness,
                    wind_speed,
                    country,
                    date]].describe()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,15.0,15.0,15.0,15.0,15.0,15.0,15.0
mean,9.80792,47.917654,288.855333,69.066667,54.4,3.450667,1635291000.0
std,43.31669,108.049127,12.645419,23.389456,43.124073,2.915073,115.2827
min,-57.489756,-157.093257,252.89,1.0,0.0,1.03,1635291000.0
25%,-28.237931,-40.860525,288.195,59.5,12.5,2.125,1635291000.0
50%,25.649093,38.206184,290.86,69.0,75.0,2.39,1635291000.0
75%,38.185925,142.125628,298.33,85.0,95.5,3.495,1635291000.0
max,74.401977,175.604738,301.49,97.0,100.0,12.39,1635291000.0


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [172]:
#  Get the indices of cities that have humidity over 100%.

humidity_limit = test_humidity  ##Test Humidity [Test Variable] set above
print(f"\nIdentifying records with humidity over {humidity_limit}% to remove")

excessive_humidity_df = fetched_results_df.loc[fetched_results_df[humidity] > humidity_limit].reset_index()[[city_id,country,city,humidity]]  \
                                                                                             .astype({city_id: 'int'})                                                                       
excessive_humidity_df.head(15)


Identifying records with humidity over 70% to remove


Unnamed: 0,City_ID,Country,City,Humidity
0,0,BR,pitimbu,73
1,1,RU,palana,96
2,5,RU,chokurdakh,97
3,7,ZA,port alfred,86
4,9,JP,fukue,88
5,13,CA,sept-iles,84
6,14,PF,faanui,77


In [173]:
#Re-configure City ID type from float to int to match visualization requirement
excessive_humidity_df = excessive_humidity_df.astype({city_id: 'int'}) #.set_index(city_id)


In [174]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
idxs_to_drop = list((x for x in list(excessive_humidity_df[city_id])))

# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
clean_city_data = fetched_results_df.drop(fetched_results_df.index[idxs_to_drop], inplace = False )

#Display copy of dataset in a manner consistent other visualizations 
df = format_dataframe_for_display(clean_city_data,
                                  reset_index_flag = True,
                                  copy_flag        = False,
                                  in_place_flag    = True)

df.head(15)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,jining,42.346207,113.479359,287.04,1,31,2.28,CN,1635291330
1,jacksonville beach,30.268739,-80.984579,298.18,54,20,3.6,US,1635291079
2,bathsheba,13.619641,-51.51544,301.49,69,20,6.69,BB,1635291088
3,masterton,-41.028506,175.604738,290.86,57,100,2.39,NZ,1635291335
4,kaitangata,-54.574744,174.507564,289.38,68,90,2.28,NZ,1635291337
5,shingu,32.510137,137.460734,289.7,67,98,1.04,JP,1635291055
6,umm lajj,25.649093,38.206184,299.21,57,1,2.53,SA,1635291107
7,cape town,-57.489756,2.425422,292.66,62,0,1.34,ZA,1635291312


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression