# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

#Weather URL
weather_url = "http://api.openweathermap.org/data/2.5/weather?"

# Output File (CSV)
output_data_file = "../output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
#Columns (Keys)
is_fetch_ok = "Is_Fetch_Ok"
city_id     = "City_ID"
city        = "City"
cloudiness  = "Cloudiness"
country     = "Country"
date        = "Date"
humidity    = "Humidity"
lat         = "Lat"
lng         = "Lng"
max_temp    = "Max Temp"
wind_speed  = "Wind Speed"

# List for holding lat_lngs and cities
lat_lngs     = []

#Keeping track of processed cities
cities       = []

#Stores all weather-relevant data for a given set of coordinates
weather_container = []




#************************************#
#*            Test Vars             *#
#************************************#
geographical_span = 15 #was 1500 TODO change back
is_test = True     #TODO remove
test_humidity = 70 #TODO remove
#************************************#
#*            Test Vars             *#
#************************************#





# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=geographical_span)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=geographical_span)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    
    city_name    = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    country_code = citipy.nearest_city(lat_lng[0], lat_lng[1]).country_code

    # If the city is unique, then add it to 'cities'
    if city_name not in cities:
        
        weather_hash  = {
                             is_fetch_ok : False,
                             city_id     : None, 
                             city        : city_name, 
                             cloudiness  : None, 
                             country     : country_code.upper(),
                             date        : None,
                             humidity    : None,
                             lat         : lat_lng[0],
                             lng         : lat_lng[1],
                             max_temp    : None,
                             wind_speed  : None
                        }        
        
        weather_container.append(weather_hash)
        cities.append(city_name)
        
# Print the city count to confirm (sufficient) count
print(f"The pseudo-random number of unique cities in this unit of execution is {len(cities)}")

The pseudo-random number of unique cities in this unit of execution is 15


In [3]:
#Function to print running log of city number and name.  Will print city name if the city contains 500+ people, otherwise
#the city name will not be printed

def print_city_log(status_code, 
                   city_name          = None, 
                   rec_set            = None, 
                   success_fetch_cntr = None):
    
    if(status_code == 200):
        print(f"Processing Record {success_fetch_cntr} of Set {rec_set} | {city_name}")
    else:
        print("City not found. Skipping...")
        
def print_line_sep():
    print("----------------------------")

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [4]:
# Build query URL
  
#out_cols = ["City_ID","Cloudiness","Country","Date","Humidity","Lat","Lng","Max Temp","Wind Speed"]

total_success_fetch_cntr = 0
rec_set_cntr = 0
set_cntr = 1

print("Beginning Data Retrieval")
print_line_sep()

for i in range(len(weather_container)):
    
    weather_hash = weather_container[i]
    city_name    = weather_hash[city]
    weather_query_url = weather_url + "appid=" + weather_api_key + "&q=" + city_name
    
    if (i + 1) % 60 != 0:
        
        weather_response  = requests.get(weather_query_url)
        status_code       = weather_response.status_code

        if status_code == 200 :

            weather_json = weather_response.json()
            
            rec_set_cntr += 1
            
            weather_hash[is_fetch_ok] = True
            weather_hash[city_id]     = total_success_fetch_cntr #id starts at 0 not 1
            total_success_fetch_cntr += 1
            
            try:
                
                #weather_hash[date]        = "" if weather_json       == None  or \
                                                  #weather_json["dt"] == ""    or \
                                                  #weather_json["dt"] == None else str(weather_json["dt"])
                weather_hash[date]        = weather_json["dt"]
                
                
                
                
                
                weather_hash[max_temp]    = weather_json["main"]["temp_max"]
                weather_hash[humidity]    = weather_json["main"]["humidity"]      
                weather_hash[wind_speed]  = weather_json["wind"]["speed"]
                weather_hash[cloudiness]  = weather_json["clouds"]["all"]
                
                if is_test:
                    time.sleep(1)              
            
            except IndexError as ie:
                print(f"Index Exception occured {ie}")
                break
                
            except Exception as e:
                print(f"Index Exception occured {e}")
                break                
                
            else:
                      
                print_city_log(status_code,
                               city_name,
                               set_cntr,
                               rec_set_cntr)
                
        else:
            print_city_log(status_code)
    
    else:
        set_cntr    += 1
        rec_set_cntr = 0
        time.sleep(60)

print_line_sep()
print("Data Retrieval Complete")
print_line_sep()
 

Beginning Data Retrieval
----------------------------
Processing Record 1 of Set 1 | cape town
Processing Record 2 of Set 1 | rikitea
Processing Record 3 of Set 1 | new norfolk
Processing Record 4 of Set 1 | baykit
Processing Record 5 of Set 1 | rudbar
Processing Record 6 of Set 1 | srednekolymsk
Processing Record 7 of Set 1 | chokurdakh
Processing Record 8 of Set 1 | punta arenas
Processing Record 9 of Set 1 | bluff
Processing Record 10 of Set 1 | avarua
Processing Record 11 of Set 1 | hilo
Processing Record 12 of Set 1 | hithadhoo
Processing Record 13 of Set 1 | baturaja
Processing Record 14 of Set 1 | praia da vitoria
City not found. Skipping...
----------------------------
Data Retrieval Complete
----------------------------


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [5]:
# Put data into DataFrame
raw_results = pd.DataFrame(weather_container)
fetched_results = raw_results.loc[raw_results["Is_Fetch_Ok"] == True][[city_id,
                                                                       city,
                                                                       cloudiness,
                                                                       country,
                                                                       date,
                                                                       humidity,
                                                                       lat,
                                                                       lng,
                                                                       max_temp,
                                                                       wind_speed]].set_index(city_id)


#Export the DataFrame to CSV
fetched_results.to_csv(output_data_file,header=True,mode='w',index=True)

#Display the DataFrame
fetched_results = fetched_results.reset_index()
fetched_results = fetched_results.astype({city_id: 'int'}) #.set_index(city_id)
fetched_results[[city,lat,lng,max_temp,humidity,cloudiness,wind_speed,country,date]].head(15)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,cape town,-47.170951,4.615849,292.38,61.0,0.0,2.24,ZA,1635188000.0
1,rikitea,-56.245338,-135.401303,296.96,71.0,93.0,8.25,PF,1635188000.0
2,new norfolk,-77.533652,128.467365,282.87,72.0,8.0,0.89,AU,1635188000.0
3,baykit,60.535134,98.111909,271.98,95.0,100.0,3.27,RU,1635188000.0
4,rudbar,29.718048,63.041542,293.12,17.0,0.0,2.24,AF,1635188000.0
5,srednekolymsk,72.396743,155.835827,252.64,97.0,90.0,1.93,RU,1635188000.0
6,chokurdakh,85.892094,149.911058,242.84,100.0,99.0,0.43,RU,1635188000.0
7,punta arenas,-82.22783,-90.541014,280.21,49.0,40.0,10.29,CL,1635188000.0
8,bluff,-67.603138,159.907849,284.03,92.0,66.0,3.45,NZ,1635188000.0
9,avarua,-24.631129,-165.24339,297.18,64.0,73.0,6.17,CK,1635188000.0


In [6]:
#Display fundamental statistics visualization
fetched_results[[city,
                 lat,
                 lng,
                 max_temp,
                 humidity,
                 cloudiness,
                 wind_speed,
                 country,
                 date]].describe()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,14.0,14.0,14.0,14.0,14.0,14.0,14.0
mean,-3.01838,27.102536,284.579286,75.642857,61.428571,3.798571,1635188000.0
std,57.641155,118.85777,17.719205,22.98363,39.351892,2.970804,51.79636
min,-82.22783,-165.24339,242.84,17.0,0.0,0.43,1635188000.0
25%,-53.976741,-74.218884,280.875,65.75,25.0,2.0075,1635188000.0
50%,-4.588558,69.162796,292.75,78.5,77.0,3.18,1635188000.0
75%,39.411137,122.345233,296.7875,94.25,92.25,5.5975,1635188000.0
max,85.892094,159.907849,301.4,100.0,100.0,10.29,1635188000.0


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [7]:
#  Get the indices of cities that have humidity over 100%.

humidity_limit = test_humidity
print(f"\nIdentifying records with humidity over {humidity_limit}%")

excessive_humidity_df = fetched_results.loc[fetched_results[humidity] > humidity_limit].reset_index()[[city_id,country,city,humidity]] \
                                                                                       .astype({city_id: 'int'})                                                                       
excessive_humidity_df.head(15)


Identifying records with humidity over 70%


Unnamed: 0,City_ID,Country,City,Humidity
0,1,PF,rikitea,71.0
1,2,AU,new norfolk,72.0
2,3,RU,baykit,95.0
3,5,RU,srednekolymsk,97.0
4,6,RU,chokurdakh,100.0
5,8,NZ,bluff,92.0
6,10,US,hilo,87.0
7,11,MV,hithadhoo,74.0
8,12,ID,baturaja,97.0
9,13,PT,praia da vitoria,83.0


In [10]:
#Re-configure City ID type from float to int to match visualization requirement

excessive_humidity_df = excessive_humidity_df.astype({city_id: 'int'}) #.set_index(city_id)
excessive_humidity_df.head(15)

Unnamed: 0,City_ID,Country,City,Humidity
0,1,PF,rikitea,71.0
1,2,AU,new norfolk,72.0
2,3,RU,baykit,95.0
3,5,RU,srednekolymsk,97.0
4,6,RU,chokurdakh,100.0
5,8,NZ,bluff,92.0
6,10,US,hilo,87.0
7,11,MV,hithadhoo,74.0
8,12,ID,baturaja,97.0
9,13,PT,praia da vitoria,83.0


In [12]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.

idxs_to_drop = list((x for x in list(excessive_humidity_df[city_id])))
idxs_to_drop

clean_city_data = fetched_results.drop(fetched_results.index[idxs_to_drop], inplace = False )
clean_city_data.head(15)

Unnamed: 0,City_ID,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,0,cape town,0.0,ZA,1635188000.0,61.0,-47.170951,4.615849,292.38,2.24
4,4,rudbar,0.0,AF,1635188000.0,17.0,29.718048,63.041542,293.12,2.24
7,7,punta arenas,40.0,CL,1635188000.0,49.0,-82.22783,-90.541014,280.21,10.29
9,9,avarua,73.0,CK,1635188000.0,64.0,-24.631129,-165.24339,297.18,6.17


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression