# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

#Weather URL
weather_url = "http://api.openweathermap.org/data/2.5/weather?"

# Output File (CSV)
output_data_file = "../output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
#Columns (Keys)
is_fetch_ok = "Is_Fetch_Ok"
city_id     = "City_ID"
city        = "City"
cloudiness  = "Cloudiness"
country     = "Country"
date        = "Date"
humidity    = "Humidity"
lat         = "Lat"
lng         = "Lng"
max_temp    = "Max Temp"
wind_speed  = "Wind Speed"

# List for holding lat_lngs and cities
lat_lngs     = []

#Keeping track of processed cities
cities       = []

#Stores all weather-relevant data for a given set of coordinates
weather_container = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    
    city_name    = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    country_code = citipy.nearest_city(lat_lng[0], lat_lng[1]).country_code

    # If the city is unique, then add it to 'cities'
    if city_name not in cities:
        
        weather_hash  = {
                             is_fetch_ok : False,
                             city_id     : None, 
                             city        : city_name, 
                             cloudiness  : None, 
                             country     : country_code.upper(),
                             date        : None,
                             humidity    : None,
                             lat         : lat_lng[0],
                             lng         : lat_lng[1],
                             max_temp    : None,
                             wind_speed  : None
                        }        
        
        weather_container.append(weather_hash)
        cities.append(city_name)
        
# Print the city count to confirm (sufficient) count
print(f"The pseudo-random number of unique cities in this unit of execution is {len(cities)}")

The pseudo-random number of unique cities in this unit of execution is 609


In [3]:
weather_container

[{'Is_Fetch_Ok': False,
  'City_ID': None,
  'City': 'zhigansk',
  'Cloudiness': None,
  'Country': 'RU',
  'Date': None,
  'Humidity': None,
  'Lat': 68.39128520451987,
  'Lng': 121.98477979431635,
  'Max Temp': None,
  'Wind Speed': None},
 {'Is_Fetch_Ok': False,
  'City_ID': None,
  'City': 'husavik',
  'Cloudiness': None,
  'Country': 'IS',
  'Date': None,
  'Humidity': None,
  'Lat': 75.17255637283006,
  'Lng': -12.080263638295435,
  'Max Temp': None,
  'Wind Speed': None},
 {'Is_Fetch_Ok': False,
  'City_ID': None,
  'City': 'klaksvik',
  'Cloudiness': None,
  'Country': 'FO',
  'Date': None,
  'Humidity': None,
  'Lat': 62.557199099038826,
  'Lng': -4.2807205360929,
  'Max Temp': None,
  'Wind Speed': None},
 {'Is_Fetch_Ok': False,
  'City_ID': None,
  'City': 'cape town',
  'Cloudiness': None,
  'Country': 'ZA',
  'Date': None,
  'Humidity': None,
  'Lat': -68.78704835540442,
  'Lng': -12.489973323269908,
  'Max Temp': None,
  'Wind Speed': None},
 {'Is_Fetch_Ok': False,
  'Cit

In [4]:
#Function to print running log of city number and name.  Will print city name if the city contains 500+ people, otherwise
#the city name will not be printed

def print_city_log(status_code, 
                   city_name          = None, 
                   rec_set            = None, 
                   success_fetch_cntr = None):
    
    if(status_code == 200):
        print(f"Processing Record {success_fetch_cntr} of Set {rec_set} | {city_name}")
    else:
        print("City not found. Skipping...")
        
def print_line_sep():
    print("----------------------------")

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [188]:
# Build query URL
  
#out_cols = ["City_ID","Cloudiness","Country","Date","Humidity","Lat","Lng","Max Temp","Wind Speed"]

total_success_fetch_cntr = 0
rec_set_cntr = 0
set_cntr = 1


print("Beginning Data Retrieval")
print_line_sep()

for i in range(len(weather_container)):
    
    weather_hash = weather_container[i]
    city_name    = weather_hash[city]
    weather_query_url = weather_url + "appid=" + weather_api_key + "&q=" + city_name
    
    if (i + 1) % 60 != 0:
        
        weather_response  = requests.get(weather_query_url)
        status_code       = weather_response.status_code

        if(status_code == 200):

            weather_json = weather_response.json()
            
            total_success_fetch_cntr += 1
            rec_set_cntr += 1
            
            weather_hash[is_fetch_ok] = True
            weather_hash[city_id]     = total_success_fetch_cntr
            
            try:
                
                weather_hash[date]        = "" if weather_json       == None  or \
                                                  weather_json["dt"] == ""    or \
                                                  weather_json["dt"] == None else str(weather_json["dt"])
                
                weather_hash[max_temp]    = weather_json["main"]["temp_max"]
                weather_hash[humidity]    = weather_json["main"]["humidity"]      
                weather_hash[wind_speed]  = weather_json["wind"]["speed"]
                weather_hash[cloudiness]  = weather_json["clouds"]["all"]
            
            except IndexError as ie:
                print(f"Index Exception occured {ie}")
                break
                
            except Exception as e:
                print(f"Index Exception occured {e}")
                break                
                
            else:
                      
                print_city_log(status_code,
                               city_name,
                               set_cntr,
                               rec_set_cntr)

        else:
            print_city_log(status_code)
    
    else:
        set_cntr    += 1
        rec_set_cntr = 0
        time.sleep(60)

print_line_sep()
print("Data Retrieval Complete")
print_line_sep()
 

Beginning Data Retrieval
--------------------------
1635169954
Processing Record 1 of Set 1 | zhigansk
1635169954
Processing Record 2 of Set 1 | husavik
1635169817
Processing Record 3 of Set 1 | klaksvik
1635169732
Processing Record 4 of Set 1 | cape town
1635169867
Processing Record 5 of Set 1 | hobart
1635169955
Processing Record 6 of Set 1 | kavaratti
1635169955
Processing Record 7 of Set 1 | rosarito
1635169956
Processing Record 8 of Set 1 | bredasdorp
1635169956
Processing Record 9 of Set 1 | chokurdakh
1635169956
Processing Record 10 of Set 1 | yuxia
1635169957
Processing Record 11 of Set 1 | provideniya
1635169957
Processing Record 12 of Set 1 | soeng sang
1635169957
Processing Record 13 of Set 1 | capitan miranda
1635169957
Processing Record 14 of Set 1 | whitianga
1635169957
Processing Record 15 of Set 1 | coria
1635169958
Processing Record 16 of Set 1 | qaanaaq
1635169819
Processing Record 17 of Set 1 | punta arenas
1635169958
Processing Record 18 of Set 1 | sinjar


KeyboardInterrupt: 

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [173]:
# Put data into DataFrame
raw_results = pd.DataFrame(weather_container)
fetched_results = raw_results.loc[raw_results["Is_Fetch_Ok"] == True][[city_id,
                                                                       city,
                                                                       cloudiness,
                                                                       country,
                                                                       date,
                                                                       humidity,
                                                                       lat,
                                                                       lng,
                                                                       max_temp,
                                                                       wind_speed,
                                                                       date]].set_index(city_id)

#Export the DataFrame to CSV
fetched_results.to_csv(output_data_file,header=True,mode='w',index=True)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [174]:
#  Get the indices of cities that have humidity over 100%.

humidity_limit = 98

humidity_over_100_pct = fetched_results.loc[fetched_results[humidity] > humidity_limit].reset_index()[[city_id,country,city,humidity]] \
                                                                                       .astype({city_id: 'int'})                                                                       
humidity_over_100_pct.head(5)

Unnamed: 0,City_ID,Country,City,Humidity
0,9,RU,chokurdakh,100.0
1,32,PF,mataura,99.0
2,67,RU,deputatskiy,99.0
3,131,RU,cherskiy,99.0
4,134,RU,kysyl-syr,99.0


In [175]:
fetched_results = fetched_results.reset_index()
fetched_results = fetched_results.astype({city_id: 'int'}) #.set_index(city_id)
fetched_results.head(5)

Unnamed: 0,City_ID,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed,Date.1
0,1,zhigansk,100.0,RU,1635170000.0,96.0,68.391285,121.98478,268.59,3.43,1635170000.0
1,2,husavik,65.0,IS,1635170000.0,83.0,75.172556,-12.080264,275.1,4.14,1635170000.0
2,3,klaksvik,89.0,FO,1635170000.0,85.0,62.557199,-4.280721,282.66,9.48,1635170000.0
3,4,cape town,0.0,ZA,1635170000.0,52.0,-68.787048,-12.489973,298.77,2.68,1635170000.0
4,5,hobart,90.0,AU,1635170000.0,81.0,-79.72269,144.019599,283.07,1.79,1635170000.0


In [176]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.

idxs_to_drop = list((x - 1 for x in list(humidity_over_100_pct[city_id])))
idxs_to_drop

clean_city_data = fetched_results.drop(fetched_results.index[idxs_to_drop], inplace = False )
clean_city_data.head(50)

Unnamed: 0,City_ID,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed,Date.1
0,1,zhigansk,100.0,RU,1635170000.0,96.0,68.391285,121.98478,268.59,3.43,1635170000.0
1,2,husavik,65.0,IS,1635170000.0,83.0,75.172556,-12.080264,275.1,4.14,1635170000.0
2,3,klaksvik,89.0,FO,1635170000.0,85.0,62.557199,-4.280721,282.66,9.48,1635170000.0
3,4,cape town,0.0,ZA,1635170000.0,52.0,-68.787048,-12.489973,298.77,2.68,1635170000.0
4,5,hobart,90.0,AU,1635170000.0,81.0,-79.72269,144.019599,283.07,1.79,1635170000.0
5,6,kavaratti,32.0,IN,1635170000.0,76.0,13.288848,64.377464,301.25,5.25,1635170000.0
6,7,rosarito,9.0,MX,1635170000.0,88.0,28.636867,-121.307221,289.88,0.05,1635170000.0
7,8,bredasdorp,7.0,ZA,1635170000.0,52.0,-47.161023,19.042477,293.86,7.72,1635170000.0
9,10,yuxia,100.0,CN,1635170000.0,82.0,34.039898,108.019969,286.49,1.75,1635170000.0
10,11,provideniya,55.0,RU,1635170000.0,87.0,53.819959,-177.279734,264.07,3.31,1635170000.0


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression