# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [189]:
from citipy import citipy
import random
import requests
from pprint import pprint
from config import mykey
import pandas as pd
import numpy as np
from scipy import stats



## Generate Cities List

In [190]:

# Collect current weather for 500 distinct cities: the first 250 rows come from
# the Northern Hemisphere pass, the remaining 250 from the Southern Hemisphere pass.
#
# Latitude ranges over -90..+90 and longitude over -180..+180.
# HTTPS keeps the API key out of clear-text traffic.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Seconds to wait on OpenWeatherMap before abandoning a request; without a
# timeout a single stalled connection would hang the whole cell.
REQUEST_TIMEOUT = 10

# [city_name, country_code] pairs that have already been looked up.
cities = []

# Parallel result columns: position i in every list describes the same city.
d_cities   = []
d_max_temp = []
d_humidity = []
d_lat      = []
d_lon      = []
d_country  = []
d_wind_spd = []
d_date     = []
d_clouds   = []
d_ids      = []  # never filled in this cell; kept so the name stays defined

record_number = 0

# One entry per hemisphere pass:
#   (integer latitudes to draw from, cumulative row target, latitude acceptance test)
hemisphere_passes = [
    (range(0, 90), 250, lambda lat: lat >= 0),   # Northern Hemisphere
    (range(-90, 0), 500, lambda lat: lat <= 0),  # Southern Hemisphere
]

for lat_choices, target_rows, in_hemisphere in hemisphere_passes:

    while len(d_cities) < target_rows:

        record_number += 1

        # Draw one random integer coordinate and snap it to the nearest known city.
        city = citipy.nearest_city(
            random.choice(lat_choices), random.randrange(-180, 180)
        )

        print(f"Processing Record {record_number} | {city.city_name}")

        city_key = [city.city_name, city.country_code]
        if city_key in cities:
            print("Duplicate city. Skipping...")
            continue

        # `params` lets requests URL-encode names containing spaces or accents.
        # The country code is sent along so the lookup targets the same city
        # that the duplicate check above keys on.
        weather_response = requests.get(
            url,
            params={
                "appid": mykey,
                "q": f"{city.city_name},{city.country_code}",
                "units": units,
            },
            timeout=REQUEST_TIMEOUT,
        )
        weather_json = weather_response.json()

        try:
            if weather_json["cod"] == 200:

                # Compare the untruncated latitude: int() rounds toward zero,
                # which let latitudes between -1 and 1 pass for either hemisphere.
                if not in_hemisphere(weather_json["coord"]["lat"]):
                    # Deliberately not recorded in `cities`, so the city stays
                    # eligible for the other hemisphere's pass.
                    continue

                # Read every field before appending anything, so a missing key
                # cannot leave the result lists with different lengths.
                row = [
                    (d_max_temp, weather_json["main"]["temp_max"]),
                    (d_humidity, weather_json["main"]["humidity"]),
                    (d_lat, weather_json["coord"]["lat"]),
                    (d_lon, weather_json["coord"]["lon"]),
                    (d_country, weather_json["sys"]["country"]),
                    (d_wind_spd, weather_json["wind"]["speed"]),
                    (d_date, weather_json["dt"]),
                    (d_clouds, weather_json["clouds"]["all"]),
                    (d_cities, city.city_name),
                ]
                for column, value in row:
                    column.append(value)

            elif weather_json["cod"] == "404":
                print("City not found. Skipping...")
            else:
                print("Unexpected Error during API Call")

        except KeyError:
            print("Unexpected Error during building listing")

        # Remember the attempt (successful or not) so it is not queried again.
        cities.append(city_key)
      


Processing Record 1 | torbay
Processing Record 2 | dikson
Processing Record 3 | clyde river
Processing Record 4 | sitka
Processing Record 5 | butaritari
Processing Record 6 | asayita
City not found. Skipping...
Processing Record 7 | verkhnyaya toyma
Processing Record 8 | tawkar
City not found. Skipping...
Processing Record 9 | thompson
Processing Record 10 | belushya guba
City not found. Skipping...
Processing Record 11 | xiaoweizhai
Processing Record 12 | dhidhdhoo
Processing Record 13 | asayita
Duplicate city. Skipping...
Processing Record 14 | atuona
Processing Record 15 | provideniya
Processing Record 16 | senj
Processing Record 17 | khatanga
Processing Record 18 | ribeira grande
Processing Record 19 | la ronge
Processing Record 20 | fallon
Processing Record 21 | sabha
Processing Record 22 | dalen
Processing Record 23 | tura
Processing Record 24 | butaritari
Duplicate city. Skipping...
Processing Record 25 | virginia beach
Processing Record 26 | hobyo
Processing Record 27 | bethel


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [191]:
# Assemble the per-city result lists into one table, one row per city.
# The dict's insertion order fixes the column order of the DataFrame.
weather_columns = {
    'city': d_cities,
    'Max Temp': d_max_temp,
    'Lat': d_lat,
    'Lng': d_lon,
    'Humidity': d_humidity,
    'Cloudiness': d_clouds,
    'Wind Speed': d_wind_spd,
    'country': d_country,
    "Date": d_date,
}

# The default integer index doubles as the city identifier.
cities_df = pd.DataFrame(weather_columns).rename_axis('City ID')


In [192]:
# Persist the collected city weather table to weather_data.csv (path is relative
# to the current working directory). to_csv is called with its defaults, so the
# DataFrame index is written as well.
cities_df.to_csv('weather_data.csv')

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [253]:
# Show any rows reporting more than 100% humidity, then keep only the rows
# at or below 100%.
humidity = cities_df["Humidity"]

print("The following cities have humutiy over 100%")
pprint(cities_df[humidity.gt(100)])

cities_df = cities_df[humidity.le(100)]

The following cities have humutiy over 100%
Empty DataFrame
Columns: [city, Max Temp, Lat, Lng, Humidity, Cloudiness, Wind Speed, country, Date]
Index: []


In [254]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of cities_df, displayed as this cell's output. No index lookup of
# high-humidity cities happens here.
cities_df.describe()

Unnamed: 0,Max Temp,Lat,Lng,Humidity,Cloudiness,Wind Speed,Date
count,500.0,500.0,500.0,500.0,500.0,500.0,500.0
mean,19.20404,10.353401,18.71935,67.818,53.906,3.6313,1655105000.0
std,8.560639,34.903983,93.838291,22.239471,41.853753,2.646573,259.9026
min,-5.42,-54.8,-179.1667,1.0,0.0,0.0,1655104000.0
25%,13.1125,-19.430825,-63.63815,54.0,7.0,1.54,1655105000.0
50%,20.925,-0.45835,21.47235,72.0,58.5,3.06,1655105000.0
75%,25.68,41.4131,109.03655,86.0,100.0,5.14,1655105000.0
max,39.12,78.2186,178.4167,100.0,100.0,15.58,1655105000.0


In [264]:
# Build clean_city_data from cities_df by dropping humidity outliers: only rows
# whose Humidity lies within 3 standard deviations of the mean (|z-score| < 3)
# are kept. numpy (np) and scipy.stats (stats) come from the import cell at the
# top of the notebook, so they are not re-imported here.
humidity_zscores = np.abs(stats.zscore(cities_df["Humidity"]))

# .copy() makes clean_city_data an independent DataFrame, so later column
# assignments on it do not raise SettingWithCopyWarning.
clean_city_data = cities_df[humidity_zscores < 3].copy()
clean_city_data.describe()


Unnamed: 0,Max Temp,Lat,Lng,Humidity,Cloudiness,Wind Speed,Date,zscore
count,499.0,499.0,499.0,499.0,499.0,499.0,499.0,499.0
mean,19.195671,10.364263,18.86174,67.951904,54.014028,3.638577,1655105000.0,0.006021
std,8.567183,34.938164,93.878373,22.059105,41.825918,2.644217,259.7302,0.99189
min,-5.42,-54.8,-179.1667,7.0,0.0,0.0,1655104000.0,-2.734687
25%,13.085,-19.47055,-63.6777,54.0,7.0,1.545,1655105000.0,-0.621328
50%,20.92,-0.6,22.2,72.0,60.0,3.06,1655105000.0,0.188044
75%,25.69,41.455,109.1064,86.0,100.0,5.14,1655105000.0,0.817555
max,39.12,78.2186,178.4167,100.0,100.0,15.58,1655105000.0,1.447067


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression