# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies
import matplotlib.pyplot as plt
import requests
import pandas as pd
import citipy
import random
import time

from citipy import citipy
from api_keys import weather_api_key
from datetime import date
from scipy.stats import linregress

## Generate Cities List

In [2]:
locations = pd.DataFrame()


locations['Latitude'] = []
locations['Latitude'] = []
value = float

lat = []
lng = []

for x in range(1500):
    value = random.randint(-90, 90)
    lat.append(value)
    
for x in range(1500):
    value = random.randint(-180, 180)
    lng.append(value)

# combinne them into a single dataframe
locations['Latitude'] = lat
locations['Longitude'] = lng

cities = []

locations['Locations'] = list(zip(locations['Latitude'], locations['Longitude']))

# Loop through the list of locations to get the nearby cities
for location in locations['Locations']:
    lan, lng = location
    city = citipy.nearest_city(lan, lng).city_name
    if city not in cities:
          cities.append(city)


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Save config information.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Build partial query URL
query_url = f"{url}appid={weather_api_key}&units={units}&q="

filter_cities = []
lon = []
lat = []
temp_max = []
humidity = []
clouds = []
wind_speed = []
country = []
dt = []

# Loop through the list of cities and perform a request for data on each
for city in cities:
    try:
        response = requests.get(query_url + city).json()
        lat.append(response['coord']['lat'])
        lon.append(response['coord']['lon'])
        temp_max.append(response['main']['temp_max'])
        humidity.append(response['main']['humidity'])
        clouds.append(response['clouds']['all'])
        wind_speed.append(response['wind']['speed'])
        country.append(response['sys']['country'])
        dt.append(response['dt'])
        filter_cities.append(city)
        
    except (KeyError, IndexError):
        print("Missing field/result... skipping.")
    
    time.sleep(5) # Sleep for 5 seconds



Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.


In [None]:
print(len(filter_cities))
print(len(lat))
print(len(lon))

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# read in the data

#city_data_df = pd.read_csv("../output_data/cities.csv")



city_data_df = pd.DataFrame({'City': filter_cities, 
                             'Lat': lat,
                             'Lng': lon,
                             'Max Temp': temp_max,
                             'Humidity': humidity,
                             'Cloudiness': clouds,
                             'Wind Speed': wind_speed,
                             'Country': country,
                             'Date': dt
                             })

city_data_df

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Remove rows with humidity greater than 100%
city_data_filtered_df =city_data_df.loc[city_data_df['Humidity'] < 100.1]
city_data_filtered_df

In [None]:
#  Get the indices of cities that have humidity over 100%.
# Get rows with humidity greater than 100%
city_data_greater100per_df = city_data_df.loc[city_data_df['Humidity'] > 100]
city_data_greater100per_df

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
clean_city_data_df = city_data_df.loc[city_data_df.index != 129]

clean_city_data_df

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Plot the latitude versus temperature
today = date.today()
plt.title(f"City Latitude vs. Max Temperature ({today})")
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.scatter(clean_city_data_df['Lat'], clean_city_data_df['Max Temp'])

plt.savefig('C:/Users/jjel0\OneDrive/Homework/python-api-challenge/output_data/LatVsTempFig')

plt.show()



## Latitude vs. Humidity Plot

In [None]:
# Plot the latitude versus humidity

plt.title(f'City Latitude vs. Humidity ({today})')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.scatter(clean_city_data_df['Lat'], clean_city_data_df['Humidity'])

plt.savefig('C:/Users/jjel0\OneDrive/Homework/python-api-challenge/output_data/LatVsHumidityFig')

plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Plot the latitude versus humidity

plt.title(f'City Latitude vs. Cloudiness ({today})')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.scatter(clean_city_data_df['Lat'], clean_city_data_df['Cloudiness'])

plt.savefig('C:/Users/jjel0\OneDrive/Homework/python-api-challenge/output_data/LatVsCloudinessFig')

plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Plot the latitude versus humidity

plt.title(f'City Latitude vs. Wind Speed ({today})')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.scatter(clean_city_data_df['Lat'], clean_city_data_df['Wind Speed'])

plt.savefig('C:/Users/jjel0\OneDrive/Homework/python-api-challenge/output_data/LatVsWindSpeedFig')

plt.show()

## Linear Regression

In [None]:
# Create a dataframe for the Northern Hemisphere and one for the Southern Hemisphere
clean_city_data_SH_df = pd.DataFrame()
clean_city_data_NH_df = pd.DataFrame()
#

clean_city_data_NH_df = clean_city_data_df[clean_city_data_df['Lat'] > 0]
clean_city_data_SH_df = clean_city_data_df[clean_city_data_df['Lat'] < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Plot the latitude versus temperature
today = date.today()
plt.title(f"City Latitude vs. Max Temperature ({today})")
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.scatter(clean_city_data_NH_df['Lat'], clean_city_data_NH_df['Max Temp'])

# Get the regression data
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_NH_df['Lat'], clean_city_data_NH_df['Max Temp'])

# Print the correlation coefficient
print(f"The r-value is: {round(rvalue, 4)}")

# Create the line for the regression data and plot them
tv_vs_wt_fit = slope * clean_city_data_NH_df['Lat'] + intercept
plt.plot(clean_city_data_NH_df['Lat'],tv_vs_wt_fit,'-', color='r');

# add the regression 
plt.text(3, 10, f"y = {round(slope, 2)} * x + {round(intercept, 2)}", horizontalalignment='left', size='medium', color='r', weight='semibold')

plt.show()


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Plot the latitude versus temperature
today = date.today()
plt.title(f"City Latitude vs. Max Temperature ({today})")
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.scatter(clean_city_data_SH_df['Lat'], clean_city_data_SH_df['Max Temp'])

# Get the regression data
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_SH_df['Lat'], clean_city_data_SH_df['Max Temp'])

# Print the correlation coefficient
print(f"The r-value is: {round(rvalue, 4)}")

# Create the line for the regression data and plot them
tv_vs_wt_fit = slope * clean_city_data_SH_df['Lat'] + intercept
plt.plot(clean_city_data_SH_df['Lat'],tv_vs_wt_fit,'-', color='r');

# add the regression 
plt.text(-20, 40, f"y = {round(slope, 2)} * x + {round(intercept, 2)}", horizontalalignment='left', size='medium', color='r', weight='semibold')

plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Plot the latitude versus humidity

plt.title(f'City Latitude vs. Humidity ({today})')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.scatter(clean_city_data_NH_df['Lat'], clean_city_data_NH_df['Humidity'])

# Get the regression data
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_NH_df['Lat'], clean_city_data_NH_df['Humidity'])

# Print the correlation coefficient
print(f"The r-value is: {round(rvalue, 4)}")

# Create the line for the regression data and plot them
tv_vs_wt_fit = slope * clean_city_data_NH_df['Lat'] + intercept
plt.plot(clean_city_data_NH_df['Lat'],tv_vs_wt_fit,'-', color='r');

# add the regression 
plt.text(50, 10, f"y = {round(slope, 2)} * x + {round(intercept, 2)}", horizontalalignment='left', size='medium', color='r', weight='semibold')

plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Plot the latitude versus humidity

plt.title(f'City Latitude vs. Humidity ({today})')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.scatter(clean_city_data_SH_df['Lat'], clean_city_data_SH_df['Humidity'])

# Get the regression data
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_SH_df['Lat'], clean_city_data_SH_df['Humidity'])

# Print the correlation coefficient
print(f"The r-value is: {round(rvalue, 4)}")

# Create the line for the regression data and plot them
tv_vs_wt_fit = slope * clean_city_data_SH_df['Lat'] + intercept
plt.plot(clean_city_data_SH_df['Lat'],tv_vs_wt_fit,'-', color='r');

# add the regression 
plt.text(-50, 20, f"y = {round(slope, 2)} * x + {round(intercept, 2)}", horizontalalignment='left', size='medium', color='r', weight='semibold')

plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Plot the latitude versus humidity

plt.title(f'City Latitude vs. Cloudiness ({today})')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.scatter(clean_city_data_NH_df['Lat'], clean_city_data_NH_df['Cloudiness'])

# Get the regression data
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_NH_df['Lat'], clean_city_data_NH_df['Cloudiness'])

# Print the correlation coefficient
print(f"The r-value is: {round(rvalue, 4)}")

# Create the line for the regression data and plot them
tv_vs_wt_fit = slope * clean_city_data_NH_df['Lat'] + intercept
plt.plot(clean_city_data_NH_df['Lat'],tv_vs_wt_fit,'-', color='r');

# add the regression 
plt.text(0, 10, f"y = {round(slope, 2)} * x + {round(intercept, 2)}", horizontalalignment='left', size='medium', color='r', weight='semibold')

plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Plot the latitude versus humidity

plt.title(f'City Latitude vs. Cloudiness ({today})')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.scatter(clean_city_data_SH_df['Lat'], clean_city_data_SH_df['Cloudiness'])

# Get the regression data
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_SH_df['Lat'], clean_city_data_SH_df['Cloudiness'])

# Print the correlation coefficient
print(f"The r-value is: {round(rvalue, 4)}")

# Create the line for the regression data and plot them
tv_vs_wt_fit = slope * clean_city_data_SH_df['Lat'] + intercept
plt.plot(clean_city_data_SH_df['Lat'],tv_vs_wt_fit,'-', color='r');

# add the regression 
plt.text(-50, 10, f"y = {round(slope, 2)} * x + {round(intercept, 2)}", horizontalalignment='left', size='medium', color='r', weight='semibold')

plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Plot the latitude versus humidity

plt.title(f'City Latitude vs. Wind Speed ({today})')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.scatter(clean_city_data_NH_df['Lat'], clean_city_data_NH_df['Wind Speed'])

# Get the regression data
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_NH_df['Lat'], clean_city_data_NH_df['Wind Speed'])

# Print the correlation coefficient
print(f"The r-value is: {round(rvalue, 4)}")

# Create the line for the regression data and plot them
tv_vs_wt_fit = slope * clean_city_data_NH_df['Lat'] + intercept
plt.plot(clean_city_data_NH_df['Lat'],tv_vs_wt_fit,'-', color='r');

# add the regression 
plt.text(0, 27, f"y = {round(slope, 2)} * x + {round(intercept, 2)}", horizontalalignment='left', size='medium', color='r', weight='semibold')

plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Plot the latitude versus humidity

plt.title(f'City Latitude vs. Wind Speed ({today})')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.scatter(clean_city_data_SH_df['Lat'], clean_city_data_SH_df['Wind Speed'])

# Get the regression data
(slope, intercept, rvalue, pvalue, stderr) = linregress(clean_city_data_SH_df['Lat'], clean_city_data_SH_df['Wind Speed'])

# Print the correlation coefficient
print(f"The r-value is: {round(rvalue, 4)}")

# Create the line for the regression data and plot them
tv_vs_wt_fit = slope * clean_city_data_SH_df['Lat'] + intercept
plt.plot(clean_city_data_SH_df['Lat'],tv_vs_wt_fit,'-', color='r');

# add the regression 
plt.text(-20, 40, f"y = {round(slope, 2)} * x + {round(intercept, 2)}", horizontalalignment='left', size='medium', color='r', weight='semibold')

plt.show()