# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import datetime
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination path for the exported city weather CSV
output_data_file = "../output_data/cities.csv"

# Coordinate bounds as (min, max) pairs used when sampling random locations
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [2]:
# Build the list of unique cities nearest to randomly sampled coordinates.

# Draw 1500 random latitude/longitude pairs inside the configured bounds.
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)

# Materialise the pairs as a list so they can still be inspected after the
# lookup below (a bare zip() is a one-shot iterator and would be exhausted).
lat_lngs = list(zip(lats, lngs))

# Look up the nearest city for every pair. dict.fromkeys() removes duplicates
# in a single pass while keeping first-seen order, replacing the quadratic
# "if city not in cities" membership test on a list.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
    )
)

# Print the city count
len(cities)

633

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Perform API Calls
# Perform a weather check on each city using a series of successive API calls.
# Include a print log of each city as it's being processed (with the city number and city name).

# Base endpoint; units=Imperial makes the API report temperature in Fahrenheit
# and wind speed in miles per hour.
url_main = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&"

# Log counters: cities are reported in sets of 50, numbered from 1 within each set.
record_count = 1
set_count = 1
columns = ["City", "Cloudiness", "Country", "Date", "Humidity", "Lat", "Lng", "Max Temp (F)", "Wind Speed"]

# Rows are collected in a plain list and turned into a DataFrame once at the end.
# DataFrame.append() inside the loop copied the whole frame on every call and
# no longer exists in pandas 2.x.
city_rows = []

print("-----------------------------")
print("Beginning Data Retrieval.")
print("-----------------------------")

for i, city in enumerate(cities):
    # Start a new set every 50 cities; numbering restarts at 1 (not 0) so
    # every set is logged as 1..50.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1

    print(f"Processing City {record_count} of Set {set_count} | {city}")
    record_count += 1

    # Define url for query
    query_url = url_main + "appid=" + weather_api_key + "&q=" + city

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        weather = requests.get(query_url, timeout=10).json()
        city_rows.append({"City": city,
                          "Cloudiness": weather['clouds']['all'],
                          "Country": weather['sys']['country'],
                          "Date": weather['dt'],
                          "Humidity": weather['main']['humidity'],
                          "Lat": weather['coord']['lat'],
                          "Lng": weather['coord']['lon'],
                          "Max Temp (F)": weather['main']['temp_max'],
                          "Wind Speed": weather['wind']['speed'],
                          })
    except (KeyError, ValueError, requests.exceptions.RequestException):
        # KeyError: the response lacks the weather fields (e.g. unknown city).
        # ValueError: the response body was not valid JSON.
        # RequestException: network failure or timeout.
        # Anything else (including KeyboardInterrupt) is allowed to propagate.
        print("City not found. Skipping...")

# Build the frame in one step; `columns` fixes the column order and keeps the
# expected columns present even when no city was retrieved.
city_data = pd.DataFrame(city_rows, columns=columns)

print("-----------------------------")
print("Data Retrieval Complete.")
print("-----------------------------")


-----------------------------
Beginning Data Retrieval.
-----------------------------
Processing City 1 of Set 1 | sholapur
Processing City 2 of Set 1 | inirida
Processing City 3 of Set 1 | plettenberg bay
Processing City 4 of Set 1 | kapaa
Processing City 5 of Set 1 | dolores
Processing City 6 of Set 1 | jamestown
Processing City 7 of Set 1 | punta arenas
Processing City 8 of Set 1 | inhambane
Processing City 9 of Set 1 | amderma
City not found. Skipping...
Processing City 10 of Set 1 | arbazh
Processing City 11 of Set 1 | busselton
Processing City 12 of Set 1 | batagay
Processing City 13 of Set 1 | mount gambier
Processing City 14 of Set 1 | saint-pierre
Processing City 15 of Set 1 | upernavik
Processing City 16 of Set 1 | ushuaia
Processing City 17 of Set 1 | talnakh
Processing City 18 of Set 1 | vaini
Processing City 19 of Set 1 | thompson
Processing City 20 of Set 1 | rikitea
Processing City 21 of Set 1 | taolanaro
City not found. Skipping...
Processing City 22 of Set 1 | barrow
P

Processing City 45 of Set 4 | olonets
Processing City 46 of Set 4 | fereydun kenar
Processing City 47 of Set 4 | coxim
Processing City 48 of Set 4 | tilichiki
Processing City 49 of Set 4 | salalah
Processing City 0 of Set 5 | kutum
Processing City 1 of Set 5 | eskisehir
Processing City 2 of Set 5 | lompoc
Processing City 3 of Set 5 | marrakesh
Processing City 4 of Set 5 | wanning
Processing City 5 of Set 5 | biak
Processing City 6 of Set 5 | acarau
Processing City 7 of Set 5 | shibarghan
City not found. Skipping...
Processing City 8 of Set 5 | jalu
Processing City 9 of Set 5 | leningradskiy
Processing City 10 of Set 5 | provideniya
Processing City 11 of Set 5 | santa eulalia del rio
City not found. Skipping...
Processing City 12 of Set 5 | sidi ali
Processing City 13 of Set 5 | port hedland
Processing City 14 of Set 5 | bama
Processing City 15 of Set 5 | isangel
Processing City 16 of Set 5 | port blair
Processing City 17 of Set 5 | dalinghe
City not found. Skipping...
Processing City 1

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Show how many non-null values each column holds. Only the last expression of
# a cell is rendered automatically, so the counts are printed explicitly.
print(city_data.count())

# Save data as csv file
# NOTE(review): `output_data_file` is defined in the setup cell but this export
# uses its own hard-coded path — confirm which location is intended.
city_data.to_csv("../cities_output2.csv", encoding="utf-8", index=False)

# Display DataFrame of city_data
city_data.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Date of analysis (MM/DD/YYYY) shown in the plot title
date = time.strftime("%m/%d/%Y")

# One marker per city: latitude on x, maximum temperature on y
plt.scatter(
    city_data["Lat"],
    city_data["Max Temp (F)"],
    marker="o",
    edgecolor="black",
    linewidths=1,
    alpha=0.8,
    label="cities",
)
plt.title("City Latitude vs. Temperature " + date, fontsize="12")
plt.xlabel("Latitude", fontsize="14")
plt.ylabel("Max Temperature (F)", fontsize="14")
plt.grid(True)
print("Plot shows that temperature decreases the further from the equator.")

# Write the figure to disk before show() releases it
plt.savefig("Temperature.png")
plt.show()


## Latitude vs. Humidity Plot

In [None]:
# Date of analysis (MM/DD/YYYY) shown in the plot title
timestamp = time.strftime("%m/%d/%Y")

# One marker per city: latitude on x, relative humidity on y
plt.scatter(city_data["Lat"], city_data["Humidity"],
            edgecolor="black", linewidths=1, marker="o", alpha=0.8, label="cities")
plt.title(f"City Latitude vs. Humidity {timestamp}", fontsize = "12")
plt.xlabel("Latitude", fontsize="14")
plt.ylabel("Humidity (%)", fontsize="14")
plt.grid(True)

# Save under a name that matches the plot; this cell previously wrote the
# humidity figure to "Latitude vs Temperature (F).png".
plt.savefig("Humidity.png")
plt.show()
print("The plot shows that humidity is predominantly 80% to 100% between latitudes -40 to 60.")



## Latitude vs. Cloudiness Plot

In [None]:
# Date of analysis (MM/DD/YYYY) shown in the plot title
timestamp = time.strftime("%m/%d/%Y")

# One marker per city: latitude on x, cloud cover on y
plt.scatter(
    city_data["Lat"],
    city_data["Cloudiness"],
    marker="o",
    color="blue",
    edgecolor="black",
)
plt.grid()
plt.title(f"City Latitude vs. Cloudiness ({timestamp})")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness(%)")

# Write the figure to disk before show() releases it
plt.savefig("Cloudiness.png")
plt.show()
print("This scatter plot does not present any discernible findings.")


## Latitude vs. Wind Speed Plot

In [None]:
# Date of analysis (MM/DD/YYYY) shown in the plot title
timestamp = time.strftime("%m/%d/%Y")

# One marker per city: latitude on x, wind speed on y
plt.scatter(city_data["Lat"], city_data["Wind Speed"], marker = "o", color = "blue",edgecolor = "black")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed(mph)")
plt.title(f"City Latitude vs. Wind Speed ({timestamp})")

# Enable the grid explicitly, once. The earlier argument-less plt.grid() call
# only toggled the current state and was immediately overridden by this one.
plt.grid(True)
plt.savefig("Wind Speed.png")
plt.show()
print("The plot shows that wind speed does not correlate with latitude.")


## Linear Regression

In [None]:
# Add Linear regression to plot

def linear_reg_plot(x_values, y_values, y_label=None, title=None):
    """Scatter ``y_values`` against ``x_values`` and overlay the least-squares line.

    The fitted equation is annotated on the plot at the median of the data,
    the coefficient of determination is printed, and the figure is shown.

    Parameters
    ----------
    x_values, y_values : pandas.Series
        Numeric data of equal length. ``.median()`` is called on both, so
        plain lists are not supported.
    y_label : str, optional
        Label for the y axis; left unset when omitted.
    title : str, optional
        Plot title; left unset when omitted.
    """
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    regress_values = x_values * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

    plt.scatter(x_values,y_values)
    plt.plot(x_values,regress_values,"r-")
    plt.annotate(line_eq,(x_values.median(),y_values.median()),fontsize=10,color="red")
    plt.xlabel("Latitude")
    if y_label is not None:
        plt.ylabel(y_label)
    if title is not None:
        plt.title(title)

    # linregress reports the correlation coefficient r; square it so the
    # printed number matches its "r-squared" label.
    print(f"The r-squared is: {rvalue**2}")
    plt.show()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Create Northern and Southern Hemisphere DataFrames.
# Latitude is coerced to float once and reused for both masks. Cities lying
# exactly on the equator (Lat == 0) fall into neither frame.
latitudes = pd.to_numeric(city_data["Lat"]).astype(float)
Northern_hemisphere = city_data.loc[latitudes > 0, :]
Southern_hemisphere = city_data.loc[latitudes < 0, :]

# Northern Hemisphere: max temperature regressed on latitude
x_values = pd.to_numeric(Northern_hemisphere['Lat']).astype(float)
y_values = pd.to_numeric(Northern_hemisphere['Max Temp (F)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y =" + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"Regression line equation is: {line_eq}.")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values, "r-")
# Place the equation relative to the axes (bottom-left corner) so it is drawn
# whatever the data range; a fixed data point such as (6, 10) can fall outside
# the plotted area, in which case matplotlib does not render the annotation.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')
# linregress returns r; square it so the value matches the "r-squared" label.
print(f"The r-squared is: {rvalue**2}")
print("The regression displays a negative correlation.")
print("In the Northern Hemisphere, the temperature decreases with increased latitude.")

plt.show()


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: max temperature regressed on latitude
x_values = pd.to_numeric(Southern_hemisphere['Lat']).astype(float)
y_values = pd.to_numeric(Southern_hemisphere['Max Temp (F)']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y =" + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"Regression line equation is: {line_eq}.")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values, "r-")
# Southern latitudes are all negative, so the former data-space anchor (6, 10)
# lay outside the axes and the equation was never drawn. Anchor it to the
# axes' bottom-left corner instead.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')
# linregress returns r; square it so the value matches the "r-squared" label.
print(f"The r-squared is: {rvalue**2}")
print("The regression displays a positive correlation.")
print("In the Southern Hemisphere, temperatures increased the closer to the equator one is.")

plt.show()



####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: humidity regressed on latitude
x_values = pd.to_numeric(Northern_hemisphere['Lat']).astype(float)
y_values = pd.to_numeric(Northern_hemisphere['Humidity']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y =" + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"Regression line equation is: {line_eq}.")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values, "r-")
# Anchor the equation to the axes' bottom-left corner so it is drawn whatever
# the data range, instead of at a fixed data point that may lie off the plot.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title('Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
# linregress returns r; square it so the value matches the "r-squared" label.
print(f"The r-squared is: {rvalue**2}")
print("The regression displays a positive correlation.")
print("Northern hemisphere humidity increases slightly with higher latitude.")

plt.show()



####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: humidity regressed on latitude
x_values = pd.to_numeric(Southern_hemisphere['Lat']).astype(float)
y_values = pd.to_numeric(Southern_hemisphere['Humidity']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y =" + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"Regression line equation is: {line_eq}.")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values, "r-")
# Southern latitudes are all negative, so the former data-space anchor (6, 10)
# lay outside the axes and the equation was never drawn. Anchor it to the
# axes' bottom-left corner instead.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title('Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
# linregress returns r; square it so the value matches the "r-squared" label.
print(f"The r-squared is: {rvalue**2}")
print("The regression displays a positive correlation.")
print("In the Southern Hemisphere, humidity slightly increases closer to the equator.")

plt.show()


####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: cloudiness regressed on latitude
x_values = pd.to_numeric(Northern_hemisphere['Lat']).astype(float)
y_values = pd.to_numeric(Northern_hemisphere['Cloudiness']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y =" + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"Regression line equation is: {line_eq}.")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values, "r-")
# Anchor the equation to the axes' bottom-left corner so it is drawn whatever
# the data range, instead of at a fixed data point that may lie off the plot.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title('Northern Hemisphere - Cloudiness vs. Latitude Linear Regression')
# linregress returns r; square it so the value matches the "r-squared" label.
print(f"The r-squared is: {rvalue**2}")
print("The regression displays a positive correlation.")
print("Cloudiness increases with higher latitude in the Northern Hemisphere.")

plt.show()


####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: cloudiness regressed on latitude
x_values = pd.to_numeric(Southern_hemisphere['Lat']).astype(float)
y_values = pd.to_numeric(Southern_hemisphere['Cloudiness']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y =" + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"Regression line equation is: {line_eq}.")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values, "r-")
# Southern latitudes are all negative, so the former data-space anchor (6, 10)
# lay outside the axes and the equation was never drawn. Anchor it to the
# axes' bottom-left corner instead.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title('Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
# linregress returns r; square it so the value matches the "r-squared" label.
print(f"The r-squared is: {rvalue**2}")
print("The regression displays a positive correlation.")
print("Cloudiness slightly increases the closer to the equator in the Southern Hemisphere.")

plt.show()



####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: wind speed regressed on latitude
x_values = pd.to_numeric(Northern_hemisphere['Lat']).astype(float)
y_values = pd.to_numeric(Northern_hemisphere['Wind Speed']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y =" + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"Regression line equation is: {line_eq}.")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values, "r-")
# Anchor the equation to the axes' bottom-left corner so it is drawn whatever
# the data range, instead of at a fixed data point that may lie off the plot.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title('Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')
# linregress returns r; square it so the value matches the "r-squared" label.
print(f"The r-squared is: {rvalue**2}")
print("The regression displays a neutral/slightly positive correlation.")
print("In the Northern Hemisphere, wind speed has a slight increase with higher latitude.")

plt.show()


####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: wind speed regressed on latitude
x_values = pd.to_numeric(Southern_hemisphere['Lat']).astype(float)
y_values = pd.to_numeric(Southern_hemisphere['Wind Speed']).astype(float)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y =" + str(round(slope,2)) + "x + " + str(round(intercept,2))
print(f"Regression line equation is: {line_eq}.")

plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values, "r-")
# Southern latitudes are all negative, so the former data-space anchor (6, 10)
# lay outside the axes and the equation was never drawn. Anchor it to the
# axes' bottom-left corner instead.
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title('Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')
# linregress returns r; square it so the value matches the "r-squared" label.
print(f"The r-squared is: {rvalue**2}")
print("The regression displays a negative correlation.")
print("In the Southern Hemisphere, there is a slight decrease in wind speed closer to the equator.")

plt.show()
