# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination of the exported city table (CSV)
output_data_file = "output_data/cities.csv"

# Valid coordinate bounds as (min, max) pairs: latitude first, then longitude
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Unique city names, kept in the order they were first encountered
cities = []
# Companion set so the "already seen?" check does not rescan the whole list
seen_cities = set()

# Create a set of random lat and lng combinations, bounded by the configured
# lat_range / lng_range instead of repeating the limits as literals
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Many random points resolve to the same city; record each name only once
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Build the request URL for the weather endpoint; the city name is appended per request

baseurl = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"
url = f"{baseurl}appid={weather_api_key}&units={units}&q="

# One list per field. Values are appended in lockstep, so position i in every
# list refers to the same city (a list is filled by calling append).
temp = []
cityID = []
latitude = []
humid = []
cloud = []
windSpeed = []
cityName = []
longitude = []

# Query each city once and keep its readings only when the response is complete
for number, city in enumerate(cities, start=1):
    print(f"Processing city {number} of {len(cities)}: {city}")
    try:
        # A timeout keeps one stalled request from hanging the whole run
        response = requests.get(url + city, timeout=10).json()

        # Read every field BEFORE appending anything: if a key is missing, no
        # list may end up one element longer than the others.
        reading = (
            response["main"]["temp"],
            response["id"],
            response["coord"]["lat"],
            response["main"]["humidity"],
            response["clouds"]["all"],
            response["wind"]["speed"],
            response["coord"]["lon"],
            response["name"],
        )
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Network failure, a body that is not JSON, or a reply without the
        # expected weather fields: skip this city and carry on.
        print(f"City information not found. Skipping {city}...")
        continue

    # Same order as the `reading` tuple above
    targets = (temp, cityID, latitude, humid, cloud, windSpeed, longitude, cityName)
    for target, value in zip(targets, reading):
        target.append(value)


In [None]:
# Show how many values each response list holds; the counts have to match
# for the lists to be combined into one DataFrame.
for label, values in (
    ("Temp", temp),
    ("cityID", cityID),
    ("latitude", latitude),
    ("humid", humid),
    ("cloud", cloud),
    ("windSpeed", windSpeed),
    ("cityName", cityName),
    ("longitude", longitude),
):
    print(f"{label} = {len(values)}")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Map each output column name to the list gathered from the API responses.
# Column order matters: later cells select Latitude by position (third column).
dictionary = {
    "City ID Number": cityID,
    "City": cityName,
    "Latitude": latitude,
    "Temperature": temp,
    "Humidity": humid,
    "Cloudiness": cloud,
    "Wind Speed": windSpeed,
    "Longitude": longitude,
}

# One row per city, one column per dictionary key
cityInfoDF = pd.DataFrame(data=dictionary)
cityInfoDF



In [None]:
# Write the city table to the configured CSV path, index column included
cityInfoDF.to_csv(output_data_file, index=True)

### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

In [None]:
import datetime

# Current date formatted with the locale's date representation ("%x");
# the plot titles embed this string.
Date = datetime.datetime.now().strftime("%x")
print(Date)

#### Latitude vs. Temperature Plot

In [None]:
# Scatter of temperature against latitude, one point per city
fig, ax = plt.subplots()
ax.scatter(cityInfoDF["Latitude"], cityInfoDF["Temperature"], marker="o")

# Title carries the analysis date so the saved image is self-describing
ax.set_title(f"City Latitude VS Temperature {Date}")
ax.set_ylabel("Temperature (Celsius)")
ax.set_xlabel("Latitude")
ax.grid(True)

# Save before showing
fig.savefig("Lat_vs_Temp.png")

plt.show()

The closer a location is to latitude 0, the more tightly its temperature clusters between 20°C and 30°C.
Beyond an absolute latitude of about 40°, temperature starts to decrease.

#### Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude, one point per city
fig, ax = plt.subplots()
ax.scatter(cityInfoDF["Latitude"], cityInfoDF["Humidity"], marker="o")

# Title carries the analysis date so the saved image is self-describing
ax.set_title(f"City Latitude VS Humidity {Date}")
ax.set_ylabel("Humidity (%)")
ax.set_xlabel("Latitude")
ax.grid(True)

# Save before showing
fig.savefig("Lat_vs_Humid.png")

plt.show()

It seems that, at all latitudes, the humidity can be either low or high.
However, there is a greater concentration of high humidity (>60%) between 0 and |40| degrees of latitude.

#### Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude, one point per city
fig, ax = plt.subplots()
ax.scatter(cityInfoDF["Latitude"], cityInfoDF["Cloudiness"], marker="o")

# Title carries the analysis date so the saved image is self-describing
ax.set_title(f"City Latitude VS Cloudiness {Date}")
ax.set_ylabel("Cloudiness (%)")
ax.set_xlabel("Latitude")
ax.grid(True)

# Save before showing
fig.savefig("Lat_vs_Cloud.png")

plt.show()

There seems to be no relationship between latitude and cloudiness.

#### Latitude vs. Wind Speed Plot

In [None]:
# Build a scatter plot 
plt.scatter(cityInfoDF["Latitude"], cityInfoDF["Wind Speed"], marker="o")

plt.title(f"City Latitude VS Wind Speed {Date}")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure
plt.savefig("Lat_vs_Wind.png")

# Show plot
plt.show()

Apparently, there are only a few places with high wind speed (>8 mph); at all latitudes, most locations have low wind speed (<8 mph).

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots

# The third column is the predictor; every column from that position onward
# is regressed against it and the fit is stored under the column's name.
xLat = cityInfoDF.iloc[:, 2]
regress = {
    name: linregress(xLat, cityInfoDF[name])
    for name in cityInfoDF.columns.values[2:]
}

regress

In [None]:
# Keep only latitude plus the four weather measures used in the hemisphere analysis
nsDF = cityInfoDF.loc[:, ["Latitude", "Temperature", "Humidity", "Cloudiness", "Wind Speed"]]
nsDF.head(5)

In [None]:
# Northern Hemisphere subset: rows whose latitude is strictly greater than 0
is_north = nsDF["Latitude"] > 0
northdf = nsDF[is_north]
northdf

In [None]:
# Southern Hemisphere subset: rows whose latitude is strictly less than 0
is_south = nsDF["Latitude"] < 0
southdf = nsDF[is_south]
southdf

In [None]:
# Northern regression line

# First column of northdf is the predictor; each remaining column is fitted against it
regressN = {}
xLatN = northdf.iloc[:, 0]

# Note: yValuesN is left bound to the last column processed once the loop ends
for col in northdf.columns.values[1:]:
    yValuesN = northdf[col]
    regressN[col] = linregress(xLatN, yValuesN)

regressN

In [None]:
# Southern regression line

# First column of southdf is the predictor; each remaining column is fitted against it
regressS = {}
xLatS = southdf.iloc[:, 0]

# Note: yValuesS is left bound to the last column processed once the loop ends
for col in southdf.columns.values[1:]:
    yValuesS = southdf[col]
    regressS[col] = linregress(xLatS, yValuesS)

regressS

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# The fitted regressions are stored in the regressN dictionary, keyed by column name

# Fit of Temperature against latitude for the Northern Hemisphere
slope,intercept,rvalue,pvalue,stderr = regressN["Temperature"]
line_eqN = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot the column that was actually regressed. The leftover loop variable
# yValuesN holds the last column of the regression loop (Wind Speed), so it
# must not be used here.
tempN = northdf["Temperature"]

# Points on the fitted line, evaluated at each observed latitude
regress_valuesN = xLatN * slope + intercept

# Plot scatter + linear regression line
plt.scatter(xLatN, tempN)
plt.plot(xLatN,regress_valuesN,"r-")
plt.annotate(line_eqN,(18,25),fontsize=10,color="red")  # data coordinates where the equation is drawn

plt.title("Latitude vs Temperature in the Northern Hemisphere")
plt.xlabel("Latitude")
plt.ylabel("Temperature (°Celcius)")

# Save the figure
plt.savefig("Lat_vs_Temp_North.png")

plt.show()

print(f"The linear regression model is based on the equation: {line_eqN}")
# linregress reports the correlation coefficient r; square it to get r squared
print(f"The r squared is: {rvalue**2}")



####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# The fitted regressions are stored in the regressS dictionary, keyed by column name

# Fit of Temperature against latitude for the Southern Hemisphere
slope,intercept,rvalue,pvalue,stderr = regressS["Temperature"]
line_eqS = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot the column that was actually regressed. The leftover loop variable
# yValuesS holds the last column of the regression loop (Wind Speed), so it
# must not be used here.
tempS = southdf["Temperature"]

# Points on the fitted line, evaluated at each observed latitude
regress_valuesS = xLatS * slope + intercept

# Plot scatter + linear regression line
plt.scatter(xLatS, tempS)
plt.plot(xLatS,regress_valuesS,"r-")
plt.annotate(line_eqS,(-40,15),fontsize=10,color="red")  # data coordinates where the equation is drawn

plt.title("Latitude vs Temperature in the Southern Hemisphere")
plt.xlabel("Latitude")
plt.ylabel("Temperature (°Celcius)")

# Save the figure
plt.savefig("Lat_vs_Temp_South.png")

plt.show()


print(f"The linear regression model is based on the equation: {line_eqS}")
# linregress reports the correlation coefficient r; square it to get r squared
print(f"The r squared is: {rvalue**2}")


According to the data, in both hemispheres the temperature is always between 0 and 10°C, no matter how close to or far from the equator the city is.
In the first graph, the linear model does not represent the trend of the data, according to the r-squared value, while the second one has a better fit.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# The fitted regressions are stored in the regressN dictionary, keyed by column name

# Fit of Humidity against latitude for the Northern Hemisphere
slope,intercept,rvalue,pvalue,stderr = regressN["Humidity"]
line_eqN = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot the column that was actually regressed. The leftover loop variable
# yValuesN holds the last column of the regression loop (Wind Speed), so it
# must not be used here.
humidN = northdf["Humidity"]

# Points on the fitted line, evaluated at each observed latitude
regress_valuesN = xLatN * slope + intercept

# Plot scatter + linear regression line
plt.scatter(xLatN, humidN)
plt.plot(xLatN,regress_valuesN,"r-")
plt.annotate(line_eqN,(18,50),fontsize=10,color="red")  # data coordinates where the equation is drawn

plt.title("Latitude vs Humidity (%) in the Northern Hemisphere")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

# Save the figure
plt.savefig("Lat_vs_Humid_North.png")

plt.show()

print(f"The linear regression model is based on the equation: {line_eqN}")
# linregress reports the correlation coefficient r; square it to get r squared
print(f"The r squared is: {rvalue**2}")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# The fitted regressions are stored in the regressS dictionary, keyed by column name

# Fit of Humidity against latitude for the Southern Hemisphere
slope,intercept,rvalue,pvalue,stderr = regressS["Humidity"]
line_eqS = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot the column that was actually regressed. The leftover loop variable
# yValuesS holds the last column of the regression loop (Wind Speed), so it
# must not be used here.
humidS = southdf["Humidity"]

# Points on the fitted line, evaluated at each observed latitude
regress_valuesS = xLatS * slope + intercept

# Plot scatter + linear regression line
plt.scatter(xLatS, humidS)
plt.plot(xLatS,regress_valuesS,"r-")
plt.annotate(line_eqS,(-40,40),fontsize=10,color="red")  # data coordinates where the equation is drawn

plt.title("Latitude vs Humidity (%) in the Southern Hemisphere")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

# Save the figure
plt.savefig("Lat_vs_Humid_South.png")

plt.show()


print(f"The linear regression model is based on the equation: {line_eqS}")
# linregress reports the correlation coefficient r; square it to get r squared
print(f"The r squared is: {rvalue**2}")

In this case, the regression lines have a continuous direction. In the north one, the humidity decreases at lower latitudes (although the data does not show that), and it continues like this in the south graph.
In the south graph, the data also shows low levels of humidity along all latitudes.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# The fitted regressions are stored in the regressN dictionary, keyed by column name

# Fit of Cloudiness against latitude for the Northern Hemisphere
slope,intercept,rvalue,pvalue,stderr = regressN["Cloudiness"]
line_eqN = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot the column that was actually regressed. The leftover loop variable
# yValuesN holds the last column of the regression loop (Wind Speed), so it
# must not be used here.
cloudN = northdf["Cloudiness"]

# Points on the fitted line, evaluated at each observed latitude
regress_valuesN = xLatN * slope + intercept

# Plot scatter + linear regression line
plt.scatter(xLatN, cloudN)
plt.plot(xLatN,regress_valuesN,"r-")
plt.annotate(line_eqN,(15,50),fontsize=10,color="red")  # data coordinates where the equation is drawn

plt.title("Latitude vs Cloudiness (%) in the Northern Hemisphere")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

# Save the figure
plt.savefig("Lat_vs_Cloud_North.png")

plt.show()

print(f"The linear regression model is based on the equation: {line_eqN}")
# linregress reports the correlation coefficient r; square it to get r squared
print(f"The r squared is: {rvalue**2}")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# The fitted regressions are stored in the regressS dictionary, keyed by column name

# Fit of Cloudiness against latitude for the Southern Hemisphere
slope,intercept,rvalue,pvalue,stderr = regressS["Cloudiness"]
line_eqS = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Plot the column that was actually regressed. The leftover loop variable
# yValuesS holds the last column of the regression loop (Wind Speed), so it
# must not be used here.
cloudS = southdf["Cloudiness"]

# Points on the fitted line, evaluated at each observed latitude
regress_valuesS = xLatS * slope + intercept

# Plot scatter + linear regression line
plt.scatter(xLatS, cloudS)
plt.plot(xLatS,regress_valuesS,"r-")
plt.annotate(line_eqS,(-40,15),fontsize=10,color="red")  # data coordinates where the equation is drawn

plt.title("Latitude vs Cloudiness (%) in the Southern Hemisphere")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

# Save the figure
plt.savefig("Lat_vs_Cloud_South.png")

plt.show()


print(f"The linear regression model is based on the equation: {line_eqS}")
# linregress reports the correlation coefficient r; square it to get r squared
print(f"The r squared is: {rvalue**2}")

In this case, both graphs show low values of cloudiness along all latitudes.
The first linear regression suggests cloudiness should increase at higher latitudes (moving away from the equator), while the second one suggests cloudiness should decrease while moving farther south from the equator.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# The fitted regressions are stored in the regressN dictionary, keyed by column name

# Fit of Wind Speed against latitude for the Northern Hemisphere
slope,intercept,rvalue,pvalue,stderr = regressN["Wind Speed"]
line_eqN = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Select the plotted column by name rather than relying on whatever the
# regression loop left in yValuesN.
windN = northdf["Wind Speed"]

# Points on the fitted line, evaluated at each observed latitude
regress_valuesN = xLatN * slope + intercept

# Plot scatter + linear regression line
plt.scatter(xLatN, windN)
plt.plot(xLatN,regress_valuesN,"r-")
plt.annotate(line_eqN,(15,4.4),fontsize=10,color="red")  # data coordinates where the equation is drawn

# The weather data is requested with units="metric", so wind speed is in m/s
plt.title("Latitude vs Wind Speed (m/s) in the Northern Hemisphere")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")

# Save the figure
plt.savefig("Lat_vs_Wind_North.png")

plt.show()

print(f"The linear regression model is based on the equation: {line_eqN}")
# linregress reports the correlation coefficient r; square it to get r squared
print(f"The r squared is: {rvalue**2}")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# The fitted regressions are stored in the regressS dictionary, keyed by column name

# Fit of Wind Speed against latitude for the Southern Hemisphere
slope,intercept,rvalue,pvalue,stderr = regressS["Wind Speed"]
line_eqS = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Select the plotted column by name rather than relying on whatever the
# regression loop left in yValuesS.
windS = southdf["Wind Speed"]

# Points on the fitted line, evaluated at each observed latitude
regress_valuesS = xLatS * slope + intercept

# Plot scatter + linear regression line
plt.scatter(xLatS, windS)
plt.plot(xLatS,regress_valuesS,"r-")
plt.annotate(line_eqS,(-40,10),fontsize=10,color="red")  # data coordinates where the equation is drawn

# The weather data is requested with units="metric", so wind speed is in m/s
plt.title("Latitude vs Wind Speed (m/s) in the Southern Hemisphere")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")

# Save the figure
plt.savefig("Lat_vs_Wind_South.png")

plt.show()


print(f"The linear regression model is based on the equation: {line_eqS}")
# linregress reports the correlation coefficient r; square it to get r squared
print(f"The r squared is: {rvalue**2}")

In both cases, the data is completely scattered, not showing any kind of relation between the two variables (the second one, a little more).
The linear regression in the second graph does not represent the trend of the data, according to the r-squared value.