#  WeatherPy

In [None]:
%matplotlib notebook

# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats 
from datetime import datetime

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
#output_data_file = "output_data/cities.csv"

# Coordinate bounds as (min, max) tuples: latitude runs from -90 to 90 and
# longitude from -180 to 180.
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Draw random coordinate pairs inside the declared bounds.
# No seed is set, so every run samples a different set of locations.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat/lng pair. dict.fromkeys drops repeated
# names while keeping first-seen order, without rescanning a list for every
# candidate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)

## Perform API Calls

In [None]:
# Weather data
# NOTE(review): the API key is sent over plain HTTP here; consider switching to
# https if the endpoint supports it.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Lists of data: one entry per city, index-aligned with `cities`
cloudiness = []
country = []
date = []
humidity = []
latz = []
lngz = []
max_temp = []
wind_speed = []

# Order must match the order of the values collected in `record` below
columns = (cloudiness, country, date, humidity, latz, lngz, max_temp, wind_speed)

# Get Weather API
for num, city in enumerate(cities, start=1):
    print(f"City #{num}: " + city.title())
    try:
        # `params` lets requests URL-encode city names that contain spaces or
        # reserved characters; the timeout keeps one stalled request from
        # hanging the whole loop.
        response = requests.get(
            url,
            params={"q": city, "units": units, "appid": weather_api_key},
            timeout=10,
        ).json()
        # Read every field BEFORE appending anything: a key missing part-way
        # through must not leave the lists with different lengths.
        record = (
            response['clouds']['all'],
            response['sys']['country'],
            response['dt'],
            response['main']['humidity'],
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp_max'],
            response['wind']['speed'],
        )
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Request failed or info is not available: mark every field as missing
        # so the row is removed later when NaN rows are dropped.
        record = (np.nan,) * len(columns)

    for column, value in zip(columns, record):
        column.append(value)

## Convert Raw Data to DataFrame

In [None]:
# Gather the per-city lists into one table, one column per measurement
city_columns = {
    "City": cities,
    "Country": country,
    "Cloudiness": cloudiness,
    "Date": date,
    "Humidity": humidity,
    "Latitude": latz,
    "Longitude": lngz,
    "Max Temperature": max_temp,
    "Wind Speed": wind_speed,
}
df_cities = pd.DataFrame(city_columns)

# Keep only the rows where every field is present
clean_df_cities = df_cities.dropna(axis=0, how="any")

# Write the cleaned table to disk, then display it as the cell output
clean_df_cities.to_csv("Cities.csv", header=True, index=False)
clean_df_cities

In [None]:
# Date shown in the plot titles: the timestamp of the first city in the
# cleaned table. The column is selected by label so this does not depend on
# the position of "Date" among the columns.
t = clean_df_cities["Date"].iloc[0]
date_obj = datetime.fromtimestamp(t)
df_date = date_obj.strftime("%m/%d/%Y")

## Plotting the Data

In [None]:
# LATITUDE VS TEMPERATURE PLOT
# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(clean_df_cities['Latitude'], clean_df_cities['Max Temperature'], marker='+', color='purple')
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.title("Latitude & Max Temperature at " + df_date)

* This plot shows maximum temperature against latitude. Because we want to see which places are hotter, we read the data starting from the equator, which is the zero value of latitude. A positive latitude is an imaginary line parallel to the equator in the northern hemisphere, and a negative latitude is a parallel in the southern hemisphere.

In [None]:
# LATITUDE VS HUMIDITY PLOT
# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(clean_df_cities['Latitude'], clean_df_cities['Humidity'], marker='+', color='royalblue')
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title('Latitude & Humidity at ' + df_date)

* Here, the latitude and humidity are plotted. In some cases, the humidity can be related to the temperature or the thermal sensation in certain zones.

In [None]:
# LATITUDE VS CLOUDINESS PLOT
# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(clean_df_cities['Latitude'], clean_df_cities['Cloudiness'], marker='+', color='darkgreen')
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Latitude & Cloudiness at " + df_date)

 * In this graph, the latitude and cloudiness percentage are plotted to know if there is a relation between these two and weather conditions.

In [None]:
# LATITUDE VS WIND SPEED PLOT
# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(clean_df_cities['Latitude'], clean_df_cities['Wind Speed'], marker='+', color='darkgoldenrod')
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("Latitude & Wind Speed at " + df_date)

* The latitude and wind speed are plotted in this figure. Wind speed can be a factor in thermal sensation, depending on the temperature of the wind.

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots
def lin_reg(data_x, data_y):
    """Fit a least-squares line to ``data_y`` as a function of ``data_x``.

    Args:
        data_x: Independent-variable values. Must support multiplication by a
            scalar and addition of a scalar (e.g. a pandas Series or a NumPy
            array).
        data_y: Dependent-variable values, the same length as ``data_x``.

    Returns:
        A 3-tuple ``(d_fit, line_eq, d_r)``:
        ``d_fit`` is the fitted y-value for every x in ``data_x``;
        ``line_eq`` is the fitted equation as text, with slope and intercept
        rounded to two decimals, suitable for annotating a plot;
        ``d_r`` is the correlation coefficient r (NOT r-squared; square it to
        get the coefficient of determination).
    """
    fit = scipy.stats.linregress(data_x, data_y)
    d_fit = fit.slope * data_x + fit.intercept

    # Pick the sign from the rounded intercept so a negative value reads
    # "x - 3.2" rather than "x + -3.2", and a value that rounds to zero is
    # shown as "+ 0.0".
    intercept = round(fit.intercept, 2)
    sign = "-" if intercept < 0 else "+"
    line_eq = f"y = {round(fit.slope, 2)}x {sign} {abs(intercept)}"
    return d_fit, line_eq, fit.rvalue

## Northern and Southern Hemisphere DF

* The next plots show the relationship between each pair of parameters. Analyzing each hemisphere separately gives a closer look at the data points and a better approximation of their correlation with the other parameters. The r-squared value helps us judge how strong the correlation is: a value close to 1 indicates a good relationship, while a value close to 0 indicates that there is no relationship.

In [None]:
# Split the cleaned table by hemisphere.
# Latitudes of 0 and above go to the northern frame, negative ones to the southern.
latitudes = clean_df_cities['Latitude']
north_cities = clean_df_cities[latitudes >= 0.0]
south_cities = clean_df_cities[latitudes < 0.0]

In [None]:
# NORTHERN HEMISPHERE - MAX TEMPERATURE VS LATITUDE LINEAR REGRESSION
x_axis = north_cities['Latitude']
y_axis = north_cities['Max Temperature']
fit_values, line_eq, r_value = lin_reg(x_axis, y_axis)

# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(x_axis, y_axis, marker='.', color='goldenrod')
plt.plot(x_axis, fit_values, "m-")
plt.annotate(line_eq, (6, 10), fontsize=15, color="m")
plt.xlabel('Latitude')
plt.ylabel('Max Temperature')
plt.title("Northern Hemisphere & Max Temperature at " + df_date)
# lin_reg returns the correlation coefficient r; square it to report r-squared
print(f"The r-squared is: {r_value ** 2}")

In [None]:
# SOUTHERN HEMISPHERE - MAX TEMPERATURE VS LATITUDE LINEAR REGRESSION
x_axis = south_cities['Latitude']
y_axis = south_cities['Max Temperature']
fit_values, line_eq, r_value = lin_reg(x_axis, y_axis)

# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(x_axis, y_axis, marker='.', color='slategray')
plt.plot(x_axis, fit_values, "b-")
plt.annotate(line_eq, (-30, 50), fontsize=15, color="b")
plt.xlabel('Latitude')
plt.ylabel('Max Temperature')
plt.title("Southern Hemisphere & Max Temperature at " + df_date)
# lin_reg returns the correlation coefficient r; square it to report r-squared
print(f"The r-squared is: {r_value ** 2}")

In [None]:
# NORTHERN HEMISPHERE - HUMIDITY VS LATITUDE LINEAR REGRESSION
x_axis = north_cities['Latitude']
y_axis = north_cities['Humidity']
fit_values, line_eq, r_value = lin_reg(x_axis, y_axis)

# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(x_axis, y_axis, marker='.', color='goldenrod')
plt.plot(x_axis, fit_values, "m-")
plt.annotate(line_eq, (40, 10), fontsize=15, color="m")
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title("Northern Hemisphere & Humidity at " + df_date)
# lin_reg returns the correlation coefficient r; square it to report r-squared
print(f"The r-squared is: {r_value ** 2}")

In [None]:
# SOUTHERN HEMISPHERE - HUMIDITY VS LATITUDE LINEAR REGRESSION
x_axis = south_cities['Latitude']
# Plot the humidity column, matching this cell's axis label and title
y_axis = south_cities['Humidity']
fit_values, line_eq, r_value = lin_reg(x_axis, y_axis)

# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(x_axis, y_axis, marker='.', color='slategray')
plt.plot(x_axis, fit_values, "b-")
plt.annotate(line_eq, (-40, 50), fontsize=15, color="b")
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title("Southern Hemisphere & Humidity at " + df_date)
# lin_reg returns the correlation coefficient r; square it to report r-squared
print(f"The r-squared is: {r_value ** 2}")

In [None]:
# NORTHERN HEMISPHERE - CLOUDINESS VS LATITUDE LINEAR REGRESSION
x_axis = north_cities['Latitude']
y_axis = north_cities['Cloudiness']
fit_values, line_eq, r_value = lin_reg(x_axis, y_axis)

# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(x_axis, y_axis, marker='.', color='goldenrod')
plt.plot(x_axis, fit_values, "m-")
plt.annotate(line_eq, (2, 10), fontsize=15, color="m")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title("Northern Hemisphere & Cloudiness at " + df_date)
# lin_reg returns the correlation coefficient r; square it to report r-squared
print(f"The r-squared is: {r_value ** 2}")

In [None]:
# SOUTHERN HEMISPHERE - CLOUDINESS VS LATITUDE LINEAR REGRESSION
x_axis = south_cities['Latitude']
y_axis = south_cities['Cloudiness']
fit_values, line_eq, r_value = lin_reg(x_axis, y_axis)

# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(x_axis, y_axis, marker='.', color='slategray')
# Blue fit line and label, the same colours as the other southern-hemisphere plots
plt.plot(x_axis, fit_values, "b-")
plt.annotate(line_eq, (-22, 5), fontsize=15, color="b")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title("Southern Hemisphere & Cloudiness at " + df_date)
# lin_reg returns the correlation coefficient r; square it to report r-squared
print(f"The r-squared is: {r_value ** 2}")

In [None]:
# NORTHERN HEMISPHERE - WIND SPEED VS LATITUDE LINEAR REGRESSION
x_axis = north_cities['Latitude']
y_axis = north_cities['Wind Speed']
fit_values, line_eq, r_value = lin_reg(x_axis, y_axis)

# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(x_axis, y_axis, marker='.', color='goldenrod')
plt.plot(x_axis, fit_values, "m-")
plt.annotate(line_eq, (2, 30), fontsize=15, color="m")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title("Northern Hemisphere & Wind Speed at " + df_date)
# lin_reg returns the correlation coefficient r; square it to report r-squared
print(f"The r-squared is: {r_value ** 2}")

In [None]:
# SOUTHERN HEMISPHERE - WIND SPEED VS LATITUDE LINEAR REGRESSION
x_axis = south_cities['Latitude']
y_axis = south_cities['Wind Speed']
fit_values, line_eq, r_value = lin_reg(x_axis, y_axis)

# Start a new figure: with the interactive notebook backend, pyplot calls
# otherwise keep drawing on the figure created by an earlier cell.
plt.figure()
plt.scatter(x_axis, y_axis, marker='.', color='slategray')
plt.plot(x_axis, fit_values, "b-")
plt.annotate(line_eq, (-50, 20), fontsize=15, color="b")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title("Southern Hemisphere & Wind Speed at " + df_date)
# lin_reg returns the correlation coefficient r; square it to report r-squared
print(f"The r-squared is: {r_value ** 2}")