# WeatherPy
----

#### Dependencies

In [None]:
# import dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import time
from scipy.stats import linregress
from citipy import citipy

# import api key; the data-retrieval cell references it as `weather_api_key`
# NOTE(review): assumes api_keys.py defines `weather_api_key` - confirm
from api_keys import weather_api_key

# OpenWeatherMap current-weather endpoint (query string already opened at "q=")
weather_url = "https://api.openweathermap.org/data/2.5/weather?q="

# CSV output locations: all cities, then one file per hemisphere
output_data_file = "../output_data/cities.csv"
output_nhemisphere_data_file = "../output_data/nhemisphere.csv"
output_shemisphere_data_file = "../output_data/shemisphere.csv"

# (min, max) bounds for latitude and longitude
lat_range, lng_range = (-90, 90), (-180, 180)

#### List of cities

In [None]:
# randomize lats and lngs inside the configured ranges
# (uses lat_range / lng_range rather than repeating the literal bounds)
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# find the nearest city for each coordinate pair using citipy,
# keeping each city once, in first-seen order
cities = []
seen_cities = set()
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # the set gives constant-time duplicate checks; the list keeps the order
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

print(f"Number of cities: {len(cities)}")

#### Data Retrieval

In [None]:
# create counters for output (records are numbered 1-50 within each set)
record = 1
group = 1

# create list for the json payloads of successfully retrieved cities
responses = []

# initial printout
print(f"Beginning Data Retrieval\n"
      f"-----------------------------")

# for loop for api calls for each city
for city in cities:

    # announce the city once, whatever the outcome of the request
    print(f"Processing Record {record} of Set {group} | {city}")

    # create target_url
    target_url = f"{weather_url}{city}&appid={weather_api_key}"

    # generate api request and make json; a timeout keeps one stalled
    # connection from hanging the whole run
    try:
        response = requests.get(target_url, timeout=10).json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # only the exception type is printed: the full message can contain
        # the request URL, which includes the API key
        print(f"Request failed ({type(error).__name__}). Skipping...")
        response = None

    if response is not None:
        # inspect the status field instead of comparing the whole payload to
        # a fixed string, so other error payloads are not stored as data.
        # NOTE(review): the API appears to send "cod" as an int on success
        # and as a string on errors, hence the str() normalisation - confirm
        status = str(response.get("cod"))

        if status == "200":
            # city is found
            responses.append(response)
        elif status == "404":
            # city not found
            print("City not found. Skipping...")
        else:
            # any other API error (e.g. bad key, rate limit)
            print(f"Unexpected API response (cod={status}): "
                  f"{response.get('message')}. Skipping...")

    # count record and set
    if record < 50:
        record += 1
    else:
        record = 1
        group += 1

# final printout
print(f"-----------------------------\n"
      f"Data Retrieval Complete\n"
      f"-----------------------------")

#### City Data Frame

In [None]:
# create dictionary of columns, one entry per retrieved API response
df = {
    "City": [r['name'] for r in responses],
    "Cloudiness": [r['clouds']['all'] for r in responses],
    "Country": [r['sys']['country'] for r in responses],
    "Date": [r['dt'] for r in responses],
    "Humidity": [r['main']['humidity'] for r in responses],
    "Lat": [r['coord']['lat'] for r in responses],
    "Lng": [r['coord']['lon'] for r in responses],
    "Max Temp": [r['main']['temp_max'] for r in responses],
    "Wind Speed": [r['wind']['speed'] for r in responses],
}

# convert dictionary to data frame
city_data = pd.DataFrame(df)

# convert from K to F
city_data["Max Temp"] = ((city_data["Max Temp"] - 273.15) * (9/5) + 32).round(2)

# convert from m/s to mph: the request sends no `units` parameter, so
# OpenWeatherMap reports wind speed in metre/sec, while the plots label it mph
city_data["Wind Speed"] = (city_data["Wind Speed"] * 2.23694).round(2)

# create data frames for each hemisphere
# (cities exactly on the equator are counted as northern so none are dropped)
n_hemisphere = city_data.loc[city_data["Lat"] >= 0]
s_hemisphere = city_data.loc[city_data["Lat"] < 0]

# export the data frames to CSV
city_data.to_csv(output_data_file, index=False)
n_hemisphere.to_csv(output_nhemisphere_data_file, index=False)
s_hemisphere.to_csv(output_shemisphere_data_file, index=False)

# display the data frame
city_data.head()

### Import CSVs (Don't need to retrieve data again)

In [None]:
# reload the three saved frames from disk (all cities, northern, southern)
city_data, n_hemisphere, s_hemisphere = (
    pd.read_csv(csv_path)
    for csv_path in (
        output_data_file,
        output_nhemisphere_data_file,
        output_shemisphere_data_file,
    )
)

# display the full city table
city_data

#### Function for Scatter Plots

In [None]:
def scatter(x_value, y_value, x_label, y_label):
    """Scatter-plot one column of the global ``city_data`` against another.

    x_value, y_value -- column names in ``city_data`` for the x and y axes
    x_label, y_label -- axis labels, also combined to form the plot title

    The title ends with the largest "Date" value in ``city_data``, formatted
    as (mm/dd/yy) in local time. Points are coloured by their y value.
    The figure is shown immediately; nothing is returned.
    """
    # newest observation timestamp -> "(mm/dd/yy)" suffix for the title
    newest = time.localtime(city_data["Date"].max())
    stamp = time.strftime("(%m/%d/%y)", newest)

    # draw the points, coloured by y value
    xs = city_data[x_value]
    ys = city_data[y_value]
    plt.scatter(xs, ys, c=ys, edgecolors="grey")

    # title and axis labels
    plt.title(f"{x_label} vs. {y_label} {stamp}")
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    # grid on both axes, tidy layout, then render
    plt.grid(axis='both')
    plt.tight_layout()
    plt.show()

### Latitude vs Temperature Plot

In [None]:
# max temperature against latitude for every city
scatter(
    x_value="Lat",
    y_value="Max Temp",
    x_label="Latitude",
    y_label="Max Temperature (F)",
)

### Latitude vs Humidity Plot

In [None]:
# humidity against latitude for every city
scatter(
    x_value="Lat",
    y_value="Humidity",
    x_label="Latitude",
    y_label="Humidity (%)",
)

### Latitude vs Cloudiness Plot

In [None]:
# cloudiness against latitude for every city
scatter(
    x_value="Lat",
    y_value="Cloudiness",
    x_label="Latitude",
    y_label="Cloudiness (%)",
)

### Latitude vs Wind Speed Plot

In [None]:
# wind speed against latitude for every city
scatter(
    x_value="Lat",
    y_value="Wind Speed",
    x_label="Latitude",
    y_label="Wind Speed (mph)",
)

## Regression Analysis

#### Regression Analysis Function

In [None]:
# create function for regression analysis
def regress(hemisphere, x_column, y_column, x_label, y_label, x, y):
    """Plot a scatter with a fitted regression line for one hemisphere.

    hemisphere -- "Northern Hemisphere" or "Southern Hemisphere"; selects the
                  global ``n_hemisphere`` or ``s_hemisphere`` data frame
    x_column, y_column -- column names used as the x and y values
    x_label, y_label -- axis labels, also used in the plot title
    x, y -- position passed to ``plt.annotate`` for the line-equation text

    Prints the r-squared value and shows the figure; returns nothing.
    Raises ValueError if ``hemisphere`` is not one of the two names above.
    """
    # check hemisphere; fail with a clear message rather than falling through
    # to an unbound-variable error further down
    if hemisphere == "Northern Hemisphere":
        source = n_hemisphere
    elif hemisphere == "Southern Hemisphere":
        source = s_hemisphere
    else:
        raise ValueError(
            f"Unknown hemisphere {hemisphere!r}; expected "
            f"'Northern Hemisphere' or 'Southern Hemisphere'"
        )

    # create title (both sides use the display labels, as scatter() does)
    title = f"{hemisphere} - {x_label} vs. {y_label}"

    # define x and y values
    x_values = source[x_column]
    y_values = source[y_column]

    # run regression
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)

    # calculate regression values and r-squared
    regress_values = x_values * slope + intercept
    r_squared = rvalue ** 2

    # create line equation
    line_eq = f"y = {round(slope, 2)}x + {round(intercept, 2)}"

    # plot the data
    plt.scatter(x_values, y_values)

    # plot the regression line
    plt.plot(x_values, regress_values, "r-")

    # add equation
    plt.annotate(line_eq, (x, y), fontsize=15, color="red")

    # add title and labels to plot
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    # print summary data
    print(f"The r-squared value is: {r_squared}")

    # display the plot
    plt.show()

### Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# northern hemisphere: max temperature against latitude
regress(
    hemisphere="Northern Hemisphere",
    x_column="Lat",
    y_column="Max Temp",
    x_label="Latitude",
    y_label="Max Temp (F)",
    x=0,
    y=12,
)

### Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# southern hemisphere: max temperature against latitude
regress(
    hemisphere="Southern Hemisphere",
    x_column="Lat",
    y_column="Max Temp",
    x_label="Latitude",
    y_label="Max Temp (F)",
    x=-25,
    y=55,
)

### Northern Hemisphere - Humidity vs. Latitude Linear Regression

In [None]:
# northern hemisphere: humidity against latitude
regress(
    hemisphere="Northern Hemisphere",
    x_column="Lat",
    y_column="Humidity",
    x_label="Latitude",
    y_label="Humidity (%)",
    x=43,
    y=25,
)

### Southern Hemisphere - Humidity vs. Latitude Linear Regression

In [None]:
# southern hemisphere: humidity against latitude
regress(
    hemisphere="Southern Hemisphere",
    x_column="Lat",
    y_column="Humidity",
    x_label="Latitude",
    y_label="Humidity (%)",
    x=-56,
    y=10,
)

### Northern Hemisphere - Cloudiness vs. Latitude Linear Regression

In [None]:
# northern hemisphere: cloudiness against latitude
regress(
    hemisphere="Northern Hemisphere",
    x_column="Lat",
    y_column="Cloudiness",
    x_label="Latitude",
    y_label="Cloudiness (%)",
    x=30,
    y=27,
)

### Southern Hemisphere - Cloudiness vs. Latitude Linear Regression

In [None]:
# southern hemisphere: cloudiness against latitude
regress(
    hemisphere="Southern Hemisphere",
    x_column="Lat",
    y_column="Cloudiness",
    x_label="Latitude",
    y_label="Cloudiness (%)",
    x=-57,
    y=58,
)

### Northern Hemisphere - Wind Speed vs. Latitude Linear Regression

In [None]:
# northern hemisphere: wind speed against latitude
regress(
    hemisphere="Northern Hemisphere",
    x_column="Lat",
    y_column="Wind Speed",
    x_label="Latitude",
    y_label="Wind Speed (mph)",
    x=25,
    y=12,
)

### Southern Hemisphere - Wind Speed vs. Latitude Linear Regression

In [None]:
# southern hemisphere: wind speed against latitude
regress(
    hemisphere="Southern Hemisphere",
    x_column="Lat",
    y_column="Wind Speed",
    x_label="Latitude",
    y_label="Wind Speed (mph)",
    x=-25,
    y=12,
)