In [None]:
%matplotlib inline

In [None]:
# Select the 'fivethirtyeight' matplotlib stylesheet for the plots drawn later.
from matplotlib import style
style.use('fivethirtyeight')

import matplotlib.pyplot as plt

In [None]:
import numpy as np
import pandas as pd
import datetime as dt

# Reflect Tables into SQLAlchemy ORM

In [None]:
# Python SQL toolkit and Object Relational Mapper

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect

In [None]:
# Build the SQLAlchemy engine for the SQLite file at Resources/hawaii.sqlite
# (three slashes in the URL mean the path is relative to the working directory).

DATABASE_URL = "sqlite:///Resources/hawaii.sqlite"
engine = create_engine(DATABASE_URL)

In [None]:
# Create the automap base class; the mapped classes are generated from the
# existing database schema when the base is prepared against the engine.

Base = automap_base()

In [None]:
# Reflect the tables from the database and generate the mapped classes.
# `autoload_with=engine` replaces the `engine` / `reflect=True` arguments,
# which SQLAlchemy deprecated in 1.4 (requires SQLAlchemy >= 1.4).

Base.prepare(autoload_with=engine)

In [None]:
# List the names of all the mapped classes that automap generated

Base.classes.keys() 

In [None]:
# Keep a module-level handle on each mapped class for the queries below.

Measurement, Station = Base.classes.measurement, Base.classes.station

In [None]:
# Open the ORM session (bound to the engine) that every query below goes through.

session = Session(bind=engine)

# Exploratory Precipitation Analysis

In [None]:
# Preview the first Measurement row: its attribute dict shows the column names
# and sample values available on the mapped class.

first_row_M = session.query(Measurement).first()
vars(first_row_M)

In [None]:
# Preview the first Station row the same way, to see the Station columns.
first_row_S = session.query(Station).first()
vars(first_row_S)

In [None]:
def convert_from_iso(obj):
    """Parse an ISO-format date string (e.g. ``"2017-08-23"``) into a ``datetime.date``.

    Args:
        obj: The date string to parse.

    Returns:
        The corresponding ``datetime.date``.

    Raises:
        ValueError: If ``obj`` is not a valid ISO-format date string.
        TypeError: If ``obj`` is not a string.
    """
    parsed_date = dt.date.fromisoformat(obj)
    return parsed_date
        
def convert_to_iso(obj):
    """Format a ``datetime.date`` as an ISO ``YYYY-MM-DD`` string.

    This is the inverse of ``date.fromisoformat``. The method on ``date`` is
    ``isoformat``; there is no ``toisoformat``, so calling through that name
    raised ``AttributeError`` on every call.

    Args:
        obj: The ``datetime.date`` to format.

    Returns:
        The date as a ``YYYY-MM-DD`` string.

    Raises:
        TypeError: If ``obj`` is not a ``datetime.date`` instance.
    """
    return dt.date.isoformat(obj)
        
    

In [None]:
# Lazy query over every measurement date (not executed until iterated).
dates = session.query(Measurement.date)

# Let the database compute both extremes in a single aggregate query instead of
# pulling every row into Python three times (twice for max, once for min).
# Dates are stored as ISO-format strings, so the SQL MIN/MAX string ordering
# matches calendar ordering.
min_date_str, max_date_str = session.query(
    func.min(Measurement.date), func.max(Measurement.date)
).one()

# Most recent date in the data set, as a datetime.date for date arithmetic.
max_date = convert_from_iso(max_date_str)

print(max_date)

print(f"MAX: {max_date_str}")
print(f"MIN: {min_date_str}")

In [None]:
# Work out the date boundaries used for the "last 12 months" analysis,
# counting from the most recent date found in the data set (max_date).

from dateutil.relativedelta import relativedelta                 # thx to Tom!

twelve_months = relativedelta(months=12)
one_year = relativedelta(years=1)

# Start of the 12-month window: twelve months before the latest date.
last_12_months = max_date - twelve_months

# The date one year after the latest date in the data set.
plus_one_yr = max_date + one_year

for boundary in (last_12_months, plus_one_yr):
    print(boundary)

In [None]:
# Retrieve (prcp, date) for every measurement taken on or after the start of
# the 12-month window.
# The date column holds ISO-format strings (this notebook parses them with
# date.fromisoformat), so compare against an ISO string explicitly rather than
# passing a datetime.date and relying on the DB driver's implicit adaptation.

precip = (
    session.query(Measurement.prcp, Measurement.date)
    .filter(Measurement.date >= last_12_months.isoformat())
    .all()
)

# Save the query results as a DataFrame indexed by date and sorted
# chronologically (ISO date strings sort in calendar order).
# Explicit column names keep this working even when the query returns no rows.

df = (
    pd.DataFrame(precip, columns=["prcp", "date"])
    .set_index("date")
    .sort_index()
)

df

In [None]:
# Draw the precipitation DataFrame with pandas' matplotlib-backed plotting:
# a wide figure, vertical tick labels, and labelled axes.

plot_kwargs = {"figsize": (18, 6), "rot": 90, "xlabel": "Date", "ylabel": "Inches"}
df.plot(**plot_kwargs)

In [None]:
# Use Pandas to calculate the summary statistics for the precipitation data

df.describe()

# Exploratory Station Analysis

In [None]:
# Count the total number of distinct station ids listed in the Station table.

distinct_station_ids = session.query(Station.station).distinct()
stations1 = distinct_station_ids.count()
print(stations1)

In [None]:
# Same count taken from the Measurement table, i.e. the number of distinct
# station ids that appear in the measurement rows.
measured_station_ids = session.query(Measurement.station).distinct()
stations2 = measured_station_ids.count()
print(stations2)

In [None]:
# Fetch the distinct station ids; each result is a one-column row.
station_lst = session.query(Station.station).distinct().all()
print(station_lst)

# Flatten the one-column rows into a plain list of station ids.
new_lst = [row[0] for row in station_lst]

print(new_lst)

In [None]:
# Find the most active stations (i.e. the stations with the most measurement
# rows) and list the stations and their counts in descending order.

# A single GROUP BY query returns every station's row count, instead of
# issuing one COUNT query per station.
rows_per_station = {
    station_id: row_count
    for station_id, row_count in session.query(
        Measurement.station, func.count(Measurement.station)
    ).group_by(Measurement.station).all()
}

newer_lst = []

for station in new_lst:
    # A station with no measurement rows is absent from the grouped result,
    # so it is reported with a count of 0.
    cnt = rows_per_station.get(station, 0)
    print(f"Station: {station}")
    print(f"Row Count: {cnt}\n")
    newer_lst.append((cnt, station))

# (count, station) tuples sort by count first; reverse=True puts the most
# active station at index 0.
newer_lst = sorted(newer_lst, reverse=True)
print(f"\n\n{newer_lst}")
    
    

In [None]:
# Using the most active station id from the previous query, calculate the
# lowest, highest, and average temperature.

# newer_lst is sorted by row count, descending; each entry is (count, station).
most_active_station = newer_lst[0][1]

print(most_active_station)

# Select only the aggregates. A bare, non-aggregated tobs column is
# deliberately not selected alongside them: its value would come from an
# arbitrary row and is not one of the requested statistics.
sel = [func.min(Measurement.tobs),
       func.max(Measurement.tobs),
       func.avg(Measurement.tobs)
       ]

# One row comes back: (lowest, highest, average) tobs for the station.
results = session.query(*sel).filter(Measurement.station == most_active_station).first()

print(results)



In [None]:
# Using the most active station id, query the last 12 months of temperature
# observation data for this station and plot the results as a histogram.

# Base query: every (date, tobs) observation recorded by the most active station.
dates = session.query(Measurement.date, Measurement.tobs).filter(Measurement.station == most_active_station)

# Latest observation date for this station, computed by the database rather
# than by loading every row into Python and calling max() on them.
max_date = convert_from_iso(
    session.query(func.max(Measurement.date))
    .filter(Measurement.station == most_active_station)
    .scalar()
)

# Start of the 12-month window ending at this station's latest observation.
last_12_months = max_date - relativedelta(months=12)

# The date column holds ISO-format strings, so filter against an ISO string
# explicitly instead of relying on the driver to adapt a datetime.date.
results = dates.filter(Measurement.date >= last_12_months.isoformat()).all()

df = pd.DataFrame(results, columns=["date", "tobs"]).set_index("date")

# Pass figsize to the plot call itself: calling plt.figure(figsize=...) after
# plotting opens a new, empty figure and leaves the histogram at default size.
ax = df.plot.hist(bins=12, figsize=(8, 6))
ax.set_xlabel("Temp (F)")
ax.set_ylabel("Number of Observations")
plt.show()

In [None]:
# Calculate the date one year after `max_date`.
# NOTE: `max_date` was reassigned in the histogram cell above to the most
# active station's latest observation date, so this value is relative to that
# station and not necessarily to the last date in the whole data set.

plus_one_yr = max_date + relativedelta(years=1)

print(plus_one_yr)

# Close session

In [None]:
# Close the ORM session opened earlier in the notebook now that all queries have run.
session.close()