In [1]:
#%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt

In [2]:
import numpy as np
import pandas as pd
import datetime as dt
from datetime import date

# Reflect Tables into SQLAlchemy ORM

In [3]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine
from sqlalchemy import func 
from sqlalchemy import inspect 

In [4]:
# create engine and inspector to hawaii.sqlite
engine = create_engine("sqlite:///Resources/hawaii.sqlite")
insp = inspect(engine)

# list the tables 
print(insp.get_table_names())

['measurement', 'station']


In [5]:
# list the columns in the measurement table 
m_cols = insp.get_columns('measurement')
for col in m_cols:
    print(col['name'], col['type'])

id INTEGER
station TEXT
date TEXT
prcp FLOAT
tobs FLOAT


In [6]:
# reflect an existing database into a new model
Base = automap_base()
Base.prepare(engine, reflect=True)

# Save references to each table
Measurement = Base.classes.measurement 
Station = Base.classes.station

# Create our session (link) from Python to the DB
session = Session(engine)

# Exploratory Precipitation Analysis

In [7]:
# Find the most recent date in the data set.
end_date_str = session.query(Measurement.date).order_by(Measurement.date.desc()).first()

The data are daily rainfall totals and some kind of temperature, possibly daily max.  
I will import data from all the stations, so the date will not be able to be used as an index.   

In [38]:
# Design a query to retrieve the last 12 months of precipitation data and plot the results. 
# Starting from the most recent data point in the database. 
end_date = date(*map(int, end_date_str[0].split('-')))

# Calculate the date one year from the last date in data set.
start_date = end_date - dt.timedelta(days=7)
print(f"start date = {start_date}, end date = {end_date}")

# Perform a query to retrieve the data and precipitation scores
start_date_str = start_date.strftime("%Y-%m-%d")
precip_df = pd.DataFrame(session.query(Measurement.date, Measurement.station, Measurement.prcp).filter(Measurement.date > start_date_str).all())

# Save the query results as a Pandas DataFrame and set the index to the date column

# Sort the dataframe by date (and station number)  
precip_df = precip_df.sort_values(by=['date', 'station'])

# Use Pandas Plotting with Matplotlib to plot the data
# I will plot out the mean from all the stations as a time series 
mean_precip = precip_df[['date','prcp']].groupby(by='date').mean()
mean_precip


start date = 2017-08-16, end date = 2017-08-23


Unnamed: 0_level_0,prcp
date,Unnamed: 1_level_1
2017-08-17,0.0475
2017-08-18,0.02
2017-08-19,0.03
2017-08-20,0.005
2017-08-21,0.193333
2017-08-22,0.166667
2017-08-23,0.1325


In [13]:
# Use Pandas to calcualte the summary statistics for the precipitation data


# Exploratory Station Analysis

In [14]:
# Design a query to calculate the total number stations in the dataset


In [15]:
# Design a query to find the most active stations (i.e. what stations have the most rows?)
# List the stations and the counts in descending order.


In [16]:
# Using the most active station id from the previous query, calculate the lowest, highest, and average temperature.


In [17]:
# Using the most active station id
# Query the last 12 months of temperature observation data for this station and plot the results as a histogram


# Close session

In [18]:
# Close Session
session.close()