In [88]:
# Import dependencies
import datetime as dt
import numpy as np
import pandas as pd

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect, desc
from sqlalchemy.sql import label
from sqlalchemy import and_

from flask import Flask, jsonify

import matplotlib.pyplot as plt

In [89]:
#create engine
engine = create_engine("sqlite:///hawaii.sqlite")
conn = engine.connect()

In [90]:
#prepare and reflect the database
Base = automap_base()
Base.prepare(engine, reflect=True)

In [91]:
#save each table as their own reference classes
Measurement = Base.classes.measurement
Station = Base.classes.station

#conn = engine.connect()

# Create a "Metadata" Layer That Abstracts our SQL Database
# ----------------------------------
#Base.metadata.create_all(engine)

In [92]:
# Create our session (link) from Python to the DB
session = Session(bind = engine)

In [93]:
# Getting the table names for each table
inspector = inspect(engine)
inspector.get_table_names()

['measurement', 'measurements', 'station', 'stations']

In [94]:
# Get a list of column names and types - measurement
columns = inspector.get_columns('measurements')
for c in columns:
    print(c['name'], c["type"])

id INTEGER
station VARCHAR
date VARCHAR
prcp FLOAT
tobos INTEGER


In [95]:
# Get a list of column names and types - station
columns = inspector.get_columns('stations')
for c in columns:
    print(c['name'], c["type"])

id INTEGER
name VARCHAR
station VARCHAR
latitude FLOAT
longitude FLOAT
elevation FLOAT


In [96]:
conn.execute('SELECT * FROM stations LIMIT 5').fetchall()

[]

In [97]:
conn.execute('SELECT * FROM measurements LIMIT 5').fetchall()

[]

## Precipitation Analysis

In [99]:
#query the database for a years worth of precipitation data. Focus on most recent.
precipitation_data = session.query(Measurement.date, Measurement.prcp).filter(Measurement.date >= "2016-08-23").\
    filter(Measurement.date <= "2017-08-23").all()

In [100]:
#set index equal to date
pre_df = pd.DataFrame(precipitation_data)
pre_df.head()

In [101]:
pre_df['date'] = pd.to_datetime(pre_df['date'])

KeyError: 'date'

In [68]:
prec_df = pre_df.set_index('date')
prec_df.head()

KeyError: 'date'

In [67]:
#plot the data
prec_plot = prec_df.plot(figsize = (16, 9), sort_columns=True, rot=45, use_index=True, legend=True, grid=True, color='b')
plt.ylabel('Precipitation', weight='bold')
plt.xlabel('Date', weight='bold')
plt.title("Precipitation in Hawaii from 8-23-2016 to 8-23-2017")
plt.show()

NameError: name 'prec_df' is not defined

In [69]:
#show short statistical analysis for data
prec_df.describe()

NameError: name 'prec_df' is not defined

## Station Analysis

In [None]:
## Start Station Analysis
station_num = session.query(Station.id).count()
station_num

In [None]:
#Design a query to find the most active stations.
#List the stations and observation counts in descending order
#Which station has the highest number of observations?
most_active_station = session.query(Measurements.station, Stations.name, func.count(Measurements.tobs)).\
filter(Measurements.station == Station.station).group_by(Measurement.station).order_by(func.count(Measurement.tobs).desc()).all()

In [None]:
#Waihee 837.5 has the highest number of observations
most_active_station

In [None]:
waihee_tobs = session.query(Measurement.tobs).\
filter(Measurement.station == "USC00519281", Measurement.station == Station.station, Measurement.date >="2016-08-23", Measurement.date <="2017-08-23").\
all()

In [None]:
len(waihee_tobs)

In [None]:
print(waihee_tobs[0][0])

## Temperature Analysis

In [None]:
temperatures = [temp[0] for temp in waihee_tobs]

In [None]:
plt.hist(temperatures, bins=12, color='b')
plt.title("Temperature Observation for Waihee Station (2016-08-23 to 2017)")
plt.ylabel('Frequency', weight='bold')
plt.xlabel('Temperature', weight='bold')
labels = ['tobs']
plt.legend(labels)

In [None]:
def calc_temps(start, end):
    query = session.query(Measurement.tobs).filter(Measurement.date>=start, Measurement.date<=end).all()
    temperatures = [temp[0] for temp in query]
    avg_temp = np.mean(temperatures)
    lowest_temp = min(temperatures)
    highest_temp = max(temperatures)
    

    plt.figure(figsize=(3,5))
    plt.bar(1, avg_temp, yerr=(highest_temp - lowest_temp), tick_label='', color='salmon')
    plt.ylabel("Temperature", weight="bold")
    plt.title("Average Trip Temperature", weight="bold")
    plt.show()

In [None]:
columns = inspector.get_columns('Measurement')
for column in columns:
    print(column['name'])

In [None]:
calc_temps(start="2017-08-07", end="2017-08-23")