In [4]:
## Dependencies
import pandas as pd
import numpy as np
import matplotlib
matplotlib.use('nbagg')
from matplotlib import style
style.use('seaborn')
import matplotlib.pyplot as plt
import seaborn as sns
np.random.seed(sum(map(ord, "aesthetics")))

import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect, func, and_


In [5]:
# Create an engine for the hawaii.sqlite database
engine = create_engine("sqlite:///hawaii.sqlite", echo=False)

In [6]:
# Use the Inspector to explore the database
inspector = inspect(engine)
inspector.get_table_names()

['station', 'weather']

In [7]:
# Use Inspector to print the column names and types
columns = inspector.get_columns('weather')
for c in columns:
    print(c['name'], c["type"])

id INTEGER
station TEXT
date TEXT
prcp FLOAT
tobs FLOAT


In [8]:
# Use Inspector to print the column names and types
columns = inspector.get_columns('station')
for c in columns:
    print(c['name'], c["type"])

id INTEGER
station TEXT
name TEXT
latitude FLOAT
longitude FLOAT
elevation FLOAT


In [9]:
# Reflect hawaii database table station into its ORM class
Base = automap_base()
Base.prepare(engine, reflect=True)
station = Base.classes.station

In [10]:
# Reflect hawaii database table weather into it's ORM class
Base = automap_base()
Base.prepare(engine, reflect=True)
weather = Base.classes.weather

In [11]:
# Start a session to query the database
session = Session(engine)

In [12]:
# ***************************************************************************
# Queries for climate analysis

In [13]:
# Get a number for count(*) by using scalar()
session.query(func.count(station.id)).scalar()

9

In [14]:
# Get a number for count(*) by using scalar()
session.query(func.count(weather.id)).scalar()

18103

In [15]:
# Station - USC00519397 WAIKIKI 717.2, HI US
s519397 = session.query(weather.date, weather.prcp).\
 filter(and_(weather.date.between('2015-01-01','2015-12-31'), weather.station == 'USC00519397')).\
 order_by(weather.station, weather.date.asc()).all()
s519397

[('2015-01-01', 0.0),
 ('2015-01-02', 0.04),
 ('2015-01-03', 0.86),
 ('2015-01-04', 0.02),
 ('2015-01-05', 0.0),
 ('2015-01-06', 0.02),
 ('2015-01-07', 0.0),
 ('2015-01-08', 0.0),
 ('2015-01-09', 0.0),
 ('2015-01-10', 0.0),
 ('2015-01-11', 0.0),
 ('2015-01-12', 0.0),
 ('2015-01-13', 0.0),
 ('2015-01-14', 0.0),
 ('2015-01-15', 0.0),
 ('2015-01-16', 0.0),
 ('2015-01-17', 0.0),
 ('2015-01-18', 0.0),
 ('2015-01-19', 0.0),
 ('2015-01-20', 0.0),
 ('2015-01-21', 0.0),
 ('2015-01-22', 0.0),
 ('2015-01-23', 0.0),
 ('2015-01-24', 0.2),
 ('2015-01-25', 0.03),
 ('2015-01-26', 0.0),
 ('2015-01-27', 0.0),
 ('2015-01-28', 0.0),
 ('2015-01-29', 0.0),
 ('2015-01-30', 0.0),
 ('2015-01-31', 0.0),
 ('2015-02-01', 0.0),
 ('2015-02-02', 0.0),
 ('2015-02-03', 0.0),
 ('2015-02-04', 0.01),
 ('2015-02-05', 0.19),
 ('2015-02-06', 0.0),
 ('2015-02-07', 0.0),
 ('2015-02-08', 0.0),
 ('2015-02-09', 0.0),
 ('2015-02-10', 0.1),
 ('2015-02-11', 0.0),
 ('2015-02-12', 0.0),
 ('2015-02-13', 0.0),
 ('2015-02-14', 0.08),
 (

In [16]:
# put data into a data frame for charting
s519397_df = pd.DataFrame(s519397) 
print(len(s519397_df.index))
s519397_df.info()

360
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 2 columns):
date    360 non-null object
prcp    360 non-null float64
dtypes: float64(1), object(1)
memory usage: 5.7+ KB


In [17]:
# Convert the string to a datetime object
s519397_df["date"] = pd.to_datetime(s519397_df["date"], format='%Y-%m-%d')
s519397_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 2 columns):
date    360 non-null datetime64[ns]
prcp    360 non-null float64
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.7 KB


In [18]:
# Set the index to the date column for pandas df.plot()
s519397_df = s519397_df.set_index('date')
print(len(s519397_df.index))
s519397_df.info()

360
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 360 entries, 2015-01-01 to 2015-12-31
Data columns (total 1 columns):
prcp    360 non-null float64
dtypes: float64(1)
memory usage: 5.6 KB


In [19]:
# Summary Statistics for Station - USC00519397 WAIKIKI 717.2, HI US
s519397_df["prcp"].max()

1.5700000000000001

In [20]:
# Summary Statistics for Station - USC00519397 WAIKIKI 717.2, HI US
s519397_df["prcp"].min()

0.0

In [21]:
# Summary Statistics for Station - USC00519397 WAIKIKI 717.2, HI US
s519397_df["prcp"].mean()

0.05099999999999995

In [22]:
# Summary Statistics for Station - USC00519397 WAIKIKI 717.2, HI US
s519397_df["prcp"].sum()

18.35999999999998

In [23]:
# Summary Statistics for Station - USC00519397 WAIKIKI 717.2, HI US
s519397_df["prcp"].describe()

count    360.000000
mean       0.051000
std        0.155938
min        0.000000
25%        0.000000
50%        0.000000
75%        0.020000
max        1.570000
Name: prcp, dtype: float64

In [24]:
#import matplotlib.dates as mdates

ax = s519397_df.plot(kind='bar',color='magenta', width=1.5, 
                     xticks=s519397_df.index,title="Station - USC00519397 WAIKIKI 717.2, HI US")
ax.set_xlabel("Dates")
ax.set_ylabel("Precipitation")

plt.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [None]:
results = session.query(weather.station, weather.date, weather.prcp, weather.tobs).\
    filter(weather.date.between('2015-01-01','2015-12-31')).\
    order_by(weather.station, weather.date.asc()).all()
results

In [None]:
# put data into a data frame for charting
results_df = pd.DataFrame(results) 
results_df.head(5)

In [None]:
# Get an actual number for count(*) by using scalar()
session.query(func.count(station.id)).scalar()

In [None]:
# prcp_df = pd.read_sql_query(results, session.bind)
# prcp_df.head(10)

# stmt = session.query(Emoji).\
#     order_by(Emoji.score.desc()).statement
# df2 = pd.read_sql_query(stmt, session.bind)
# df2.head(10)

# for result in results:
#   print(result.fetchall())

# for record in records:
#   print(record.prcp)

# for record in records:
#   print(record)


