In [1]:
# Dependencies
import numpy as np
import pandas as pd

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy, os
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect

from sqlalchemy import extract

In [2]:
# Locate the SQLite file relative to the notebook's working directory.
hawaii_path = os.path.join("Resources", "hawaii.sqlite")

# Fail fast when the file is missing: SQLite may otherwise silently create a
# brand-new empty database at this path, reflection would then find no tables,
# and the `Base.classes.measurement` lookup below would fail with an opaque
# AttributeError instead of pointing at the real problem (a wrong path).
if not os.path.isfile(hawaii_path):
    raise FileNotFoundError(
        f"SQLite database not found at {os.path.abspath(hawaii_path)!r}; "
        "run the notebook from the folder that contains 'Resources/'."
    )

engine = create_engine(f"sqlite:///{hawaii_path}")

# Reflect the existing database into automap model classes.
# NOTE(review): `reflect=True` is the legacy spelling; SQLAlchemy 1.4+ prefers
# `Base.prepare(autoload_with=engine)`. Left unchanged because the installed
# SQLAlchemy version is not visible from this notebook — confirm before switching.
Base = automap_base()
Base.prepare(engine, reflect=True)

# Save references to each reflected table
Measurement = Base.classes.measurement
Station = Base.classes.station

# Create our session (link) from Python to the DB
session = Session(engine)

In [3]:
# Build a SQLAlchemy inspector for the engine and display the table names it reports.
inspector = inspect(engine)
inspector.get_table_names()

['measurement', 'station']

In [4]:
# Print the column metadata the inspector reports for the 'measurement' table,
# one column description per line.
columns = inspector.get_columns('measurement')
for column in columns:
    print(column)

{'name': 'id', 'type': INTEGER(), 'nullable': False, 'default': None, 'autoincrement': 'auto', 'primary_key': 1}
{'name': 'station', 'type': TEXT(), 'nullable': True, 'default': None, 'autoincrement': 'auto', 'primary_key': 0}
{'name': 'date', 'type': TEXT(), 'nullable': True, 'default': None, 'autoincrement': 'auto', 'primary_key': 0}
{'name': 'prcp', 'type': FLOAT(), 'nullable': True, 'default': None, 'autoincrement': 'auto', 'primary_key': 0}
{'name': 'tobs', 'type': FLOAT(), 'nullable': True, 'default': None, 'autoincrement': 'auto', 'primary_key': 0}


In [17]:
# Print the column metadata the inspector reports for the 'station' table,
# one column description per line.
station_columns = inspector.get_columns('station')
for column in station_columns:
    print(column)

{'name': 'id', 'type': INTEGER(), 'nullable': False, 'default': None, 'autoincrement': 'auto', 'primary_key': 1}
{'name': 'station', 'type': TEXT(), 'nullable': True, 'default': None, 'autoincrement': 'auto', 'primary_key': 0}
{'name': 'name', 'type': TEXT(), 'nullable': True, 'default': None, 'autoincrement': 'auto', 'primary_key': 0}
{'name': 'latitude', 'type': FLOAT(), 'nullable': True, 'default': None, 'autoincrement': 'auto', 'primary_key': 0}
{'name': 'longitude', 'type': FLOAT(), 'nullable': True, 'default': None, 'autoincrement': 'auto', 'primary_key': 0}
{'name': 'elevation', 'type': FLOAT(), 'nullable': True, 'default': None, 'autoincrement': 'auto', 'primary_key': 0}


In [19]:
# Stations
# Query the distinct station names; only one column is selected, so each
# result row prints as a one-element tuple.
weather_stations = session.query(Station.name).distinct()
for station in weather_stations:
    print(station)

('WAIKIKI 717.2, HI US',)
('KANEOHE 838.1, HI US',)
('KUALOA RANCH HEADQUARTERS 886.9, HI US',)
('PEARL CITY, HI US',)
('UPPER WAHIAWA 874.3, HI US',)
('WAIMANALO EXPERIMENTAL FARM, HI US',)
('WAIHEE 837.5, HI US',)
('HONOLULU OBSERVATORY 702.2, HI US',)
('MANOA LYON ARBO 785.2, HI US',)


In [5]:
# January Temps
# Select every measurement whose date text contains "-01-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
january_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-01-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
january_temp_list = [tobs for _id, _station, _date, tobs in january_temps]
january_temp_df = pd.DataFrame(data=january_temp_list, columns=['january'])
january_temp_df.describe()

Unnamed: 0,january
count,1727.0
mean,68.726115
std,4.367892
min,54.0
25%,66.0
50%,69.0
75%,72.0
max,81.0


In [6]:
# February Temps
# Select every measurement whose date text contains "-02-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
february_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-02-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
february_temp_list = [tobs for _id, _station, _date, tobs in february_temps]
february_temp_df = pd.DataFrame(data=february_temp_list, columns=['february'])
february_temp_df.describe()

Unnamed: 0,february
count,1610.0
mean,69.442236
std,3.981399
min,56.0
25%,67.0
50%,70.0
75%,72.0
max,83.0


In [7]:
# March Temps
# Select every measurement whose date text contains "-03-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
march_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-03-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
march_temp_list = [tobs for _id, _station, _date, tobs in march_temps]
march_temp_df = pd.DataFrame(data=march_temp_list, columns=['march'])
march_temp_df.describe()

Unnamed: 0,march
count,1693.0
mean,70.059067
std,3.845271
min,56.0
25%,68.0
50%,70.0
75%,73.0
max,82.0


In [8]:
# April Temps
# Select every measurement whose date text contains "-04-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
april_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-04-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
april_temp_list = [tobs for _id, _station, _date, tobs in april_temps]
april_temp_df = pd.DataFrame(data=april_temp_list, columns=['april'])
april_temp_df.describe()

Unnamed: 0,april
count,1713.0
mean,72.357268
std,3.632131
min,53.0
25%,70.0
50%,73.0
75%,75.0
max,84.0


In [9]:
# May Temps
# Select every measurement whose date text contains "-05-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
may_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-05-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
may_temp_list = [tobs for _id, _station, _date, tobs in may_temps]
may_temp_df = pd.DataFrame(data=may_temp_list, columns=['may'])
may_temp_df.describe()

Unnamed: 0,may
count,1733.0
mean,73.6809
std,3.355783
min,63.0
25%,71.0
50%,74.0
75%,76.0
max,87.0


In [10]:
# June Temps
# Select every measurement whose date text contains "-06-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
june_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-06-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
june_temp_list = [tobs for _id, _station, _date, tobs in june_temps]
june_temp_df = pd.DataFrame(data=june_temp_list, columns=['june'])
june_temp_df.describe()

Unnamed: 0,june
count,1700.0
mean,74.944118
std,3.257417
min,64.0
25%,73.0
50%,75.0
75%,77.0
max,85.0


In [11]:
# July Temps
# Select every measurement whose date text contains "-07-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
july_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-07-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
july_temp_list = [tobs for _id, _station, _date, tobs in july_temps]
july_temp_df = pd.DataFrame(data=july_temp_list, columns=['july'])
july_temp_df.describe()

Unnamed: 0,july
count,1711.0
mean,76.082408
std,3.356532
min,61.0
25%,74.0
50%,76.0
75%,78.0
max,87.0


In [12]:
# August Temps
# Select every measurement whose date text contains "-08-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
august_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-08-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
august_temp_list = [tobs for _id, _station, _date, tobs in august_temps]
august_temp_df = pd.DataFrame(data=august_temp_list, columns=['august'])
august_temp_df.describe()

Unnamed: 0,august
count,1622.0
mean,76.412454
std,3.453138
min,65.0
25%,74.0
50%,76.0
75%,79.0
max,87.0


In [13]:
# September Temps
# Select every measurement whose date text contains "-09-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
september_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-09-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
september_temp_list = [tobs for _id, _station, _date, tobs in september_temps]
september_temp_df = pd.DataFrame(data=september_temp_list, columns=['september'])
september_temp_df.describe()

Unnamed: 0,september
count,1480.0
mean,76.164865
std,3.535178
min,64.0
25%,74.0
50%,76.0
75%,79.0
max,87.0


In [14]:
# October Temps
# Select every measurement whose date text contains "-10-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
october_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-10-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
october_temp_list = [tobs for _id, _station, _date, tobs in october_temps]
october_temp_df = pd.DataFrame(data=october_temp_list, columns=['october'])
october_temp_df.describe()

Unnamed: 0,october
count,1556.0
mean,75.391388
std,3.545084
min,64.0
25%,73.0
50%,76.0
75%,78.0
max,86.0


In [15]:
# November Temps
# Select every measurement whose date text contains "-11-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
november_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-11-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
november_temp_list = [tobs for _id, _station, _date, tobs in november_temps]
november_temp_df = pd.DataFrame(data=november_temp_list, columns=['november'])
november_temp_df.describe()

Unnamed: 0,november
count,1488.0
mean,73.252688
std,3.617282
min,61.0
25%,71.0
50%,74.0
75%,76.0
max,83.0


In [16]:
# December Temps
# Select every measurement whose date text contains "-12-"
# (assumes dates are stored as 'YYYY-MM-DD' strings — TODO confirm),
# keep just the temperature observations and summarise them.
december_temps = (
    session
    .query(Measurement.id, Measurement.station, Measurement.date, Measurement.tobs)
    .filter(Measurement.date.like('%-12-%'))
)
# tobs is the fourth selected column; unpack each row rather than indexing it.
december_temp_list = [tobs for _id, _station, _date, tobs in december_temps]
december_temp_df = pd.DataFrame(data=december_temp_list, columns=['december'])
december_temp_df.describe()

Unnamed: 0,december
count,1517.0
mean,71.041529
std,3.74592
min,56.0
25%,69.0
50%,71.0
75%,74.0
max,83.0
