In [1]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy import create_engine, MetaData, inspect, func, and_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float

from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session

import pandas as pd
import numpy as np
import os

import datetime as dt

In [2]:
engine = create_engine("sqlite:///hawaii.sqlite")

In [3]:
conn = engine.connect()

In [None]:
# Do not close `conn` here: the delete / insert / select cells further down all
# call `conn.execute(...)`, so closing the connection at this point makes a
# top-to-bottom run (Restart Kernel -> Run All) fail on a closed connection.
# The connection is closed once, by the `conn.close()` in the notebook's last cell.

# Create stations table

In [4]:
# Use `declarative_base` from SQLAlchemy to model the stations table as an ORM class
Base = declarative_base()

class Stations(Base):
    """Declarative ORM model backing the `stations` table.

    Declares an integer primary key followed by the station code, the station
    name, and the latitude / longitude / elevation of the station as floats.
    """

    __tablename__ = 'stations'

    # Column order below is the column order of the created table.
    id = Column(Integer, primary_key=True)
    station = Column(String(255))
    name = Column(String(255))
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)

    def __repr__(self):
        """Return a short `id=..., name=...` summary of this row."""
        row_id, row_name = self.id, self.name
        return f"id={row_id}, name={row_name}"
# More on __repr__: https://stackoverflow.com/questions/1984162/purpose-of-pythons-repr    

In [5]:
# Use `create_all` to create the stations table in the database
Base.metadata.create_all(engine)

In [6]:
new_csv = "clean_stations.csv"

In [7]:
# Load the cleaned csv file into a pandas dataframe
new_df = pd.read_csv(new_csv)

In [8]:
# `orient='records'` turns the DataFrame into a list with one dict per row
# (column name -> value), i.e. plain Python data that can be handed to an insert.
data = new_df.to_dict(orient='records')

In [9]:
# Data is just a list of dictionaries that represent each row of data
print(data[:5])

[{'station': 'USC00519397', 'name': 'WAIKIKI 717.2, HI US', 'latitude': 21.2716, 'longitude': -157.8168, 'elevation': 3.0}, {'station': 'USC00513117', 'name': 'KANEOHE 838.1, HI US', 'latitude': 21.4234, 'longitude': -157.8015, 'elevation': 14.6}, {'station': 'USC00514830', 'name': 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 'latitude': 21.5213, 'longitude': -157.8374, 'elevation': 7.0}, {'station': 'USC00517948', 'name': 'PEARL CITY, HI US', 'latitude': 21.3934, 'longitude': -157.9751, 'elevation': 11.9}, {'station': 'USC00518838', 'name': 'UPPER WAHIAWA 874.3, HI US', 'latitude': 21.4992, 'longitude': -158.0111, 'elevation': 306.6}]


In [10]:
# Use MetaData from SQLAlchemy to reflect the tables.
# The engine is passed to `reflect()` rather than to the constructor:
# `MetaData(bind=...)` is deprecated in SQLAlchemy 1.4 and removed in 2.0.
metadata = MetaData()
metadata.reflect(bind=engine)

In [11]:
# Save the reference to the `stations` table as a variable called `table`.
# `autoload_with=engine` replaces the deprecated `autoload=True` flag and names
# the engine to reflect from explicitly.
table = sqlalchemy.Table('stations', metadata, autoload_with=engine)

In [12]:
# Use `table.delete()` to remove any pre-existing data.
# Note that this is a convenience function so that you can re-run the example code multiple times.
# You would not likely do this step in production.
conn.execute(table.delete())

<sqlalchemy.engine.result.ResultProxy at 0x25cb2d34048>

In [13]:
data

[{'elevation': 3.0,
  'latitude': 21.2716,
  'longitude': -157.8168,
  'name': 'WAIKIKI 717.2, HI US',
  'station': 'USC00519397'},
 {'elevation': 14.6,
  'latitude': 21.4234,
  'longitude': -157.8015,
  'name': 'KANEOHE 838.1, HI US',
  'station': 'USC00513117'},
 {'elevation': 7.0,
  'latitude': 21.5213,
  'longitude': -157.8374,
  'name': 'KUALOA RANCH HEADQUARTERS 886.9, HI US',
  'station': 'USC00514830'},
 {'elevation': 11.9,
  'latitude': 21.3934,
  'longitude': -157.9751,
  'name': 'PEARL CITY, HI US',
  'station': 'USC00517948'},
 {'elevation': 306.6,
  'latitude': 21.4992,
  'longitude': -158.0111,
  'name': 'UPPER WAHIAWA 874.3, HI US',
  'station': 'USC00518838'},
 {'elevation': 19.5,
  'latitude': 21.33556,
  'longitude': -157.71139,
  'name': 'WAIMANALO EXPERIMENTAL FARM, HI US',
  'station': 'USC00519523'},
 {'elevation': 32.9,
  'latitude': 21.45167,
  'longitude': -157.84888999999998,
  'name': 'WAIHEE 837.5, HI US',
  'station': 'USC00519281'},
 {'elevation': 0.9,
  '

In [14]:
# Use `table.insert()` to insert the data into the table
# The SQL table is populated during this step
conn.execute(table.insert(), data)

<sqlalchemy.engine.result.ResultProxy at 0x25cb4841f60>

In [16]:
# Test that the insert works by fetching the first 5 rows.
# Raw SQL is wrapped in `text()`: passing a bare string to `execute()` is
# deprecated in SQLAlchemy 1.4 and rejected in 2.0.
conn.execute(sqlalchemy.text("select * from stations limit 5")).fetchall()

[(1, 'USC00519397', 'WAIKIKI 717.2, HI US', 21.2716, -157.8168, 3.0),
 (2, 'USC00513117', 'KANEOHE 838.1, HI US', 21.4234, -157.8015, 14.6),
 (3, 'USC00514830', 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 21.5213, -157.8374, 7.0),
 (4, 'USC00517948', 'PEARL CITY, HI US', 21.3934, -157.9751, 11.9),
 (5, 'USC00518838', 'UPPER WAHIAWA 874.3, HI US', 21.4992, -158.0111, 306.6)]

In [None]:
# Create the inspector and connect it to the engine
inspector = inspect(engine)

In [None]:
# Collect the names of tables within the database
inspector.get_table_names()

In [None]:
# Use the inspector to print the name and type of every column in the 'stations' table
columns = inspector.get_columns('stations')
for column in columns:
    print(column["name"], column["type"])

In [None]:
#engine = create_engine("sqlite:///measurements.sqlite")

In [None]:
#conn = engine.connect()

# Create measurements table

In [17]:
Base = declarative_base()

class Measurements(Base):
    """Declarative ORM model backing the `measurements` table.

    Declares an integer primary key followed by the station code, the
    observation date (kept as a string), `prcp` as a float and `tobs` as an
    integer.
    """

    __tablename__ = 'measurements'

    # Column order below is the column order of the created table.
    id = Column(Integer, primary_key=True)
    station = Column(String)
    date = Column(String)
    prcp = Column(Float)
    tobs = Column(Integer)

    def __repr__(self):
        """Return a short summary of this row.

        Only columns declared on this model are used; the model has no `name`
        attribute, so referencing one here would raise AttributeError.
        """
        return f"id={self.id}, station={self.station}, date={self.date}"

In [18]:
# Use `create_all` to create the measurements table in the database
Base.metadata.create_all(engine)

In [19]:
new_csv = "clean_measurements.csv"

In [20]:
# Load the cleaned csv file into a pandas dataframe
new_df = pd.read_csv(new_csv)

In [21]:
# `orient='records'` turns the DataFrame into a list with one dict per row
# (column name -> value), i.e. plain Python data that can be handed to an insert.
# http://pandas-docs.github.io/pandas-docs-travis/io.html#orient-options
data = new_df.to_dict(orient='records')

In [22]:
# Data is just a list of dictionaries that represent each row of data
print(data[:5])

[{'station': 'USC00519397', 'date': '2010-01-01', 'prcp': 0.08, 'tobs': 65}, {'station': 'USC00519397', 'date': '2010-01-02', 'prcp': 0.0, 'tobs': 63}, {'station': 'USC00519397', 'date': '2010-01-03', 'prcp': 0.0, 'tobs': 74}, {'station': 'USC00519397', 'date': '2010-01-04', 'prcp': 0.0, 'tobs': 76}, {'station': 'USC00519397', 'date': '2010-01-07', 'prcp': 0.06, 'tobs': 70}]


In [23]:
# Use MetaData from SQLAlchemy to reflect the tables.
# The engine is passed to `reflect()` rather than to the constructor:
# `MetaData(bind=...)` is deprecated in SQLAlchemy 1.4 and removed in 2.0.
metadata = MetaData()
metadata.reflect(bind=engine)

In [24]:
# Save the reference to the `measurements` table as a variable called `table`.
# `autoload_with=engine` replaces the deprecated `autoload=True` flag and names
# the engine to reflect from explicitly.
table = sqlalchemy.Table('measurements', metadata, autoload_with=engine)

In [25]:
# Use `table.delete()` to remove any pre-existing data.
# Note that this is a convenience function so that you can re-run the example code multiple times.
# You would not likely do this step in production.
conn.execute(table.delete())

<sqlalchemy.engine.result.ResultProxy at 0x25cb48cc668>

In [26]:
# Use `table.insert()` to insert the data into the table
# The SQL table is populated during this step
conn.execute(table.insert(), data)

<sqlalchemy.engine.result.ResultProxy at 0x25cb48cc518>

In [27]:
# Test that the insert works by fetching the first 5 rows.
# Raw SQL is wrapped in `text()`: passing a bare string to `execute()` is
# deprecated in SQLAlchemy 1.4 and rejected in 2.0.
conn.execute(sqlalchemy.text("select * from measurements limit 5")).fetchall()

[(1, 'USC00519397', '2010-01-01', 0.08, 65),
 (2, 'USC00519397', '2010-01-02', 0.0, 63),
 (3, 'USC00519397', '2010-01-03', 0.0, 74),
 (4, 'USC00519397', '2010-01-04', 0.0, 76),
 (5, 'USC00519397', '2010-01-07', 0.06, 70)]

In [None]:
conn.close()