In [50]:
# Dependencies
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import csv
import os

# Imports the method used for connecting to DBs
from sqlalchemy import create_engine, MetaData

# Imports the methods needed to abstract classes into tables
from sqlalchemy.ext.declarative import declarative_base

# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Float


In [51]:
# Read CSVs
stationDf = pd.read_csv("hawaii_stations.csv")
measurementsDf = pd.read_csv("hawaii_measurements.csv")
measurementsDf.head()

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.0,63
2,USC00519397,2010-01-03,0.0,74
3,USC00519397,2010-01-04,0.0,76
4,USC00519397,2010-01-06,,73


In [52]:
measurementsCleanDf = measurementsDf.dropna()
measurementsCleanDf = measurementsCleanDf.reset_index()

stationDf.to_csv("clean_hawaii_stations.csv",index = False)
measurementsCleanDf.to_csv("clean_hawaii_measurements.csv", index = False)


In [67]:
# Database creation
import sqlalchemy
# Imports the method used for connecting to DBs
from sqlalchemy import create_engine

# Imports the methods needed to abstract classes into tables
from sqlalchemy.ext.declarative import declarative_base

# Allow us to declare column types
from sqlalchemy import Column, Integer, String, Float, Text

In [68]:
# Sets an object to utilize the default declarative base in SQL Alchemy
Base = declarative_base()

In [69]:
# Create an engine to a SQLite database file called `hawaii.sqlite`
engine = create_engine("sqlite:///hawaii.sqlite")

In [70]:
# Create a connection to the engine called `conn`
conn = engine.connect()

In [71]:
# Creating two classes - Station and Meausrement
class Station (Base):
    __tablename__ = 'Station'

    id = Column(Integer, primary_key=True)
    station = Column(String)
    name = Column(String)
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)
    
    def __repr__(self):
        return f"id={self.id}, name={self.name}"
    
class Measurement (Base):
    __tablename__ = 'Measurement'

    id = Column(Integer, primary_key=True)
    station = Column(String)
    date = Column(String)
    prcp = Column(Float)
    tobs = Column(Float)
   
    
    def __repr__(self):
        return f"id={self.id}, name={self.name}"

In [72]:
# Use `create_all` to create the above tables in the database
Base.metadata.create_all(engine)

In [73]:
# loading cleaned CSVs into dataframes
cleanStationDf = pd.read_csv("clean_hawaii_stations.csv")
stationData = cleanStationDf.to_dict(orient='records')

cleanMeasurementDf =pd.read_csv("clean_hawaii_measurements.csv")
measurementData = cleanMeasurementDf.to_dict(orient='records')
print(measurementData[:50])

[{'index': 0, 'station': 'USC00519397', 'date': '2010-01-01', 'prcp': 0.08, 'tobs': 65}, {'index': 1, 'station': 'USC00519397', 'date': '2010-01-02', 'prcp': 0.0, 'tobs': 63}, {'index': 2, 'station': 'USC00519397', 'date': '2010-01-03', 'prcp': 0.0, 'tobs': 74}, {'index': 3, 'station': 'USC00519397', 'date': '2010-01-04', 'prcp': 0.0, 'tobs': 76}, {'index': 5, 'station': 'USC00519397', 'date': '2010-01-07', 'prcp': 0.06, 'tobs': 70}, {'index': 6, 'station': 'USC00519397', 'date': '2010-01-08', 'prcp': 0.0, 'tobs': 64}, {'index': 7, 'station': 'USC00519397', 'date': '2010-01-09', 'prcp': 0.0, 'tobs': 68}, {'index': 8, 'station': 'USC00519397', 'date': '2010-01-10', 'prcp': 0.0, 'tobs': 73}, {'index': 9, 'station': 'USC00519397', 'date': '2010-01-11', 'prcp': 0.01, 'tobs': 64}, {'index': 10, 'station': 'USC00519397', 'date': '2010-01-12', 'prcp': 0.0, 'tobs': 61}, {'index': 11, 'station': 'USC00519397', 'date': '2010-01-14', 'prcp': 0.0, 'tobs': 66}, {'index': 12, 'station': 'USC00519397

In [74]:
# Use MetaData from SQLAlchemy to reflect the tables
metadata = MetaData(bind=engine)
metadata.reflect()

In [75]:
# Save the reference to the station and measurement tables
stationTable = sqlalchemy.Table('Station', metadata, autoload=True)
measurementTable = sqlalchemy.Table('Measurement', metadata, autoload=True)


In [76]:
# Insert the tables. Before you do that delete any pre-exsiting tables
conn.execute(stationTable.delete())
conn.execute(measurementTable.delete())

conn.execute(stationTable.insert(), stationData)
conn.execute(measurementTable.insert(), measurementData)

<sqlalchemy.engine.result.ResultProxy at 0xac3b1ea0f0>

In [77]:
#Testing to make sure that the data transfer is done - Station
conn.execute("select * from Station limit 5").fetchall()

[(1, 'USC00519397', 'WAIKIKI 717.2, HI US', 21.2716, -157.8168, 3.0),
 (2, 'USC00513117', 'KANEOHE 838.1, HI US', 21.4234, -157.8015, 14.6),
 (3, 'USC00514830', 'KUALOA RANCH HEADQUARTERS 886.9, HI US', 21.5213, -157.8374, 7.0),
 (4, 'USC00517948', 'PEARL CITY, HI US', 21.3934, -157.9751, 11.9),
 (5, 'USC00518838', 'UPPER WAHIAWA 874.3, HI US', 21.4992, -158.0111, 306.6)]

In [80]:
#Testing to make sure that the data transfer is done - Station
conn.execute("select prcp from Measurement limit 50").fetchall()

OperationalError: (sqlite3.OperationalError) no such column: prcp [SQL: 'select prcp from Measurement limit 50']