In [140]:
# dependencies

import pandas as pd

# additional needed dependencies
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float, Date

In [141]:
# hawaii_measurements csv to df
hw_measurements_df = pd.read_csv("../../Resources/hawaii_measurements.csv")
hw_measurements_df

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.00,63
2,USC00519397,2010-01-03,0.00,74
3,USC00519397,2010-01-04,0.00,76
4,USC00519397,2010-01-06,,73
...,...,...,...,...
19545,USC00516128,2017-08-19,0.09,71
19546,USC00516128,2017-08-20,,78
19547,USC00516128,2017-08-21,0.56,76
19548,USC00516128,2017-08-22,0.50,76


In [142]:
hw_measurements_df.count()

station    19550
date       19550
prcp       18103
tobs       19550
dtype: int64

In [143]:
# Drop NaN values
clean_hw_measurements_df = hw_measurements_df.dropna()
clean_hw_measurements_df

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.00,63
2,USC00519397,2010-01-03,0.00,74
3,USC00519397,2010-01-04,0.00,76
5,USC00519397,2010-01-07,0.06,70
...,...,...,...,...
19543,USC00516128,2017-08-17,0.13,72
19545,USC00516128,2017-08-19,0.09,71
19547,USC00516128,2017-08-21,0.56,76
19548,USC00516128,2017-08-22,0.50,76


In [144]:
# Check
clean_hw_measurements_df.count()

station    18103
date       18103
prcp       18103
tobs       18103
dtype: int64

In [145]:
# reset index without adding old one
clean_hw_measurements_df = clean_hw_measurements_df.reset_index(drop=True)
clean_hw_measurements_df

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.00,63
2,USC00519397,2010-01-03,0.00,74
3,USC00519397,2010-01-04,0.00,76
4,USC00519397,2010-01-07,0.06,70
...,...,...,...,...
18098,USC00516128,2017-08-17,0.13,72
18099,USC00516128,2017-08-19,0.09,71
18100,USC00516128,2017-08-21,0.56,76
18101,USC00516128,2017-08-22,0.50,76


In [146]:
# Save to a new csv clean file avoiding duplicating index
clean_hw_measurements_df.to_csv("data/clean_hw_measurements.csv", index=False)

In [147]:
# Repeat process with hawaii stations csv file
hw_stations_df = pd.read_csv("../../Resources/hawaii_stations.csv")
hw_stations_df

Unnamed: 0,station,name,latitude,longitude,elevation
0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
5,USC00519523,"WAIMANALO EXPERIMENTAL FARM, HI US",21.33556,-157.71139,19.5
6,USC00519281,"WAIHEE 837.5, HI US",21.45167,-157.84889,32.9
7,USC00511918,"HONOLULU OBSERVATORY 702.2, HI US",21.3152,-157.9992,0.9
8,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [148]:
# we see is a much shorter and perfectly consistant folder, no changes needed. 
# Save file in data as csv

In [149]:
# Save to a new csv clean file avoiding duplicating index
hw_stations_df.to_csv("data/hawaii_stations.csv", index=False)

In [150]:
# Addition inspect
from sqlalchemy import create_engine, func, inspect
#refelctin for checking tables
from sqlalchemy.ext.automap import automap_base

In [151]:
# create engine to hawaii.sqlite
engine = create_engine("sqlite:///hawaii.sqlite")
# conn = engine.connect()

In [152]:
# 
Base = automap_base()
# reflect the tables
# Use the Base class to reflect the database tables
Base.prepare(engine, reflect=True)

# View all of the classes that automap found
Base.classes.keys()

['measurement', 'station']

In [153]:
# Close the session
# session.close()

In [154]:
# Create measurement and station classes to update a SQL database using Python 
class Measurement(Base):
    __tablename__ = "measurement"
    __table_args__ = {'extend_existing': True}
    id = Column(Integer, primary_key = True)
    station = Column(String(255))
    date = Column(String)
    prcp = Column(Float)
    tobs = Column(Float)
    
class Station(Base):
    __tablename__ = "station"
    __table_args__ = {'extend_existing': True}
    id = Column(Integer, primary_key=True)
    station = Column(String(255))
    name = Column(String(255))
    latitude = Column(Float)
    longitude = Column(Float)
    elevation = Column(Float)

In [155]:
# create engine to hawaii.sqlite
engine = create_engine("sqlite:///hawaii_clean.sqlite")
conn = engine.connect()

In [156]:
# Create metadata layer that abstracts SQL database
Base.metadata.create_all(engine)

In [157]:
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_sql.html
# 'replace' let drop previous values
clean_hw_measurements_df.to_sql("measurement", engine, if_exists='replace', index=False)

In [158]:
hw_stations_df.to_sql("station", engine, if_exists='replace', index=False)

In [161]:
# Chack content of measurements table
data_m = pd.read_sql('SELECT * FROM measurement',conn)
data_m

Unnamed: 0,station,date,prcp,tobs
0,USC00519397,2010-01-01,0.08,65
1,USC00519397,2010-01-02,0.00,63
2,USC00519397,2010-01-03,0.00,74
3,USC00519397,2010-01-04,0.00,76
4,USC00519397,2010-01-07,0.06,70
...,...,...,...,...
18098,USC00516128,2017-08-17,0.13,72
18099,USC00516128,2017-08-19,0.09,71
18100,USC00516128,2017-08-21,0.56,76
18101,USC00516128,2017-08-22,0.50,76


In [162]:
# Check content of stations table
data_st = pd.read_sql('SELECT * FROM station',conn)
data_st

Unnamed: 0,station,name,latitude,longitude,elevation
0,USC00519397,"WAIKIKI 717.2, HI US",21.2716,-157.8168,3.0
1,USC00513117,"KANEOHE 838.1, HI US",21.4234,-157.8015,14.6
2,USC00514830,"KUALOA RANCH HEADQUARTERS 886.9, HI US",21.5213,-157.8374,7.0
3,USC00517948,"PEARL CITY, HI US",21.3934,-157.9751,11.9
4,USC00518838,"UPPER WAHIAWA 874.3, HI US",21.4992,-158.0111,306.6
5,USC00519523,"WAIMANALO EXPERIMENTAL FARM, HI US",21.33556,-157.71139,19.5
6,USC00519281,"WAIHEE 837.5, HI US",21.45167,-157.84889,32.9
7,USC00511918,"HONOLULU OBSERVATORY 702.2, HI US",21.3152,-157.9992,0.9
8,USC00516128,"MANOA LYON ARBO 785.2, HI US",21.3331,-157.8025,152.4


In [163]:
# View all of the classes that automap found
Base.classes.keys()

['measurement', 'station']