## Create Dependencies

In [1]:
import numpy as np
import pandas as pd
import datetime as dt
import os 

from config import pwd, uname

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func
from sqlalchemy import Column, Integer, String, Float 
from sqlalchemy import desc

## Create Variables

In [2]:
caDataSet = os.path.join('..','DataSets','CA Weed Data.csv')
cenDataSet = os.path.join('..','DataSets','Census Data.csv')
coDataSet = os.path.join('..','DataSets','CO Weed Data.csv')
massDataSet = os.path.join('..','DataSets','Mass Weed Data.csv')
stateDataSet = os.path.join('..','DataSets','State Names and Abbr.csv')


## Establish Database Connections and Get Tables Lists

In [3]:
# create engine to postgres db
postgres = f'postgresql://{uname}:{pwd}@localhost:5432/etl_project'  #path to local db

engine = create_engine(postgres)

In [4]:
# reflect an existing database into a new model
base = automap_base()

# reflect the tables
base.prepare(engine, reflect=True)

In [5]:
# View all of the classes that automap found
base.classes.keys()

['ca_raw', 'co_raw', 'mass_raw', 'states', 'sales_by_qtr', 'census']

In [6]:
# Save references to each table
# adding these for fun and for sanity checks

caRaw = base.classes.ca_raw
coRaw = base.classes.co_raw
massRaw = base.classes.mass_raw
states = base.classes.states
sales = base.classes.sales_by_qtr
census = base.classes.census

In [7]:
# Create our session (link) from Python to the DB
session = Session(bind=engine)

## Exploratory Analysis

In [8]:
# Load data into dataframes
massRawDF = pd.read_csv(massDataSet) 
cenRawDF = pd.read_csv(cenDataSet)

In [9]:
massRawDF.head()

Unnamed: 0,activitysummarydate,total_plantimmaturecount,total_planttrackedcount,total_plantfloweringcount,total_plantvegetativecount,total_plantdestroyedcount,total_plantharvestedcount,total_plantcount,salestotal,total_active_harvestcount,total_active_packagecount,total_plantbatchcount,total_activeproducts,total_activestrains,total_employees
0,6/4/2021 0:00,140747,225685,114894,110791,231460,966667,1423812,1658394000.0,1231,131289,3388,155452,30337,8334
1,6/3/2021 0:00,140747,225685,114894,110791,231460,966667,1423812,1658393000.0,1231,131271,3388,155452,30337,8334
2,6/2/2021 0:00,146669,221316,111723,109593,229462,961421,1412199,1651406000.0,1208,128029,3466,155053,30165,8282
3,6/1/2021 0:00,153497,211654,108947,102707,228692,960539,1400885,1648229000.0,1212,126709,3520,154325,30045,8282
4,5/31/2021 0:00,155021,210406,108882,101524,228595,959964,1398965,1645279000.0,1232,127630,3626,154348,30010,8274


In [10]:
cenRawDF.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,NAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,RNETMIG2016,RNETMIG2017,RNETMIG2018,RNETMIG2019,RNETMIG2020
0,10,0,0,0,United States,308745538,308758105,309327143,311583481,313877662,...,2.561705,2.745929,2.697816,2.980977,3.317393,3.308478,2.92626,2.208328,1.735857,1.450346
1,20,1,0,0,Northeast Region,55317240,55318414,55380764,55608318,55782661,...,0.951355,0.02291,-0.406301,-0.904675,-2.01199,-2.436197,-1.773545,-2.008364,-3.16071,-3.67449
2,20,2,0,0,Midwest Region,66927001,66929737,66975328,67164092,67348275,...,-0.883842,-0.895516,0.06239,-0.697566,-1.320504,-1.176126,-0.487257,-0.800567,-1.205668,-2.011623
3,20,3,0,0,South Region,114555744,114563042,114869421,116019483,117264196,...,5.218129,5.93444,5.373447,6.236211,7.355913,7.220696,6.272594,5.291755,5.479642,5.743507
4,20,4,0,0,West Region,71945553,71946912,72101630,72791588,73482530,...,2.752027,3.083047,3.183793,4.039593,5.004949,5.284859,4.022332,2.968883,1.822074,1.214309


In [11]:
cenRawDF.describe

<bound method NDFrame.describe of     SUMLEV REGION DIVISION  STATE                  NAME  CENSUS2010POP  \
0       10      0        0      0         United States      308745538   
1       20      1        0      0      Northeast Region       55317240   
2       20      2        0      0        Midwest Region       66927001   
3       20      3        0      0          South Region      114555744   
4       20      4        0      0           West Region       71945553   
5       40      3        6      1               Alabama        4779736   
6       40      4        9      2                Alaska         710231   
7       40      4        8      4               Arizona        6392017   
8       40      3        7      5              Arkansas        2915918   
9       40      4        9      6            California       37253956   
10      40      4        8      8              Colorado        5029196   
11      40      1        1      9           Connecticut        3574097   
12  

## Load Raw Data into Database

In [12]:
connection = engine.connect()
connection.execute( '''TRUNCATE TABLE mass_raw; TRUNCATE TABLE census''' )

connection.close()

In [13]:
cenRawDF.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,NAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,POPESTIMATE2011,POPESTIMATE2012,...,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,RNETMIG2016,RNETMIG2017,RNETMIG2018,RNETMIG2019,RNETMIG2020
0,10,0,0,0,United States,308745538,308758105,309327143,311583481,313877662,...,2.561705,2.745929,2.697816,2.980977,3.317393,3.308478,2.92626,2.208328,1.735857,1.450346
1,20,1,0,0,Northeast Region,55317240,55318414,55380764,55608318,55782661,...,0.951355,0.02291,-0.406301,-0.904675,-2.01199,-2.436197,-1.773545,-2.008364,-3.16071,-3.67449
2,20,2,0,0,Midwest Region,66927001,66929737,66975328,67164092,67348275,...,-0.883842,-0.895516,0.06239,-0.697566,-1.320504,-1.176126,-0.487257,-0.800567,-1.205668,-2.011623
3,20,3,0,0,South Region,114555744,114563042,114869421,116019483,117264196,...,5.218129,5.93444,5.373447,6.236211,7.355913,7.220696,6.272594,5.291755,5.479642,5.743507
4,20,4,0,0,West Region,71945553,71946912,72101630,72791588,73482530,...,2.752027,3.083047,3.183793,4.039593,5.004949,5.284859,4.022332,2.968883,1.822074,1.214309


In [14]:
massRawDF.to_sql('mass_raw', engine, if_exists='append', index=False)
cenRawDF.to_sql('census_raw', engine, if_exists='append', index=False)

In [15]:
#check mass raw 
session.query(massRaw).count()

956

In [21]:
#check census raw
engine.execute('select count(*) from census_raw').fetchall()

[(57,)]