In [19]:
import sqlite3

# Report the version of the SQLite library that the sqlite3 module is using.
print(sqlite3.sqlite_version)

3.39.3


**Load the pandasql package (SQL queries over pandas dataframes)**

In [20]:
from pandasql import sqldf

def pysqldf(q):
    """Run the SQL string ``q`` through sqldf against this module's globals.

    Convenience wrapper so callers do not have to pass locals()/globals()
    on every query.
    """
    namespace = globals()
    return sqldf(q, namespace)

**Connect to the database**

In [21]:
# Open the connection that the rest of the notebook uses to build the warehouse.
conn = sqlite3.connect('aw_dw.db') # permanent database: stored in the file aw_dw.db

In [22]:
# Sanity check: confirm a connection object was created.
print(conn)

<sqlite3.Connection object at 0x00000132B385DC60>


**Create a cursor to execute statements against SQLite.**

In [23]:
# The cursor is the handle used below to run SQL scripts on this connection.
cursor = conn.cursor()

In [8]:
# Display the cursor object to confirm it exists.
cursor

<sqlite3.Cursor at 0x132b3857c00>

# Our Strategy to Build the Warehouse.

- Use pandas dataframes as the source of data to be loaded into warehouse.
- Use sqldf() to query and extract data from the source dataframes.
- Note: sqldf() lets you query dataframes with SQL, but it creates no tables in the warehouse; the dataframes themselves play the role of tables.
- Use the Python SQLite3 library to create the warehouse tables and insert data.

In [10]:
import pandas as pd

# Load the date dimension source and keep only rows dated after 2011-12-31
# (DateKey is an integer in YYYYMMDD form, e.g. 20120101).
# NOTE(review): the CSV's saved index is loaded as an 'Unnamed: 0' column;
# index_col=0 would drop it — confirm nothing relies on that column first.
dfdate = pd.read_csv('./data/DimDate.csv').loc[
    lambda frame: frame['DateKey'] > 20111231
]
dfdate.head(2)

Unnamed: 0,DateKey,FullDateAlternateKey,DayNumberOfWeek,EnglishDayNameOfWeek,SpanishDayNameOfWeek,FrenchDayNameOfWeek,DayNumberOfMonth,DayNumberOfYear,WeekNumberOfYear,EnglishMonthName,SpanishMonthName,FrenchMonthName,MonthNumberOfYear,CalendarQuarter,CalendarYear,CalendarSemester,FiscalQuarter,FiscalYear,FiscalSemester
2556,20120101,2012-01-01,1,Sunday,Domingo,Dimanche,1,1,1,January,Enero,Janvier,1,1,2012,1,3,2011,2
2557,20120102,2012-01-02,2,Monday,Lunes,Lundi,2,2,1,January,Enero,Janvier,1,1,2012,1,3,2011,2


**We can use sqldf to query the dataframe with SQL.**

In [11]:
# The table name in the SQL is the dataframe's variable name (dfdate);
# pysqldf hands globals() to sqldf so that name can be looked up.
pysqldf('''select * from dfdate limit 3''')

Unnamed: 0,DateKey,FullDateAlternateKey,DayNumberOfWeek,EnglishDayNameOfWeek,SpanishDayNameOfWeek,FrenchDayNameOfWeek,DayNumberOfMonth,DayNumberOfYear,WeekNumberOfYear,EnglishMonthName,SpanishMonthName,FrenchMonthName,MonthNumberOfYear,CalendarQuarter,CalendarYear,CalendarSemester,FiscalQuarter,FiscalYear,FiscalSemester
0,20120101,2012-01-01,1,Sunday,Domingo,Dimanche,1,1,1,January,Enero,Janvier,1,1,2012,1,3,2011,2
1,20120102,2012-01-02,2,Monday,Lunes,Lundi,2,2,1,January,Enero,Janvier,1,1,2012,1,3,2011,2
2,20120103,2012-01-03,3,Tuesday,Martes,Mardi,3,3,1,January,Enero,Janvier,1,1,2012,1,3,2011,2


**To execute more than one statement, use the executescript() method.**

In [12]:
# Two-statement script that rebuilds the date dimension: drop any existing
# dimdate table, then create it with DateKey as the primary key and every
# column declared NOT NULL.
create_dimdate = '''
DROP TABLE IF EXISTS dimdate;

CREATE TABLE dimdate (
    DateKey                INTEGER NOT NULL PRIMARY KEY,
    DateValue              DATE    NOT NULL,
    DayNumberOfWeek        INTEGER NOT NULL,
    DayNameOfWeek          TEXT    NOT NULL,
    DayNumberOfMonth       INTEGER NOT NULL,
    DayNumberOfYear        INTEGER NOT NULL,
    WeekNumberOfYear       INTEGER NOT NULL,
    MonthName              TEXT NOT NULL,
    MonthNumberOfYear      INTEGER NOT NULL,
    CalendarQuarter        INTEGER NOT NULL,
    CalendarYear           INTEGER NOT NULL,
    FiscalQuarter          INTEGER NOT NULL,
    FiscalYear             INTEGER NOT NULL 
);
''' 

In [13]:
# executescript() is used because create_dimdate holds more than one SQL statement.
cursor.executescript(create_dimdate)

<sqlite3.Cursor at 0x132b3857c00>

#### Use sqldf() to pull just the data you want from the dataframe
Then chain `.to_sql()` onto the result to write it to your SQLite database.

In [15]:
# Project the source frame onto the dimdate columns: only the English
# day/month names are kept, renamed to the warehouse column names.
dimdate_query = ''' 
select  DateKey           
       ,FullDateAlternateKey as DateValue         
       ,DayNumberOfWeek   
       ,EnglishDayNameOfWeek as DayNameOfWeek     
       ,DayNumberOfMonth   
       ,DayNumberOfYear   
       ,WeekNumberOfYear  
       ,EnglishMonthName as MonthName         
       ,MonthNumberOfYear 
       ,CalendarQuarter   
       ,CalendarYear      
       ,FiscalQuarter     
       ,FiscalYear   
from dfdate;
'''
dimdate_rows = pysqldf(dimdate_query)

# 'append' inserts into the existing dimdate table rather than having pandas
# re-create it.
dimdate_rows.to_sql('dimdate', con=conn, if_exists='append', index=False)

1096

### SalesTerritory

In [16]:
import pandas as pd

# Source data for the sales territory dimension.
dfsalesterritory = pd.read_csv('./data/DimSalesTerritory.csv')

### Let pandas create the table for us.

In [17]:
# No CREATE TABLE for this dimension: with if_exists='replace' pandas drops
# any existing dimsalesterritory table and creates it from the query result.
salesterritory_query = '''select SalesTerritoryKey, 
         SalesTerritoryRegion, 
         SalesTerritoryCountry, 
         SalesTerritoryGroup from dfsalesterritory;'''
salesterritory_rows = pysqldf(salesterritory_query)
salesterritory_rows.to_sql('dimsalesterritory', con=conn, if_exists='replace', index=False)

11

### Create a unique index on the table.

In [18]:
# Script that adds a unique index on SalesTerritoryKey inside an explicit
# transaction; "if not exists" keeps it from failing when the index is
# already present.
create_salesterritory_index = '''
BEGIN TRANSACTION;
create unique index if not exists idx_salesterritorykey on dimsalesterritory (SalesTerritoryKey);  
commit;
 '''
cursor.executescript(create_salesterritory_index)

<sqlite3.Cursor at 0x132b3857c00>