# Lesson 2 Exercise 3: Creating Fact and Dimension Tables with Star Schema

<img src="../../../images/postgre_sql_logo.png" width="250" height="250">

In [3]:
import os

from src.utils.postgres.generic_commands import PostgresCommands

##### Create a connection to the database, get a cursor, and set autocommit to true

In [4]:
# Build the project's Postgres helper object; every later cell in this notebook
# talks to the database through this single `postgres` instance.
postgres = PostgresCommands()

PostgresCommands class initiated


In [6]:
# Open the connection used by the rest of the notebook.
# The password is looked up in the PGPASSWORD environment variable so that a
# real credential never has to be saved inside the notebook; when the variable
# is not set, the lesson's original placeholder value is used unchanged.
# autocommit=True is passed so each statement is committed without an explicit
# commit call.
postgres.connect(
    host='localhost',
    port='5432',
    database_name='nano_data_engineering_db',
    user='postgres',
    password=os.environ.get('PGPASSWORD', '###'),
    autocommit=True,
)

Connection established with nano_data_engineering_db


##### Imagine you work at an online Music Store. There will be many tables in our database, but let's just focus on 4 tables around customer purchases. 

<img src="../../../images/star_schema.png" width="750" height="750">

##### From this representation you can start to see the makings of a "STAR". You will have one fact table (the center of the star) and 3 dimension tables that branch out from it.

##### Create the Fact table and insert the data into the table

In [18]:
# Fact table at the center of the star: the customer_id and store_id columns are
# the keys the later queries join on, and `spent` is the purchase amount.
fact_table = 'customer_transactions'
fact_rows = [
    (1, 1, 20.50),
    (2, 1, 35.21),
]

postgres.create_table(
    table_name=fact_table,
    schema='(customer_id int, store_id int, spent float)',
)
postgres.insert_rows(
    table_name=fact_table,
    columns='(customer_id, store_id, spent)',
    rows=fact_rows,
)

# Show what was stored so the inserted rows can be checked by eye.
postgres.print_rows(table_name=fact_table)

Table named customer_transactions created
(1, 1, 20.5)
(2, 1, 35.21)


##### Create the Dimension tables and insert the data into each table

In [28]:
# Every dimension is described once as (table name, schema, column list, rows).
# The loop below then creates, fills and prints each table in the order listed,
# so the printed output keeps the customer -> items_purchased -> store sequence.
dimension_tables = [
    # Customer
    (
        'customer',
        '(customer_id int, name text, rewards text)',
        '(customer_id, name, rewards)',
        [
            (1, 'Amanda', 'Y'),
            (2, 'Toby', 'N'),
        ],
    ),
    # Items purchased
    (
        'items_purchased',
        '(customer_id int, item_number int, item_name text)',
        '(customer_id, item_number, item_name)',
        [
            (1, 1, 'Rubber Soul'),
            (2, 3, 'Let It Be'),
        ],
    ),
    # Store
    (
        'store',
        '(store_id int, state text)',
        '(store_id, state)',
        [
            (1, 'CA'),
            (2, 'WA'),
        ],
    ),
]

for dim_name, dim_schema, dim_columns, dim_rows in dimension_tables:
    postgres.create_table(table_name=dim_name, schema=dim_schema)
    postgres.insert_rows(table_name=dim_name, columns=dim_columns, rows=dim_rows)
    postgres.print_rows(table_name=dim_name)

Table named customer created
(1, 'Amanda', 'Y')
(2, 'Toby', 'N')
Table named items_purchased created
(1, 1, 'Rubber Soul')
(2, 3, 'Let It Be')
Table named store created
(1, 'CA')
(2, 'WA')


### Now run the following queries on this data. They are easy to write because the data is organized into Fact and Dimension tables in a Star Schema
 
##### Query 1: Find all the customers who spent more than 30 dollars: who they are, which store they bought from, where that store is located, what they bought, and whether they are a rewards member.


In [35]:
# Query 1: start from the fact table and LEFT JOIN each dimension on the key it
# shares with the fact table (customer_id or store_id), then keep only the
# transactions where more than 30 was spent.
high_spender_query = """
                            SELECT 
                                name,
                                store_id,
                                state,
                                item_name,
                                rewards
                            FROM customer_transactions
                            LEFT JOIN customer
                            USING (customer_id)
                            LEFT JOIN items_purchased
                            USING (customer_id)
                            LEFT JOIN store
                            USING (store_id)
                            WHERE spent > 30
                            """

postgres.custom_query(query=high_spender_query)

[('Toby', 1, 'CA', 'Let It Be', 'N')]

##### Query 2: How much did Customer 2 spend?

In [36]:
# Query 2: total amount spent by customer 2. Only the fact table is needed.
# `GROUP BY 1` groups on the first selected column, i.e. customer_id.
customer_2_spend_query = """
                            SELECT 
                                customer_id,
                                SUM(spent) AS spent
                            FROM customer_transactions
                            WHERE customer_id = 2
                            GROUP BY 1
                            """

postgres.custom_query(query=customer_2_spend_query)

[(2, 35.21)]

##### And finally close your cursor and connection. 

In [37]:
# Final cleanup step: per the recorded output this closes the cursor first and
# then the connection, so run it only after all queries above have finished.
postgres.close_connection()

Closed cursor
Closed connection
