# Lesson 2 Exercise 1: Creating Normalized Tables

<img src="../../../images/postgre_sql_logo.png" width="250" height="250">

In [5]:
from src.utils.postgres.generic_commands import PostgresCommands

##### Create a connection to the database, get a cursor, and set autocommit to true

In [2]:
# Instantiate the project's Postgres helper; every later cell in this
# notebook talks to the database through this `postgres` object.
postgres = PostgresCommands()

PostgresCommands class initiated


In [3]:
# Connect to the local `nano_data_engineering_db` database with autocommit
# turned on, as the heading above describes.
# NOTE(review): the password is masked ('####') in this copy; presumably the
# real value is supplied out-of-band -- avoid committing real credentials.
postgres.connect(host='localhost', port='5432', database_name='nano_data_engineering_db', user='postgres', password='####', autocommit=True)

Connection established with nano_data_engineering_db


##### Let's imagine we have a table called Music Store

`Table Name: music_store` \
`column 0: Transaction Id` \
`column 1: Customer Name` \
`column 2: Cashier Name` \
`column 3: Year` \
`column 4: Albums Purchased` 

<img src="../../../images/non_normalized_table.png" width="650" height="650">

##### Generate table

In [None]:
# Starting point of the exercise: a single table whose `albums_purchased`
# column is a text[] array, so one row can hold several albums at once.
postgres.create_table(table_name='music_store', schema='(transaction_id int, customer_name varchar, cashier_name varchar, year int, albums_purchased text[])')

In [8]:
# Seed the non-normalized table: each tuple is one transaction and its last
# element is the full list of albums bought in that transaction.
music_store_rows = [
    (1, 'Amanda', 'Sam', 2000, ['Rubber Soul', 'Let it Be']),
    (2, 'Toby', 'Sam', 2000, ['My Generation']),
    (3, 'Max', 'Bob', 2018, ['Meet the Beatles', 'Help!']),
]
postgres.insert_rows(
    table_name='music_store',
    columns='(transaction_id, customer_name, cashier_name, year, albums_purchased)',
    rows=music_store_rows,
)

In [9]:
# Print the stored rows; note that the last column comes back as a list.
postgres.print_rows(table_name='music_store')

(1, 'Amanda', 'Sam', 2000, ['Rubber Soul', 'Let it Be'])
(2, 'Toby', 'Sam', 2000, ['My Generation'])
(3, 'Max', 'Bob', 2018, ['Meet the Beatles', 'Help!'])


#### Moving to 1st Normal Form (1NF)

In [10]:
# 1NF version of the table: `album_purchased` is a plain varchar, so each
# row stores exactly one album instead of an array of albums.
postgres.create_table(table_name='music_store_2', schema='(transaction_id int, customer_name varchar, cashier_name varchar, year int, album_purchased varchar)')

Table named music_store_2 created


In [11]:
# One row per (transaction, album): a transaction with several albums is
# repeated once per album, so transaction_id 1 and 3 each appear twice.
first_normal_form_rows = [
    (1, 'Amanda', 'Sam', 2000, 'Rubber Soul'),
    (1, 'Amanda', 'Sam', 2000, 'Let it Be'),
    (2, 'Toby', 'Sam', 2000, 'My Generation'),
    (3, 'Max', 'Bob', 2018, 'Meet the Beatles'),
    (3, 'Max', 'Bob', 2018, 'Help!'),
]
postgres.insert_rows(
    table_name='music_store_2',
    columns='(transaction_id, customer_name, cashier_name, year, album_purchased)',
    rows=first_normal_form_rows,
)

In [4]:
# Print the 1NF rows: every row now carries a single album title.
postgres.print_rows(table_name='music_store_2')

(1, 'Amanda', 'Sam', 2000, 'Rubber Soul')
(1, 'Amanda', 'Sam', 2000, 'Let it Be')
(2, 'Toby', 'Sam', 2000, 'My Generation')
(3, 'Max', 'Bob', 2018, 'Meet the Beatles')
(3, 'Max', 'Bob', 2018, 'Help!')


<img src="../../../images/normalized_1nf.png" width="650" height="650">

#### Moving to 2nd Normal Form (2NF)
You have now moved the data into 1NF, which is the first step toward 2nd Normal Form (2NF). The table is not yet in 2NF: while each record in the table is unique, our primary key (transaction id) is not unique.

Therefore, we need to break the table up into two tables: transactions and albums sold.

`Table Name: transactions` \
`column 0: Transaction Id` \
`column 1: Customer Name` \
`column 2: Cashier Name` \
`column 3: Year`

`Table Name: albums` \
`column 0: Album Id` \
`column 1: Transaction Id` \
`column 2: Album Purchased`

<img src="../../../images/normalized_2nf.png" width="650" height="650">

In [5]:
# 2NF, part 1: keep only the per-transaction facts, one row per
# transaction_id. The album titles move to a separate `albums` table.
postgres.create_table(
    table_name='transactions',
    schema='(transaction_id int, customer_name varchar, cashier_name varchar, year int)',
)

transaction_rows = [
    (1, 'Amanda', 'Sam', 2000),
    (2, 'Toby', 'Sam', 2000),
    (3, 'Max', 'Bob', 2018),
]
postgres.insert_rows(
    table_name='transactions',
    columns='(transaction_id, customer_name, cashier_name, year)',
    rows=transaction_rows,
)

postgres.print_rows(table_name='transactions')

Table named transactions created
(1, 'Amanda', 'Sam', 2000)
(2, 'Toby', 'Sam', 2000)
(3, 'Max', 'Bob', 2018)


In [6]:
# 2NF, part 2: one row per purchased album, each with its own album_id and
# the transaction_id of the transaction it belongs to.
postgres.create_table(
    table_name='albums',
    schema='(album_id int, transaction_id int, album_purchased varchar)',
)

album_rows = [
    (1, 1, 'Rubber Soul'),
    (2, 1, 'Let it Be'),
    (3, 2, 'My Generation'),
    (4, 3, 'Meet the Beatles'),
    (5, 3, 'Help!'),
]
postgres.insert_rows(
    table_name='albums',
    columns='(album_id, transaction_id, album_purchased)',
    rows=album_rows,
)

postgres.print_rows(table_name='albums')

Table named albums created
(1, 1, 'Rubber Soul')
(2, 1, 'Let it Be')
(3, 2, 'My Generation')
(4, 3, 'Meet the Beatles')
(5, 3, 'Help!')


##### Join these tables to get all the information in the original first table

In [4]:
# Rebuild the original wide view by joining the two 2NF tables on their
# shared transaction_id column.
two_table_join = 'SELECT * FROM transactions JOIN albums USING (transaction_id)'
postgres.custom_query(query=two_table_join)

[(1, 'Amanda', 'Sam', 2000, 1, 'Rubber Soul'),
 (1, 'Amanda', 'Sam', 2000, 2, 'Let it Be'),
 (2, 'Toby', 'Sam', 2000, 3, 'My Generation'),
 (3, 'Max', 'Bob', 2018, 4, 'Meet the Beatles'),
 (3, 'Max', 'Bob', 2018, 5, 'Help!')]

#### Moving to 3rd Normal Form (3NF)

`Table Name: transactions` \
`column 0: Transaction Id` \
`column 1: Customer Name` \
`column 2: Cashier Id` \
`column 3: Year`

`Table Name: albums` \
`column 0: Album Id` \
`column 1: Transaction Id` \
`column 2: Album Purchased`

`Table Name: employees` \
`column 0: Employee Id` \
`column 1: Cashier Name` 

<img src="../../../images/normalized_3nf.png" width="650" height="650">

In [8]:
# 3NF: cashier names move into their own lookup table keyed by employee_id.
postgres.create_table(
    table_name='employees',
    schema='(employee_id int, cashier_name varchar)',
)

employee_rows = [
    (1, 'Sam'),
    (2, 'Bob'),
]
postgres.insert_rows(
    table_name='employees',
    columns='(employee_id, cashier_name)',
    rows=employee_rows,
)

postgres.print_rows(table_name='employees')

Table named employees created
(1, 'Sam')
(2, 'Bob')


In [9]:
# Remove the 2NF `transactions` table so it can be recreated below with a
# cashier_id column in place of cashier_name.
postgres.drop_table(table_name='transactions')

Dropped table transactions


In [10]:
# Recreate `transactions` for 3NF: the cashier is now stored as an integer
# cashier_id (1 = Sam, 2 = Bob in the employees table) rather than a name.
postgres.create_table(
    table_name='transactions',
    schema='(transaction_id int, customer_name varchar, cashier_id int, year int)',
)

transaction_rows_3nf = [
    (1, 'Amanda', 1, 2000),
    (2, 'Toby', 1, 2000),
    (3, 'Max', 2, 2018),
]
postgres.insert_rows(
    table_name='transactions',
    columns='(transaction_id, customer_name, cashier_id, year)',
    rows=transaction_rows_3nf,
)

postgres.print_rows(table_name='transactions')

Table named transactions created
(1, 'Amanda', 1, 2000)
(2, 'Toby', 1, 2000)
(3, 'Max', 2, 2018)


##### Join these 3 tables so we can get all the information we had in our first table

In [11]:
# Join all three 3NF tables: albums attach through transaction_id and
# employees attach by matching transactions.cashier_id to employee_id.
three_table_join = (
    'SELECT * FROM transactions '
    'JOIN albums USING (transaction_id) '
    'JOIN employees ON transactions.cashier_id = employees.employee_id'
)
postgres.custom_query(query=three_table_join)

[(1, 'Amanda', 1, 2000, 1, 'Rubber Soul', 1, 'Sam'),
 (1, 'Amanda', 1, 2000, 2, 'Let it Be', 1, 'Sam'),
 (2, 'Toby', 1, 2000, 3, 'My Generation', 1, 'Sam'),
 (3, 'Max', 2, 2018, 4, 'Meet the Beatles', 2, 'Bob'),
 (3, 'Max', 2, 2018, 5, 'Help!', 2, 'Bob')]

#### Finally close your cursor and connection

In [4]:
# Release the database resources: closes the cursor and then the connection.
postgres.close_connection()

Closed cursor
Closed connection
