In [2]:
import psycopg2

In [6]:
# Set up the udacity database.
# The connection goes to the default 'postgres' database, because the
# 'udacity' database may not exist yet.
# NOTE(review): the credentials are hard-coded; acceptable for a local demo
# server only, otherwise read them from the environment.
conn = psycopg2.connect(dbname='postgres',
                        user='admin',
                        password='admin',
                        host='127.0.0.1')
# CREATE DATABASE cannot run inside a transaction block, so autocommit must be
# enabled before the statement is issued.
conn.set_session(autocommit=True)
cur = conn.cursor()
# PostgreSQL has no CREATE DATABASE IF NOT EXISTS, so look the name up in the
# pg_database catalog first. This keeps the cell safe to re-run.
cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", ('udacity',))
if cur.fetchone() is None:
    cur.execute('CREATE DATABASE udacity')
# This connection was only needed to create the database; release it instead
# of leaving it open when conn is rebound to the udacity database later.
cur.close()
conn.close()

In [12]:
# Open a connection to the udacity database and get a cursor for it
connection_settings = {
    'dbname': 'udacity',
    'user': 'admin',
    'password': 'admin',
    'host': '127.0.0.1',
}
conn = psycopg2.connect(**connection_settings)
cur = conn.cursor()
# With autocommit on, every statement takes effect without an explicit commit()
conn.set_session(autocommit=True)

In [13]:
# Create the music library table and insert some rows.
# text[] is an array-of-text column; the Python list passed as the last
# parameter of each row is stored in it.
# The table is dropped first so that re-running this cell rebuilds it from
# scratch instead of appending the same two rows again.
cur.execute("DROP TABLE IF EXISTS music_library;")
cur.execute("""
    CREATE TABLE music_library (
        album_id    int,
        album_name  varchar,
        artist_name varchar,
        year        int,
        songs       text[]
    );
""")

# One tuple per album: (album_id, album_name, artist_name, year, songs)
music_library_rows = [
    (1, "Rubber Soul", "The Beatles", 1965, ["Michelle", "Think For Yourself", "In My Life"]),
    (2, "Let It Be", "The Beatles", 1970, ["Let It Be", "Across The Universe"]),
]
cur.executemany(
    """
    INSERT INTO music_library (album_id, album_name, artist_name, year, songs)
    VALUES (%s, %s, %s, %s, %s)
    """,
    music_library_rows,
)

In [14]:
# Get the data to 1NF
# - separate the data so that no column holds a list: song_name stores a
#   single song, and each song becomes its own row.
# The table is dropped first so that re-running this cell does not append the
# same five rows again.
cur.execute("DROP TABLE IF EXISTS music_library2;")
cur.execute("""
    CREATE TABLE music_library2 (
        album_id    int,
        album_name  varchar,
        artist_name varchar,
        year        int,
        song_name   varchar
    );
""")

# Insert values where each row is a song:
# (album_id, album_name, artist_name, year, song_name)
music_library2_rows = [
    (1, "Rubber Soul", "The Beatles", 1965, "Michelle"),
    (1, "Rubber Soul", "The Beatles", 1965, "Think For Yourself"),
    (1, "Rubber Soul", "The Beatles", 1965, "In My Life"),
    (2, "Let It Be", "The Beatles", 1970, "Let It Be"),
    (2, "Let It Be", "The Beatles", 1970, "Across The Universe"),
]
cur.executemany(
    """
    INSERT INTO music_library2 (album_id, album_name, artist_name, year, song_name)
    VALUES (%s, %s, %s, %s, %s)
    """,
    music_library2_rows,
)

In [15]:
# The data is now in 1NF: each row is unique and no column holds a list of songs.
# However, many values are duplicated across rows, e.g. album_id, album_name etc.
cur.execute('SELECT * from music_library2')
for record in cur:
    print(record)

(1, 'Rubber Soul', 'The Beatles', 1965, 'Michelle')
(1, 'Rubber Soul', 'The Beatles', 1965, 'Think For Yourself')
(1, 'Rubber Soul', 'The Beatles', 1965, 'In My Life')
(2, 'Let It Be', 'The Beatles', 1970, 'Let It Be')
(2, 'Let It Be', 'The Beatles', 1970, 'Across The Universe')


In [17]:
# Move data into 2NF - reduce duplicates
# The data is split into album_library and song_library. album_id identifies
# an album and song_id identifies a song; song_library.album_id points back to
# the album. Note that these columns are used as keys by convention only: no
# PRIMARY KEY constraint is declared on either table.
# Both tables are dropped first so that re-running this cell does not append
# the same rows again.
cur.execute("DROP TABLE IF EXISTS album_library;")
cur.execute("DROP TABLE IF EXISTS song_library;")
cur.execute("""
    CREATE TABLE album_library (
        album_id    int,
        album_name  varchar,
        artist_name varchar,
        year        int
    );
""")
cur.execute("""
    CREATE TABLE song_library (
        song_id   int,
        album_id  int,
        song_name varchar
    );
""")

# (album_id, album_name, artist_name, year)
album_rows = [
    (1, "Rubber Soul", "The Beatles", 1965),
    (2, "Let It Be", "The Beatles", 1970),
]
cur.executemany(
    """
    INSERT INTO album_library (album_id, album_name, artist_name, year)
    VALUES (%s, %s, %s, %s)
    """,
    album_rows,
)

# (song_id, album_id, song_name)
song_rows = [
    (1, 1, "Michelle"),
    (2, 1, "Think For Yourself"),
    (3, 1, "In My Life"),
    (4, 2, "Let It Be"),
    (5, 2, "Across the Universe"),
]
cur.executemany(
    """
    INSERT INTO song_library (song_id, album_id, song_name)
    VALUES (%s, %s, %s)
    """,
    song_rows,
)

In [18]:
# Print every row of both 2NF tables, each preceded by a heading
listings = (
    ("Table: album_library\n", "SELECT * FROM album_library;"),
    ("\nTable: song_library\n", "SELECT * FROM song_library;"),
)
for heading, query in listings:
    print(heading)
    cur.execute(query)
    for record in cur:
        print(record)

Table: album_library

(1, 'Rubber Soul', 'The Beatles', 1965)
(2, 'Let It Be', 'The Beatles', 1970)

Table: song_library

(1, 1, 'Michelle')
(2, 1, 'Think For Yourself')
(3, 1, 'In My Life')
(4, 2, 'Let It Be')
(5, 2, 'Across the Universe')


In [19]:
# Both tables carry album_id, so joining on it rebuilds the same view as in 1NF
# without storing the album details again for every song.
cur.execute("SELECT * FROM album_library JOIN\
             song_library ON album_library.album_id = song_library.album_id ;")
for record in cur:
    print(record)

(1, 'Rubber Soul', 'The Beatles', 1965, 1, 1, 'Michelle')
(1, 'Rubber Soul', 'The Beatles', 1965, 2, 1, 'Think For Yourself')
(1, 'Rubber Soul', 'The Beatles', 1965, 3, 1, 'In My Life')
(2, 'Let It Be', 'The Beatles', 1970, 4, 2, 'Let It Be')
(2, 'Let It Be', 'The Beatles', 1970, 5, 2, 'Across the Universe')


In [26]:
# Moving data to 3NF
# There is still room to improve: album_library repeats the artist name on
# every album row. The artist gets a table of its own, so that The Beatles
# exist exactly once, and album_library only refers to them through artist_id.
# A change to the artist's data then touches one row in artist_library instead
# of every album row.

# album_library and song_library already exist from the 2NF step.
# CREATE TABLE IF NOT EXISTS would silently keep the old album_library (which
# has artist_name, not artist_id), making the INSERT below fail, and the song
# rows would be inserted a second time. Drop all three tables so that this
# cell always starts from a clean state.
cur.execute("DROP TABLE IF EXISTS album_library;")
cur.execute("DROP TABLE IF EXISTS artist_library;")
cur.execute("DROP TABLE IF EXISTS song_library;")

# Create album library
cur.execute("""
    CREATE TABLE album_library (
        album_id   int,
        album_name varchar,
        artist_id  int,
        year       int
    );
""")
# (album_id, album_name, artist_id, year)
album_rows = [
    (1, "Rubber Soul", 1, 1965),
    (2, "Let It Be", 1, 1970),
]
cur.executemany(
    """
    INSERT INTO album_library (album_id, album_name, artist_id, year)
    VALUES (%s, %s, %s, %s)
    """,
    album_rows,
)

# Create artist library
cur.execute("""
    CREATE TABLE artist_library (
        artist_id   int,
        artist_name varchar
    );
""")
cur.execute(
    """
    INSERT INTO artist_library (artist_id, artist_name)
    VALUES (%s, %s)
    """,
    (1, "The Beatles"),
)

# Create song library
cur.execute("""
    CREATE TABLE song_library (
        song_id   int,
        album_id  int,
        song_name varchar
    );
""")
# (song_id, album_id, song_name)
song_rows = [
    (1, 1, "Michelle"),
    (2, 1, "Think For Yourself"),
    (3, 1, "In My Life"),
    (4, 2, "Let It Be"),
    (5, 2, "Across the Universe"),
]
cur.executemany(
    """
    INSERT INTO song_library (song_id, album_id, song_name)
    VALUES (%s, %s, %s)
    """,
    song_rows,
)

In [27]:
# With the data in 3NF, join the three tables back together:
# artist -> album on artist_id, then album -> song on album_id.
cur.execute("SELECT * FROM (artist_library JOIN album_library ON \
                           artist_library.artist_id = album_library.artist_id) JOIN \
                           song_library ON album_library.album_id=song_library.album_id;")

for record in cur:
    print(record)

(1, 'The Beatles', 1, 'Rubber Soul', 1, 1965, 1, 1, 'Michelle')
(1, 'The Beatles', 1, 'Rubber Soul', 1, 1965, 2, 1, 'Think For Yourself')
(1, 'The Beatles', 1, 'Rubber Soul', 1, 1965, 3, 1, 'In My Life')
(1, 'The Beatles', 2, 'Let It Be', 1, 1970, 4, 2, 'Let It Be')
(1, 'The Beatles', 2, 'Let It Be', 1, 1970, 5, 2, 'Across the Universe')
