In [1]:
import os

import psycopg2

In [2]:
# Connect to the udacity database.
# Connection settings are read from the standard libpq environment variables
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST) when they are set, so real
# credentials never have to be written into the notebook. The fallbacks are
# the local sandbox values this notebook has always used.
conn = psycopg2.connect(dbname=os.environ.get('PGDATABASE', 'udacity'),
                        user=os.environ.get('PGUSER', 'admin'),
                        password=os.environ.get('PGPASSWORD', 'admin'),
                        host=os.environ.get('PGHOST', '127.0.0.1'))
cur = conn.cursor()
# Autocommit: each statement is committed as soon as it runs, so the cells
# below never need an explicit conn.commit().
conn.set_session(autocommit=True)

In [3]:
# Create table song_length and load one row per song.
cur.execute("CREATE TABLE IF NOT EXISTS song_length (song_id int, song_length int);")

# Rows to load, keyed by the column they populate.
song_length_rows = [
    {"song_id": 1, "song_length": 163},
    {"song_id": 2, "song_length": 137},
    {"song_id": 3, "song_length": 145},
    {"song_id": 4, "song_length": 240},
    {"song_id": 5, "song_length": 227},
]

# The table has no unique constraint, so a plain INSERT would append a second
# copy of every row each time this cell is re-run (and duplicate rows here
# multiply rows in the joins below). The NOT EXISTS guard skips any song_id
# that is already present, which makes the cell safe to run repeatedly.
cur.executemany(
    """
    INSERT INTO song_length (song_id, song_length)
    SELECT %(song_id)s, %(song_length)s
    WHERE NOT EXISTS (
        SELECT 1 FROM song_length WHERE song_id = %(song_id)s
    )
    """,
    song_length_rows,
)

In [4]:
# A denormalized table is designed around the queries it has to answer quickly,
# so we start from the questions we want to ask of it:
# a) I want a list of all my songs, with album name, artist and length
# b) I want to know the total length of an album

# Query a): one wide row per song, pre-joined with its album, artist and length.
# Because of IF NOT EXISTS, the statement is skipped when the table already exists.
create_song_library_denormalized = '''
CREATE TABLE IF NOT EXISTS song_library_denormalized AS (
    select
        song_library.song_id
        ,song_library.song_name
        ,album_library.album_name
        ,album_library.year
        ,artist_library.artist_name
        ,song_length.song_length
    from song_library
    join album_library using (album_id)
    join artist_library using (artist_id)
    join song_length using (song_id)
    )
'''
cur.execute(create_song_library_denormalized)

In [5]:
# Our data is duplicated across rows, but that's okay: this is the point of
# denormalized tables. We want users to query our data super fast.
# ORDER BY pins the printed order; without it the database is free to return
# the rows in any order, so the output could differ between runs.
cur.execute('SELECT * from song_library_denormalized ORDER BY song_id')
# A psycopg2 cursor is iterable and yields one result row per iteration.
for row in cur:
    print(row)

(1, 'Michelle', 'Rubber Soul', 1965, 'The Beatles', 163)
(2, 'Think For Yourself', 'Rubber Soul', 1965, 'The Beatles', 137)
(3, 'In My Life', 'Rubber Soul', 1965, 'The Beatles', 145)
(4, 'Let It Be', 'Let It Be', 1970, 'The Beatles', 240)
(5, 'Across the Universe', 'Let It Be', 1970, 'The Beatles', 227)


In [6]:
# b) I want to know the total length of an album
# Pre-aggregate the song lengths per album so the total is a plain lookup.
cur.execute('''
CREATE TABLE IF NOT EXISTS album_total_length AS (
    select
        album_library.album_id
        ,album_library.album_name
        ,sum(song_length.song_length) as album_length
    from album_library
    join song_library using (album_id)
    join song_length using (song_id)
    group by 1,2
    order by album_id
    )
''')
# The ORDER BY inside CREATE TABLE AS only affects how the rows are produced;
# it does not guarantee the order of a later SELECT. Sort explicitly when
# reading so the printed output is deterministic.
cur.execute('SELECT * from album_total_length ORDER BY album_id')
# A psycopg2 cursor is iterable and yields one result row per iteration.
for row in cur:
    print(row)

(1, 'Rubber Soul', 445)
(2, 'Let It Be', 467)


In [7]:
# Release the database resources: the cursor first, then the connection
# it was created from.
for _handle in (cur, conn):
    _handle.close()