# Data Warehouse Creation

In [1]:
# Execute create_tables.py with IPython's %run magic; this cell runs before the etl.py cell below.
# NOTE(review): presumably the script (re)creates the warehouse tables — confirm against create_tables.py.
%run create_tables.py

In [2]:
# Execute etl.py with IPython's %run magic, after create_tables.py has been run in the previous cell.
# NOTE(review): presumably the script loads the tables queried further down — confirm against etl.py.
%run etl.py

# Building connection

In [3]:
import configparser
import psycopg2

In [4]:
# Load the cluster connection settings from dwh.cfg.
# read_file() is used instead of read() because read() silently skips a
# missing file, which would only surface later as a confusing KeyError.
config = configparser.ConfigParser()
with open('dwh.cfg') as cfg_file:
    config.read_file(cfg_file)

# The [CLUSTER] section is consumed positionally: its first five values must
# be ordered host, dbname, user, password, port. Any further values are ignored.
host, dbname, user, password, port = list(config['CLUSTER'].values())[:5]

# Pass the settings as keyword arguments so psycopg2 takes care of quoting;
# a hand-formatted DSN string breaks when a value (e.g. the password)
# contains spaces or quotes.
conn = psycopg2.connect(host=host, dbname=dbname, user=user, password=password, port=port)
cur = conn.cursor()

# Querying user dimension

In [5]:
%%time
# Peek at up to 10 rows of dim_user. There is no ORDER BY, so which rows
# come back is not guaranteed to be the same on every run.
query = """
select * from dim_user limit 10
"""

cur.execute(query)
# fetchall() returns a list of row tuples, not a single tuple.
result: list = cur.fetchall()

for row in result:
    print(row)

(61, 'Samuel', 'Gonzalez', 'M', 'free')
(75, 'Joseph', 'Gutierrez', 'M', 'free')
(20, 'Aiden', 'Ramirez', 'M', 'paid')
(77, 'Magdalene', 'Herman', 'F', 'free')
(70, 'Jaleah', 'Hayes', 'F', 'paid')
(15, 'Lily', 'Koch', 'F', 'paid')
(27, 'Carlos', 'Carter', 'M', 'free')
(15, 'Lily', 'Koch', 'F', 'free')
(41, 'Brayden', 'Clark', 'M', 'free')
(45, 'Dominick', 'Norris', 'M', 'free')
CPU times: user 0 ns, sys: 2.49 ms, total: 2.49 ms
Wall time: 205 ms


# Querying song dimension

In [6]:
%%time
# Peek at up to 10 rows of dim_song. There is no ORDER BY, so which rows
# come back is not guaranteed to be the same on every run.
query = """
select * from dim_song limit 10
"""

cur.execute(query)
# fetchall() returns a list of row tuples, not a single tuple.
result: list = cur.fetchall()

for row in result:
    print(row)

('SOTCYVC12A8C138F87', 'Who Am I?', 'AROQL0S1187FB57CFD', 0, 168.6722)
('SOGTRKA12A8C137D72', 'Somos Gitanos', 'AR2UQQ51187B9AC816', 2001, 222.92853)
('SOTDKGV12AB017E39A', 'Why', 'AR8WV031187FB3F903', 2009, 190.17098)
('SOBMVTW12A6D4F636C', 'Love Theme From "Lady Sings The Blues', 'ARRL7WS1187FB576F1', 0, 185.88689)
('SOCHCGY12A8C13B029', 'Odysseus (1999 Digital Remaster)', 'ARBHPCJ1187B9B0853', 0, 347.08853)
('SONJRUV12AF72A20D7', 'Numb Milleneum', 'ARI8U521187B9A8B97', 2000, 105.09016)
('SOUVTOG12A67ADAE5C', 'Ich vermiss Dich', 'ARJEOGL1187B98E9D4', 2006, 210.72934)
('SOHTGJW12A58A7D7D3', 'The Fundamental Alienation', 'ARLSF8H1187B9A76B0', 2007, 317.36118)
('SOBBJEE12A8C141C76', 'Intensity', 'AR7Z6XC1187FB39800', 2007, 393.482)
('SOYVAAQ12A6D4F8026', 'Sex', 'AR4R0741187FB39AF2', 0, 167.83628)
CPU times: user 1.88 ms, sys: 1.15 ms, total: 3.03 ms
Wall time: 118 ms


# Querying artist dimension

In [7]:
%%time
# Peek at up to 10 rows of dim_artist. There is no ORDER BY, so which rows
# come back is not guaranteed to be the same on every run.
query = """
select * from dim_artist limit 10
"""

cur.execute(query)
# fetchall() returns a list of row tuples, not a single tuple.
result: list = cur.fetchall()

for row in result:
    print(row)

('ARNQGIZ1187B9A7546', 'NOVI FOSILI', '', None, None)
('ARDY1CB1187B9B027D', 'Osvaldo Pugliese', '', None, None)
('ARAPI451187B9B6E6F', 'John Michael Talbot', 'Oklahoma City, OK', 35.472, -97.52033)
('ARUYNK81187FB51B15', 'Atlas Sound', '', None, None)
('AR8CZ1U1187FB4CED1', 'The Be Good Tanyas', 'Vancouver, British Columbia, Cana', 49.26044, -123.11403)
('AR6XXDN1187FB4B01B', 'Chiens De Paille', 'Cannes, France', 43.55326, 7.01325)
('ARMAC4T1187FB3FA4C', 'The Dillinger Escape Plan', 'Morris Plains, NJ', 40.82624, -74.47995)
('ARUMM9B1187FB4584C', 'Reno & Smiley (With Carter & Ralph Stanley)', '', None, None)
('AR4K2P91187B9B2B35', 'Sam Cooke', 'Clarksdale, MS', 34.19451, -90.5651)
('ARNGX901187B9B194F', 'Gorefest', '', None, None)
CPU times: user 1.27 ms, sys: 878 µs, total: 2.15 ms
Wall time: 111 ms


# Querying time dimension

In [8]:
%%time
# Peek at up to 10 rows of dim_time. There is no ORDER BY, so which rows
# come back is not guaranteed to be the same on every run.
query = """
select * from dim_time limit 10
"""

cur.execute(query)
# fetchall() returns a list of row tuples, not a single tuple.
result: list = cur.fetchall()

for row in result:
    print(row)

(datetime.datetime(2018, 11, 10, 4, 1, 42), 4, 10, 45, 11, 2018, False)
(datetime.datetime(2018, 11, 10, 8, 26, 32), 8, 10, 45, 11, 2018, False)
(datetime.datetime(2018, 11, 3, 1, 4, 33), 1, 3, 44, 11, 2018, False)
(datetime.datetime(2018, 11, 3, 1, 5, 50), 1, 3, 44, 11, 2018, False)
(datetime.datetime(2018, 11, 3, 16, 7, 39), 16, 3, 44, 11, 2018, False)
(datetime.datetime(2018, 11, 3, 16, 10, 32), 16, 3, 44, 11, 2018, False)
(datetime.datetime(2018, 11, 3, 16, 39, 49), 16, 3, 44, 11, 2018, False)
(datetime.datetime(2018, 11, 15, 0, 45, 41), 0, 15, 46, 11, 2018, True)
(datetime.datetime(2018, 11, 15, 6, 10, 33), 6, 15, 46, 11, 2018, True)
(datetime.datetime(2018, 11, 12, 2, 45, 52), 2, 12, 46, 11, 2018, True)
CPU times: user 2.06 ms, sys: 8 µs, total: 2.07 ms
Wall time: 116 ms


# Querying songplay fact

In [9]:
%%time
# Peek at up to 10 rows of fact_songplay. There is no ORDER BY, so which
# rows come back is not guaranteed to be the same on every run.
query = """
select * from fact_songplay limit 10
"""

cur.execute(query)
# fetchall() returns a list of row tuples, not a single tuple.
result: list = cur.fetchall()

for row in result:
    print(row)

(257, datetime.datetime(2018, 11, 9, 11, 44, 35), 95, 'paid', 'SOHTKMO12AB01843B0', 'AR5EYTL1187B98EDA0', 276, 'Winston-Salem, NC', '"Mozilla/5.0 (iPhone; CPU iPhone OS 7_1_2 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D257 Safari/9537.53"')
(513, datetime.datetime(2018, 11, 13, 17, 47, 5), 29, 'paid', 'SOJWFXM12A3F1EBE8B', 'AR049S81187B9AE8A5', 486, 'Atlanta-Sandy Springs-Roswell, GA', '"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_4) AppleWebKit/537.78.2 (KHTML, like Gecko) Version/7.0.6 Safari/537.78.2"')
(130, datetime.datetime(2018, 11, 20, 17, 18, 31), 49, 'paid', 'SONQWXY12A81C204D7', 'ARCEXLE1187FB3A93E', 758, 'San Francisco-Oakland-Hayward, CA', 'Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0')
(226, datetime.datetime(2018, 11, 26, 11, 35, 51), 15, 'paid', 'SOLLOSO12AB0184A7A', 'ARVXU2X1187B9AE6D8', 834, 'Chicago-Naperville-Elgin, IL-IN-WI', '"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromi

# Debugging

In [10]:
# Debugging aid: print every row of stl_load_errors (no LIMIT, so the whole
# table is fetched).
# NOTE(review): stl_load_errors looks like the Redshift system table that
# logs failed loads — confirm the target database is Redshift.
query = """
select * from stl_load_errors
"""

cur.execute(query)
# fetchall() returns a list of row tuples, not a single tuple.
result: list = cur.fetchall()

for row in result:
    print(row)

# Close Connection

In [11]:
# Close the database connection opened in the "Building connection" cell.
# The cursor `cur` created from it is not closed separately.
conn.close()