In [22]:
# Load the `sql` IPython extension; the %sql / %%sql magics used in the cells below depend on it.
%load_ext sql

# NOTE(review): pandas, boto3 and json are not referenced in the cells visible here -
# confirm whether they are still needed.
import pandas as pd
import boto3
import json
# Helper module (presumably project-local) used below to obtain the cluster connection string.
import amazon_redshift_cluster as aws

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [23]:
# Obtain the connection string from the helper module and pass it to the %sql line magic,
# which opens the database session that the %%sql cells below run against.
# IPython substitutes the value of the Python variable for `$conn_string` before the magic runs.
# NOTE(review): the string presumably embeds the database password - avoid printing it.
conn_string = aws.Cluster.getConnString()
%sql $conn_string

'Connected: dwhuser@dwh'

## Denormalize data (= join all tables)

In [24]:
%%sql

/* Preview of the denormalized data. Every songplays row is joined to its
   matching users, songs, artists and time rows. All joins are inner joins,
   so a play without a match in any of those tables is left out.
   Only the ten earliest plays by start_time are returned. */
SELECT
    sp.start_time,
    t.week,
    t.month,
    t.year,
    sp.user_id,
    u.first_name,
    u.last_name,
    sp.song_id,
    s.title,
    s.duration,
    sp.artist_id,
    ar.name AS artist_name
FROM songplays AS sp
JOIN users   AS u  ON u.user_id = sp.user_id
JOIN songs   AS s  ON s.song_id = sp.song_id
JOIN artists AS ar ON ar.artist_id = sp.artist_id
JOIN time    AS t  ON t.start_time = sp.start_time
ORDER BY sp.start_time
LIMIT 10;


 * postgresql://dwhuser:***@dwhcluster.czw6kev3ol8q.us-west-2.redshift.amazonaws.com:5439/dwh
10 rows affected.


start_time,week,month,year,user_id,first_name,last_name,song_id,title,duration,artist_id,artist_name
2018-11-01 21:11:13,44,11,2018,8,Kaylee,Summers,SOEIQUY12AF72A086A,Eriatarka,380,ARHUC691187B9AD27F,The Mars Volta
2018-11-02 16:35:00,44,11,2018,50,Ava,Robinson,SOBONKR12A58A7A7E0,You're The One,239,AR5E44Z1187B9A1D74,Dwight Yoakam
2018-11-02 17:31:45,44,11,2018,10,Sylvie,Cruz,SOHTKMO12AB01843B0,Catch You Baby (Steve Pitron & Max Sanna Radio Edit),181,AR5EYTL1187B98EDA0,Lonnie Gordon
2018-11-02 18:02:42,44,11,2018,50,Ava,Robinson,SOBONKR12A58A7A7E0,You're The One,239,AR5E44Z1187B9A1D74,Dwight Yoakam
2018-11-02 18:36:53,44,11,2018,71,Ayleen,Wise,SOBBZPM12AB017DF4B,Pop Is Dead,130,ARH6W4X1187B99274F,Radiohead
2018-11-03 01:12:26,44,11,2018,53,Celeste,Williams,SOSELMV12A6D4FCF5A,Valerie,229,ARWD25M1187FB4C563,Amy Winehouse
2018-11-03 17:59:01,44,11,2018,15,Lily,Koch,SOHTKMO12AB01843B0,Catch You Baby (Steve Pitron & Max Sanna Radio Edit),181,AR5EYTL1187B98EDA0,Lonnie Gordon
2018-11-03 18:19:10,44,11,2018,95,Sara,Johnson,SOPANEB12A8C13E81E,God Smack,230,ARSW5F51187FB4CFC9,Alice In Chains
2018-11-03 19:33:39,44,11,2018,95,Sara,Johnson,SOYTFSY12A6D4FD84E,Born To Be Wild,211,ARRFSMX1187FB39B03,Steppenwolf
2018-11-03 21:14:28,44,11,2018,49,Chloe,Cuevas,SOFVOQL12A6D4F7456,The Boy With The Thorn In His Side,196,ARPN0Y61187B9ABAA0,The Smiths


## Top 10 users by number of song plays

In [25]:
%%sql

/* Ten users with the most song plays.
   Rows are grouped by user_id alone, so the descriptive columns are wrapped
   in MIN() only to satisfy GROUP BY.
   NOTE(review) this assumes user_id is unique in users. If it is not, the join
   multiplies the play count of that user. TODO confirm.
   ORDER BY has no tie-breaker, so users with equal counts come back in no
   guaranteed order. */
SELECT
    sp.user_id,
    MIN(u.first_name + ' ' + u.last_name) AS name,
    MIN(u.gender) AS gender,
    MIN(u.level) AS level,
    COUNT(*) AS n
FROM songplays AS sp
JOIN users AS u ON u.user_id = sp.user_id
GROUP BY sp.user_id
ORDER BY n DESC
LIMIT 10;


 * postgresql://dwhuser:***@dwhcluster.czw6kev3ol8q.us-west-2.redshift.amazonaws.com:5439/dwh
10 rows affected.


user_id,name,gender,level,n
49,Chloe Cuevas,F,paid,42
97,Kate Harrell,F,paid,32
80,Tegan Levine,F,paid,31
44,Aleena Kirby,F,paid,21
73,Jacob Klein,M,paid,18
88,Mohammad Rodriguez,M,paid,17
15,Lily Koch,F,paid,15
29,Jacqueline Lynch,F,paid,13
24,Layla Griffin,F,paid,13
36,Matthew Jones,M,paid,13


## Top 10 songs

In [29]:
%%sql

/* Ten most played songs with title, duration and artist name.
   The artist is looked up through the artist_id stored on the songs row,
   not the artist_id stored on songplays.
   MIN() on the descriptive columns only satisfies GROUP BY song_id.
   ORDER BY has no tie-breaker, so songs with equal counts come back in no
   guaranteed order. */
SELECT
    sp.song_id,
    MIN(s.title) AS title,
    MIN(s.duration) AS duration,
    MIN(ar.name) AS artist_name,
    COUNT(*) AS n
FROM songplays AS sp
JOIN songs AS s ON s.song_id = sp.song_id
JOIN artists AS ar ON ar.artist_id = s.artist_id
GROUP BY sp.song_id
ORDER BY n DESC
LIMIT 10;

 * postgresql://dwhuser:***@dwhcluster.czw6kev3ol8q.us-west-2.redshift.amazonaws.com:5439/dwh
10 rows affected.


song_id,title,duration,artist_name,n
SOBONKR12A58A7A7E0,You're The One,239,Dwight Yoakam,37
SOUNZHU12A8AE47481,I CAN'T GET STARTED,497,Ron Carter,9
SOHTKMO12AB01843B0,Catch You Baby (Steve Pitron & Max Sanna Radio Edit),181,Lonnie Gordon,9
SOULTKQ12AB018A183,Nothin' On You [feat. Bruno Mars] (Album Version),269,B.o.B,8
SOLZOBD12AB0185720,Hey Daddy (Daddy's Home),224,Usher,6
SOARUPP12AB01842E0,Up Up & Away,227,Kid Cudi,5
SOTNHIP12AB0183131,Make Her Say,237,Kid Cudi,5
SOIZLKI12A6D4F7B61,Supermassive Black Hole (Album Version),209,Muse,4
SONQEYS12AF72AABC9,Mr. Jones,272,Counting Crows,4
SOIOESO12A6D4F621D,Unwell (Album Version),229,matchbox twenty,4


## Top 10 artists

In [27]:
%%sql

/* Ten most played artists, with location and the number of songs each artist
   has in the songs table (song_num).
   The per-artist song count is pre-aggregated in a subquery and LEFT JOINed,
   so an artist without any row in songs still appears, reported with song_num 0.
   COALESCE is the standard SQL null-replacement function and is used here
   in place of the T-SQL style ISNULL(expr, 0). The result is the same.
   MIN() on the descriptive columns only satisfies GROUP BY artist_id.
   ORDER BY has no tie-breaker, so artists with equal counts come back in no
   guaranteed order. */
SELECT A.artist_id
    , MIN(B.name) AS name
    , MIN(B.location) AS location
    , MIN(COALESCE(C.song_num, 0)) AS song_num
    , COUNT(*) AS n
FROM songplays AS A
INNER JOIN artists AS B ON A.artist_id = B.artist_id
LEFT OUTER JOIN (
    SELECT artist_id, COUNT(*) AS song_num
    FROM songs
    GROUP BY artist_id
) AS C ON B.artist_id = C.artist_id
GROUP BY A.artist_id
ORDER BY n DESC
LIMIT 10;

 * postgresql://dwhuser:***@dwhcluster.czw6kev3ol8q.us-west-2.redshift.amazonaws.com:5439/dwh
10 rows affected.


artist_id,name,location,song_num,n
AR5E44Z1187B9A1D74,Dwight Yoakam,"Pikeville, KY",1,37
ARD46C811C8A414F3F,Kid Cudi,"Cleveland, Ohio",2,10
AR37SX11187FB3E164,Ron Carter,"Ferndale, MI",1,9
AR5EYTL1187B98EDA0,Lonnie Gordon,Nevada,2,9
ARKQQZA12086C116FC,B.o.B,Washington DC,1,8
ARPDVPJ1187B9ADBE9,Usher,"Atlanta, GA",4,6
ARR3ONV1187B9A2F59,Muse,UK,7,6
ARM0P6Z1187FB4D466,Arctic Monkeys,"Sheffield, South Yorkshire, Engla",4,5
ARPN0Y61187B9ABAA0,The Smiths,"Manchester, England",6,4
ART5MUE1187B98C961,Metallica,"Los Angeles, CA",3,4


## Users per week

In [30]:
%%sql

/* Number of distinct users with at least one play, per value of time.week.
   The CTE reduces songplays to one row per user_id and week pair, and the
   outer query counts those rows for each week.
   NOTE(review) weeks are keyed by the week number only, so plays from
   different years that share a week number would be merged. Confirm the data
   covers a single year. */
WITH weekly_users AS (
    SELECT DISTINCT sp.user_id, t.week
    FROM songplays AS sp
    JOIN time AS t ON t.start_time = sp.start_time
)
SELECT wu.week, COUNT(*) AS user_count
FROM weekly_users AS wu
GROUP BY wu.week
ORDER BY wu.week;

 * postgresql://dwhuser:***@dwhcluster.czw6kev3ol8q.us-west-2.redshift.amazonaws.com:5439/dwh
5 rows affected.


week,user_count
44,13
45,27
46,26
47,25
48,24
