# Table creation

##### Notebook description

Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello Hello 

## Setup

In [608]:
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [609]:
from time import time
import boto3
import configparser
import pandas as pd

In [610]:
config = configparser.ConfigParser()
config.read_file(open('capstone.cfg'))

HOST=config.get('CLUSTER', 'HOST')
DB_NAME=config.get('CLUSTER', 'DB_NAME')
DB_USER=config.get('CLUSTER', 'DB_USER')
DB_PASSWORD=config.get('CLUSTER', 'DB_PASSWORD')
DB_PORT=config.get('CLUSTER', 'DB_PORT')

KEY=config.get('KEYS', 'KEY')
SECRET=config.get('KEYS', 'SECRET')

ARN=config.get('IAM_ROLE', 'ARN')

In [612]:
# import os 
conn_string="postgresql://{}:{}@{}:{}/{}".format(DB_USER, DB_PASSWORD, HOST, DB_PORT, DB_NAME)
print(conn_string)
%sql $conn_string

postgresql://capstoneuser:P6cq2n!kan@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev


In [613]:
s3 = boto3.resource('s3',
                       region_name="us-west-2",
                       aws_access_key_id=KEY,
                       aws_secret_access_key=SECRET
                   )

s3Bucket = s3.Bucket("spotify-dataeng-nano")

s3client = boto3.client('s3')

## Creating secondary dim tables 

#### dim_region

In [577]:
%%sql
DROP TABLE IF EXISTS dim_country;

CREATE TABLE IF NOT EXISTS dim_country (
    id_country INT IDENTITY(100,1) PRIMARY KEY
    , name VARCHAR
);

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]

Cannot get the insertion to happen in alphabetical order. Oh well

In [578]:
dim_country_insert = """
INSERT INTO dim_country (name)
SELECT DISTINCT region
FROM staging_charts
ORDER BY region

""".format('dim_country', ARN)

In [579]:
%sql $dim_country_insert

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
70 rows affected.


[]

In [618]:
%%sql
SELECT *
FROM dim_country
LIMIT 5;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
5 rows affected.


id_country,name
113,Guatemala
129,Sweden
107,Argentina
123,Czech Republic
139,Japan


----------------------------------------------------------------------------------------------------------------------------

#### dim_date

In [581]:
%%sql
DROP TABLE IF EXISTS dim_date;

CREATE TABLE IF NOT EXISTS dim_date (
      ds DATE PRIMARY KEY
    , day INT
    , day_of_week INT
    , weekday_name VARCHAR
    , week INT
    , month INT
    , year INT
);

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]

In [582]:
%%sql
INSERT INTO dim_date

WITH distinct_ds AS (
	SELECT DISTINCT ds 
	FROM fct_chart_movement
	ORDER BY ds
)

SELECT
	  ds
	, EXTRACT(DAY FROM ds)
	, EXTRACT(DOW FROM ds)
	, to_char(ds, 'Day')
	, EXTRACT(WEEK FROM ds)
	, EXTRACT(MONTH FROM ds)
	, EXTRACT(YEAR FROM ds)
FROM distinct_ds;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
1673 rows affected.


[]

In [619]:
%%sql
SELECT *
FROM dim_date
ORDER BY ds
LIMIT 10;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
10 rows affected.


ds,day,day_of_week,weekday_name,week,month,year
2017-01-01,1,0,Sunday,52,1,2017
2017-01-02,2,1,Monday,1,1,2017
2017-01-03,3,2,Tuesday,1,1,2017
2017-01-04,4,3,Wednesday,1,1,2017
2017-01-05,5,4,Thursday,1,1,2017
2017-01-06,6,5,Friday,1,1,2017
2017-01-07,7,6,Saturday,1,1,2017
2017-01-08,8,0,Sunday,1,1,2017
2017-01-09,9,1,Monday,2,1,2017
2017-01-10,10,2,Tuesday,2,1,2017


----------------------------------------------------------------------------------------------------------------------------

#### dim_chart

In [584]:
%%sql
DROP TABLE IF EXISTS dim_chart;

CREATE TABLE IF NOT EXISTS dim_chart (
      id_chart INT IDENTITY(100,1) PRIMARY KEY
    , name VARCHAR
);

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]

In [585]:
dim_chart_insert = """
INSERT INTO dim_chart (name)
SELECT DISTINCT chart
FROM staging_charts
ORDER BY chart;

""".format('dim_chart', ARN)

In [586]:
%sql $dim_chart_insert

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
2 rows affected.


[]

In [620]:
%%sql
SELECT *
FROM dim_chart
ORDER BY id_chart
LIMIT 5;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
2 rows affected.


id_chart,name
102,viral50
106,top200


----------------------------------------------------------------------------------------------------------------------------

#### dim_descriptors

In [588]:
%%sql
DROP TABLE IF EXISTS dim_descriptors;

CREATE TABLE IF NOT EXISTS dim_descriptors (
      id_song VARCHAR PRIMARY KEY
    , descriptor_list VARCHAR
    , genre VARCHAR
);

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]

In [589]:
dim_descriptors_insert = """
INSERT INTO dim_descriptors
SELECT 
	  spotify_id
	, seeds 
	, genre
FROM staging_song_adjectives;

""".format('dim_descriptors', ARN)

In [590]:
%sql $dim_descriptors_insert

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
90001 rows affected.


[]

In [621]:
%%sql
SELECT *
FROM dim_descriptors
LIMIT 5;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
5 rows affected.


id_song,descriptor_list,genre
3DaC3oRpGInTU0x8DvdSGp,"['aggressive', 'angry']",punk
2yxAAU2xKo2Z04Se01J4Bq,['aggressive'],progressive metal
7n6DnNdKKhjyDa1GlGgLeQ,['aggressive'],pop
4DuE7W3HEq037BuHzas2Ym,['aggressive'],gothic metal
1UKOgSwVAFcnDEsYYOlyV1,"['aggressive', 'spooky']",electro


----------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------------------------------------------------------------------

### dim_song

In [592]:
%%sql
DROP TABLE IF EXISTS dim_song;


CREATE TABLE IF NOT EXISTS dim_song (
      id_song VARCHAR PRIMARY KEY
    , title VARCHAR
    , id_artists VARCHAR
    , id_album VARCHAR
    , release_date DATE
    , explicit BOOLEAN
    , duration_ms INT
    , tempo INT
    , key INT
	, time_signature DECIMAL
	, danceability FLOAT
	, energy FLOAT
	, loudness FLOAT
	, speechiness FLOAT
	, acousticness FLOAT
	, instrumentalness FLOAT
	, liveness FLOAT
	, valence FLOAT
);

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]

In [593]:
dim_song_insert = """
INSERT INTO dim_song

WITH duplicate_songs AS (
	SELECT *, ROW_NUMBER() OVER (PARTITION BY name, artist_ids, release_date ORDER BY id) AS row_num
	FROM staging_songs_full
)

SELECT
	id 							
  , name						
  , artist_ids										
  , album_id					
  , CAST(release_date AS DATE)			
  , CASE WHEN explicit = 'True' then TRUE else FALSE END
  , CAST(duration_ms AS INT)
  , ROUND(CAST(tempo AS FLOAT), 0)
  , CAST(key AS INT)
  , ROUND(CAST(time_signature AS FLOAT), 1)
  , ROUND(CAST(danceability AS FLOAT), 2)
  , ROUND(CAST(energy AS FLOAT), 2)
  , ROUND(CAST(loudness AS FLOAT), 2)
  , ROUND(CAST(speechiness AS FLOAT), 2)
  , ROUND(CAST(acousticness AS FLOAT), 2)
  , ROUND(CAST(instrumentalness AS FLOAT), 2)
  , ROUND(CAST(liveness AS FLOAT), 2)
  , ROUND(CAST(valence AS FLOAT), 2)
FROM duplicate_songs
WHERE 
    release_date ~ '[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]'
	AND row_num = 1;
""".format('dim_song', ARN)

In [594]:
%sql $dim_song_insert

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
1063438 rows affected.


[]

In [622]:
%%sql
SELECT *
FROM dim_song
LIMIT 6;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
6 rows affected.


id_song,title,id_artists,id_album,release_date,explicit,duration_ms,tempo,key,time_signature,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
74DyYsJP3zYeLYznqkzWkf,"""...And Now The Man You've All Been Waiting For!""",['3yw2yyy1h5yBImpvbYbp7l'],0UBjbewM4UIN0vGnsDbvUm,2003-08-26,False,1110520,136,8,3,0.15,0.0,-44.57,0.04,0.81,0.95,0.07,0.03
5mpK0Ztp8HEWWTiV5yVxI9,"""Ach Gott, wie manches Herzeleid"" BMV58: Chorale and Aria (Soprano, Bass):","['5aIqB5nVVvmFsvSdExz408', '7lnBwiS5iL2qKOohNsrsQX', '21ZrUwCPuok2HGHtuUVCGL', '7Fu8GId2mQkNzqDbZbcDWM']",2g5TpKTnpxzCzknQCIDLa1,2007-03-07,False,146040,96,11,4,0.45,0.18,-21.1,0.03,0.94,0.07,0.29,0.79
5JT4nyG7pjsWoRuHXPSlFw,"""Ach, lieben Christen, seid getrost"" BWV 114: Chorale: ""Wir wachen oder schlafen ein""","['5aIqB5nVVvmFsvSdExz408', '2CMg8J0WjftvFR5zFbufVA', '5K6IkoVilt3TIylwWo2hE2', '7Fu8GId2mQkNzqDbZbcDWM']",49WrEpG7VIBtqCZdJ1mVbX,2007-02-27,False,46907,87,9,4,0.26,0.09,-23.03,0.05,1.0,0.11,0.09,0.41
1dS8lrGsZCDNIscIqeOIuY,"""Also hat Gott die Welt geliebt"", BWV 68: III. Recitative, ""Ich bin mit Petro nicht vermessen"" (Bass)","['5aIqB5nVVvmFsvSdExz408', '5K6IkoVilt3TIylwWo2hE2', '4ZCNO2ZNSFS2BhaDxeU8pw', '6uhxLhuh3jcNhKhM6pUo2V', '0gmKLfaE6sG19fpCMJWhe3', '7Fu8GId2mQkNzqDbZbcDWM', '2CMg8J0WjftvFR5zFbufVA']",3LXDB2QFCDS3bxzlCiJZpm,2009-04-22,False,46933,85,6,3,0.49,0.01,-27.97,0.06,0.83,0.0,0.11,0.28
746jySj5phPTQZYP9OhAAy,"""B""",['5QoPmqq8UfE0zsTWkM3CQD'],0Q8iX0mGlXl0HQCnXVZThO,2006-12-19,True,216320,66,11,4,0.42,0.73,-5.4,0.04,0.02,0.0,0.08,0.54
4hv1VWTThdKrmko2orxwGG,"""Bereitet die Wege, bereitet die Bahn"" (Concerto) BWV 132: Aria (Bass): ""Wer bist du? frage dein Gewissen""","['5aIqB5nVVvmFsvSdExz408', '7Fu8GId2mQkNzqDbZbcDWM']",5kYKTy9hbbtaztzMr6tkBK,2007-02-26,False,181493,134,5,4,0.49,0.04,-26.27,0.04,0.96,0.0,0.1,0.58


----------------------------------------------------------------------------------------------------------------------------

### fct_chart_movement

In [596]:
%%sql
CREATE OR REPLACE FUNCTION f_primary_artist (artist_array VARCHAR)
  RETURNS VARCHAR
STABLE
AS $$
  return eval(artist_array)[0]
$$ LANGUAGE plpythonu;

CREATE OR REPLACE FUNCTION f_secondary_artist (artist_array VARCHAR)
  RETURNS VARCHAR
STABLE
AS $$
  if len(eval(artist_array)) > 1:
    return eval(artist_array)[1]
  else: 
    return None
$$ LANGUAGE plpythonu;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]

In [None]:
%%sql
DROP TABLE IF EXISTS fct_chart_movement;


CREATE TABLE IF NOT EXISTS fct_chart_movement (
      id_movement INT IDENTITY(100,1) PRIMARY KEY
    , id_song VARCHAR
    , id_artists VARCHAR
    , id_primary_artist VARCHAR
    , id_secondary_artist VARCHAR
    , id_chart INT
    , current_rank INT
    , movement VARCHAR
    , id_country INT 
	, ds DATE
);

In [598]:
fct_chart_movement_insert = """
INSERT INTO fct_chart_movement (id_song, id_artists, id_primary_artist, id_secondary_artist, id_chart, current_rank, movement, id_country, ds)
SELECT
  ss.id								
, ss.artist_ids				
, f_primary_artist(artist_ids)	
, f_secondary_artist(artist_ids)		
, CAST (dch.id_chart AS INT)					
, CAST (sc.rank AS INT)							
, sc.trend							
, CAST (dc.id_country AS INT)					
, CAST (sc.date AS DATE)							
FROM staging_charts sc
JOIN staging_songs_full ss 
    ON LOWER(sc.title) = LOWER(ss.name)
    AND REGEXP_REPLACE(sc.artists, '[^a-zA-Z0-9]+', '') = REGEXP_REPLACE(ss.artists, '[^a-zA-Z0-9]+', '')
JOIN dim_country dc 
	ON sc.region = dc.name
JOIN dim_chart dch 
	ON sc.chart = dch.name
JOIN dim_song ds
	ON ss.id = ds.id_song
WHERE 
	sc.rank ~ '[0-9]+'
    AND ds.release_date ~ '[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]'
	AND LENGTH(artist_ids) < 150;
""".format('fct_chart_movement', ARN)

In [599]:
%sql $fct_chart_movement_insert

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
7611836 rows affected.


[]

In [600]:
%%sql
SELECT COUNT(*)
FROM fct_chart_movement

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


count
7611836


In [623]:
%%sql
SELECT *
FROM fct_chart_movement
LIMIT 5;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
5 rows affected.


id_movement,id_song,id_artists,id_primary_artist,id_secondary_artist,id_chart,current_rank,movement,id_country,ds
113,2ekn2ttSfGqwhhate0LSR0,['6M2wZ9GZgrQXHCFfjv46we'],6M2wZ9GZgrQXHCFfjv46we,,102,23,MOVE_UP,100,2017-08-01
129,4frr19lIKPxLo690m94MAR,['4kYSro6naA4h99UJvo89HB'],4kYSro6naA4h99UJvo89HB,,102,7,MOVE_UP,104,2017-08-01
145,43oK4OAWUsRZUDVeHtKI4U,['6M2wZ9GZgrQXHCFfjv46we'],6M2wZ9GZgrQXHCFfjv46we,,102,13,MOVE_UP,108,2017-08-01
161,2ekn2ttSfGqwhhate0LSR0,['6M2wZ9GZgrQXHCFfjv46we'],6M2wZ9GZgrQXHCFfjv46we,,102,11,SAME_POSITION,112,2017-08-01
177,0ERbK7qVqveCaBWIiYCrl3,"['5lwmRuXgjX8xIwlnauTZIP', '4VMYDCV2IEDYJArk749S6m', '1SupJlEpv7RS2tPNRaHViT']",5lwmRuXgjX8xIwlnauTZIP,4VMYDCV2IEDYJArk749S6m,102,1,SAME_POSITION,115,2017-08-01


----------------------------------------------------------------------------------------------------------------------------

#### dim_artist

In [340]:
%%sql
DROP TABLE IF EXISTS dim_artist;

CREATE TABLE IF NOT EXISTS dim_artist (
      id_artist VARCHAR PRIMARY KEY
    , name VARCHAR
    , genres VARCHAR
);

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]

In [343]:
dim_artist_insert = """
INSERT INTO dim_artist

WITH distinct_ids AS (
	SELECT DISTINCT id_primary_artist
	FROM fct_chart_movement
)

SELECT 
	  TRANSLATE(spotify_uri, 'spotify:artist:', '') AS id_artist
	, name
	, genres
FROM staging_artist_id_mapping saim  
JOIN distinct_ids di
	ON TRANSLATE(saim.spotify_uri, 'spotify:artist:', '') = di.id_primary_artist;

""".format('dim_artist', ARN)

In [344]:
%sql $dim_artist_insert

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
150 rows affected.


[]

In [346]:
%%sql
SELECT *
FROM dim_artist
where genres != '[]'
LIMIT 8;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
8 rows affected.


id_artist,name,genres
7Dx7RhX0mFuXhCOUgB01uM,JJ Lin,"['mandopop', 'singaporean mandopop', 'singaporean pop']"
2m62cc253Xvd9qYQ8d2X3d,The Alan Parsons Project,"['album rock', 'art rock', 'classic rock', 'mellow gold', 'progressive rock', 'soft rock', 'symphonic rock']"
4K9OTkRXEFL6NDXFTqVmq9,Elina,"['electropop', 'indie cafe pop', 'pop', 'viral pop']"
6GwZIG7UlxQR6n9kO8QlV6,Antonio Flores,"['cantautor', 'rumba', 'spanish new wave', 'spanish pop']"
5EM6xJN2QNk0cL7EEm9HR9,Radical Face,"['indie folk', 'indie pop', 'jacksonville indie', 'stomp and holler']"
1ll45mCZIveI9KHNZx2DdS,Purple Mountains,"['alternative country', 'alternative rock', 'freak folk', 'indie rock', 'lo-fi', 'modern power pop']"
5W5bDNCqJ1jbCgTxDD0Cb3,Willie Nelson,"['classic country pop', 'country', 'country rock', 'nashville sound', 'outlaw country', 'singer-songwriter', 'texas country']"
57kIMCLPgkzQlXjblX7XXP,Phosphorescent,"['alternative americana', 'chamber pop', 'freak folk', 'indie folk', 'indie pop', 'indie rock', 'new americana', 'stomp and holler', 'swedish americana']"


In [347]:
%%sql
SELECT count(distinct id_artist)
FROM dim_artist
UNION
SELECT count(distinct name)
FROM dim_artist

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
1 rows affected.


count
150


----------------------------------------------------------------------------------------------------------------------------

#### dim_indiv_influence

In [394]:
%%sql
DROP TABLE IF EXISTS dim_indiv_influence;

CREATE TABLE IF NOT EXISTS dim_indiv_influence (
      id_influence INT IDENTITY(100,1) PRIMARY KEY
    , follower_name VARCHAR
    , follower_main_genre VARCHAR
    , follower_active_start INT
    , influencer_name VARCHAR
    , influencer_main_genre VARCHAR
    , influencer_active_start INT
);

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]

In [410]:
dim_indiv_influence_insert = """
INSERT INTO dim_indiv_influence (follower_name, follower_main_genre, follower_active_start, influencer_name, influencer_main_genre, influencer_active_start)

SELECT 
	  follower_name
	, follower_main_genre
	, CAST(follower_active_start AS INT)
	, influencer_name
	, influencer_main_genre
	, CAST(influencer_active_start AS INT)
FROM staging_all_influences sai 
JOIN dim_artist da
	ON CASE 
        WHEN sai.follower_name = da.name THEN 1
        WHEN sai.influencer_name = da.name THEN 1
        ELSE 0 END = 1;

""".format('dim_indiv_influence', ARN)

In [411]:
%sql $dim_indiv_influence_insert

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
967 rows affected.


[]

In [439]:
%%sql
SELECT *
FROM dim_indiv_influence
WHERE follower_name LIKE '%s'
LIMIT 4;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
4 rows affected.


id_influence,follower_name,follower_main_genre,follower_active_start,influencer_name,influencer_main_genre,influencer_active_start
389,Don Williams,Country,1960,Elvis Presley,Pop/Rock,1950
677,The Stranglers,Pop/Rock,1970,The Clash,Pop/Rock,1970
757,Enrique Iglesias,Latin,1990,Chris Isaak,Pop/Rock,1980
933,Enrique Iglesias,Latin,1990,Lionel Richie,R&B;,1970


----------------------------------------------------------------------------------------------------------------------------

#### dim_agg_influence

In [480]:
%%sql
DROP TABLE IF EXISTS dim_agg_influence;

CREATE TABLE IF NOT EXISTS dim_agg_influence (
      influencer_id INT PRIMARY KEY
    , influencer_name VARCHAR
    , depth_0 INT
    , depth_1 INT
    , depth_2 INT
    , depth_3 INT
    , depth_4 INT
    , depth_5 INT
    , depth_6 INT
    , depth_7 INT
    , depth_8 INT
    , depth_9 INT
    , depth_10 INT
    , total_scaled FLOAT
);

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
Done.
Done.


[]

In [481]:
dim_agg_influence_insert = """
INSERT INTO dim_agg_influence 

WITH duplicate_df AS (
SELECT 
	  CAST (influencer_id AS INT)
	, influencer_name
	, CAST (depth_0 AS INT)
	, CAST (depth_1 AS INT)
	, CAST (depth_2 AS INT)
	, CAST (depth_3 AS INT)
	, CAST (depth_4 AS INT)
	, CAST (depth_5 AS INT)
	, CAST (depth_6 AS INT)
	, CAST (depth_7 AS INT)
	, CAST (depth_8 AS INT)
	, CAST (depth_9 AS INT)
	, CAST (depth_10 AS INT) 
	, CAST (total AS FLOAT)
	, ROW_NUMBER() OVER (PARTITION BY influencer_name ORDER BY influencer_id) AS row_num
FROM staging_influence_depth sid 
JOIN dim_artist da
	ON sid.influencer_name = da.name
)

SELECT
	  influencer_id
	, influencer_name
	, depth_0
	, depth_1
	, depth_2
	, depth_3
	, depth_4
	, depth_5
	, depth_6
	, depth_7
	, depth_8
	, depth_9
	, depth_10
	, total
FROM duplicate_df
WHERE 
	row_num = 1;

""".format('dim_agg_influence', ARN)

In [482]:
%sql $dim_agg_influence_insert

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
24 rows affected.


[]

In [483]:
%%sql
SELECT *
FROM dim_agg_influence
ORDER BY total_scaled DESC
LIMIT 4;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
4 rows affected.


influencer_id,influencer_name,depth_0,depth_1,depth_2,depth_3,depth_4,depth_5,depth_6,depth_7,depth_8,depth_9,depth_10,total_scaled
234,Elvis Presley,1,167,1975,3632,4162,4415,4533,4598,4619,4630,4631,1571.224609375
169,Cab Calloway,1,28,396,2594,4634,4847,4880,4884,4884,4884,4884,1029.90625
679,The Clash,1,137,897,1670,1972,2284,2541,2818,3197,3538,3818,783.69921875
927,Willie Nelson,1,67,487,1453,2495,3143,3532,3817,4126,4313,4462,708.294921875


----------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------------------------------------------------------------------

----------------------------------------------------------------------------------------------------------------------------

## Looking at a few records from each of the final tables

In [493]:
%%sql
SELECT *
FROM dim_country
LIMIT 4;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
4 rows affected.


id_country,name
127,Ireland
159,Egypt
168,United Arab Emirates
115,Dominican Republic


In [494]:
%%sql
SELECT *
FROM dim_date
ORDER BY ds
LIMIT 9;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
9 rows affected.


ds,day,day_of_week,weekday_name,week,month,year
2017-01-01,1,0,Sunday,52,1,2017
2017-01-02,2,1,Monday,1,1,2017
2017-01-03,3,2,Tuesday,1,1,2017
2017-01-04,4,3,Wednesday,1,1,2017
2017-01-05,5,4,Thursday,1,1,2017
2017-01-06,6,5,Friday,1,1,2017
2017-01-07,7,6,Saturday,1,1,2017
2017-01-08,8,0,Sunday,1,1,2017
2017-01-09,9,1,Monday,2,1,2017


In [495]:
%%sql
SELECT *
FROM dim_chart
ORDER BY id_chart;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
2 rows affected.


id_chart,name
100,viral50
102,top200


In [496]:
%%sql
SELECT *
FROM dim_descriptors
LIMIT 4;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
4 rows affected.


id_song,descriptor_list,genre
4FvW8LcUhNPTc3sLLqtyti,['aggressive'],screamo
7vNiCVFghw336cXSXVqiGG,['aggressive'],deathcore
153E8CZ5kCe6KkCHS8mZtS,"['aggressive', 'confrontational', 'provocative']",punk
3Hiz0BttM3fIMfJeTw8utZ,['aggressive'],metal


In [257]:
%%sql
SELECT *
FROM fct_chart_movement
LIMIT 5;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
5 rows affected.


id_movement,id_song,id_artists,id_primary_artist,id_secondary_artist,id_chart,current_rank,movement,id_country,ds
109,4qjEkhgIdml3Bh2UswEzEW,['0znuUIjvP0LXEslfaq0Nor'],0znuUIjvP0LXEslfaq0Nor,,100,40,MOVE_DOWN,100,2017-08-01
125,3xXBsjrbG1xQIm1xv1cKOt,['6XyY86QOPPrYVGvF9ch6wz'],6XyY86QOPPrYVGvF9ch6wz,,100,44,MOVE_DOWN,102,2017-08-01
141,2UZtI2HUyLRzqBjodvcUmY,['74eX4C98E4FCrAMl39qRsJ'],74eX4C98E4FCrAMl39qRsJ,,100,20,SAME_POSITION,107,2017-08-01
157,0BP2o4rGe7nkQea1JOWMbl,['3kjuyTCjPG1WMFCiyc5IuB'],3kjuyTCjPG1WMFCiyc5IuB,,100,33,MOVE_UP,109,2017-08-01
173,2AY1UAimvTqjJC8vDJsOyy,"['4obzFoKoKRHIphyHzJ35G3', '4q3ewBCX7sLwd24euuV69X']",4obzFoKoKRHIphyHzJ35G3,4q3ewBCX7sLwd24euuV69X,100,47,MOVE_DOWN,110,2017-08-01


In [498]:
%%sql
SELECT *
FROM dim_artist
where genres != '[]'
LIMIT 4;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
4 rows affected.


id_artist,name,genres
2jw70GZXlAI8QzWeY2bgRc,Nelly Furtado,"['canadian latin', 'canadian pop', 'dance pop', 'pop', 'pop dance', 'pop rap']"
4S76LQXJD6N2uPcLhKejG8,Little Eva,"['brill building pop', 'doo-wop', 'merseybeat', 'rhythm and blues', 'rock-and-roll']"
64PJHZDQTPPVBCdwnv22Wz,Josef Salvat,['gauze pop']
25u4wHJWxCA9vO0CzxAbK7,Lukas Graham,"['danish pop', 'pop', 'scandipop']"


In [499]:
%%sql
SELECT *
FROM dim_indiv_influence
WHERE follower_name LIKE '%s'
LIMIT 4;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
4 rows affected.


id_influence,follower_name,follower_main_genre,follower_active_start,influencer_name,influencer_main_genre,influencer_active_start
389,Don Williams,Country,1960,Elvis Presley,Pop/Rock,1950
677,The Stranglers,Pop/Rock,1970,The Clash,Pop/Rock,1970
757,Enrique Iglesias,Latin,1990,Chris Isaak,Pop/Rock,1980
933,Enrique Iglesias,Latin,1990,Lionel Richie,R&B;,1970


In [500]:
%%sql
SELECT *
FROM dim_agg_influence
ORDER BY total_scaled DESC
LIMIT 4;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
4 rows affected.


influencer_id,influencer_name,depth_0,depth_1,depth_2,depth_3,depth_4,depth_5,depth_6,depth_7,depth_8,depth_9,depth_10,total_scaled
234,Elvis Presley,1,167,1975,3632,4162,4415,4533,4598,4619,4630,4631,1571.224609375
169,Cab Calloway,1,28,396,2594,4634,4847,4880,4884,4884,4884,4884,1029.90625
679,The Clash,1,137,897,1670,1972,2284,2541,2818,3197,3538,3818,783.69921875
927,Willie Nelson,1,67,487,1453,2495,3143,3532,3817,4126,4313,4462,708.294921875


In [516]:
%%sql
SELECT *
FROM dim_song
LIMIT 4;

 * postgresql://capstoneuser:***@capstone-proj.c0df8unkdobb.us-west-2.redshift.amazonaws.com:5439/dev
4 rows affected.


id_song,title,id_artists,id_album,release_date,explicit,duration_ms,tempo,key,time_signature,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
1SxMxIv3dwCLml5gjBHvwO,!@$# Disappearing Blocks,['4bSHri2JUOPBJvMG9UvFe8'],3IX1fKb5wQoEigRTyY9paw,2011-04-27,False,109091,132,6,4,0.34,0.84,-10.75,0.04,0.0,0.94,0.27,0.97
4qoPjxvqZnxKjpk1zisOMN,"""A Man and a Woman"" from A Man and a Woman - Instrumental","['0Lf3tvw0Rzwr38KDrShstn', '0TEjrjC7baC4jJTQiOMsxJ']",5NmuHqpl86h82HKYHVwxtO,1999-05-18,False,171410,131,9,3,0.21,0.02,-31.29,0.04,0.95,0.09,0.1,0.04
3kN1BVeAFHDJbDqhIZ5eQ5,"""Ah, Captain Bachetti...""","['7qGitvYBnbiDSaYGHp7TxR', '3QTDFxvTx8fhAb9FIMowKG', '6vr32iVlTxTUGGXiEHgiqe']",10m71wmOyuPRSB7EB8zKwf,2013-07-02,False,48293,177,3,5,0.62,0.36,-16.79,0.95,0.59,0.0,0.63,0.84
0o9AUoICjntZJWJ55pUbAh,"""Auf, schmetternde Töne der muntern Trompeten"" BWV 207a","['5aIqB5nVVvmFsvSdExz408', '7Fu8GId2mQkNzqDbZbcDWM', '5K6IkoVilt3TIylwWo2hE2', '6jMvI1wpDj9a0NTASTdIQv']",4K2qk6FbGokHBSesOHTi4m,2007-02-27,False,54733,88,6,5,0.53,0.01,-25.64,0.06,0.97,0.0,0.12,0.38
