In [53]:
# include dependencies for df manipulation and visualization
%matplotlib inline
from matplotlib import style
style.use('fivethirtyeight')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime as dt

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, MetaData, Table, Column, Numeric,insert, Integer, VARCHAR, update, text, delete, func, inspect

# import Flask
from flask import Flask

# import Scipy
import scipy.stats as st

In [35]:
# Build the RDS connection URL without storing the password in the notebook.
# The password comes from the RDS_PASSWORD environment variable, falling back to
# an interactive prompt, and is URL-quoted so characters such as '@' or '/'
# cannot break the URL. Do not display rds_connection_string: it contains the
# real password once built.
import os
from getpass import getpass
from urllib.parse import quote_plus

rds_password = os.environ.get("RDS_PASSWORD") or getpass("RDS password for user 'songlyrics': ")
rds_connection_string = (
    "postgresql://songlyrics:"
    + quote_plus(rds_password)
    + "@lyrical-analysis.cxe3nmyieexj.us-east-1.rds.amazonaws.com:5432/musicalDbanalytics"
)
engine = create_engine(rds_connection_string)

In [24]:
# Create a schema inspector bound to the engine, then list the column
# definitions (name, type, nullability, ...) of the albumbestsellers table.
inspector = inspect(engine)
inspector.get_columns(table_name="albumbestsellers")

[{'name': 'artist',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'album',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'released',
  'type': INTEGER(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'genre',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'total_certified_copies_from_available_markets',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'claimed_sales_millions',
  'type': INTEGER(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None}]

In [25]:
# get columns from the masterlyrics table
# (reuses the `inspector` created in the albumbestsellers cell)
inspector.get_columns("masterlyrics")

[{'name': 'artist',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'song',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None},
 {'name': 'lyrics',
  'type': TEXT(),
  'nullable': True,
  'default': None,
  'autoincrement': False,
  'comment': None}]

In [36]:
# Pull every record from masterlyrics.
# Columns are named explicitly so the positional order of each row
# (artist, song, lyrics) does not depend on the table definition.
sql = text('SELECT artist, song, lyrics FROM masterlyrics')

# Fetch all the records through an explicit connection: Engine.execute() is
# deprecated in SQLAlchemy 1.4 and removed in 2.0, and the context manager
# returns the connection to the pool even if the query fails.
with engine.connect() as conn:
    result = conn.execute(sql).fetchall()

# Report the row count and preview a few rows instead of dumping the whole table.
print(f"{len(result)} rows fetched from masterlyrics")
result[:3]



In [45]:
# Pull the songs and lyrics of one best-selling album by joining
# albumbestsellers -> albumjoiner -> masterlyrics.
# The album title is a bound parameter (:album), so the same statement can be
# reused for other albums without editing the SQL text.
album_title = "Music Box"
sql_music_box = text("""SELECT albumbestsellers.artist,
albumbestsellers.album,
albumbestsellers.claimed_sales_millions,
masterlyrics.lyrics,
masterlyrics.song FROM albumbestsellers
INNER JOIN albumjoiner
ON albumjoiner.album2 = albumbestsellers.album
INNER JOIN masterlyrics
ON masterlyrics.song = albumjoiner.song
WHERE albumbestsellers.album = :album""")

# Fetch all the records through an explicit connection: Engine.execute() is
# deprecated in SQLAlchemy 1.4 and removed in 2.0.
with engine.connect() as conn:
    result_music_box = conn.execute(sql_music_box, {"album": album_title}).fetchall()
print(result_music_box)



[('Mariah Carey', 'Music Box', 28, "When I am lost You shine a light for me and set me free When I am low You wash away my tears And take me through The loneliness And emptiness Through ... (765 characters truncated) ... urrounding me And baby, all I have, I want to give to thee Want to give you all of my love Now and forever my love All I have, I want to give to thee", 'Music Box'), ('Mariah Carey', 'Music Box', 28, "Do you know How it feels Lying here without you, baby? You could never understand what's happening to me So alone Nothing's real I just dream about y ... (706 characters truncated) ... by And even though you're not my friend (Oh, no, no, no) I would give my all To have you here Just to hold you once again Just to hold you once again", 'Just To Hold You Once Again'), ('Mariah Carey', 'Music Box', 28, "If you only knew What I feel for you If you only lived for me The way I live for you I'd be in heaven My dreams would come true 'Cause all I've ever  ... (512 characters trun

In [38]:
# pull records from albumbestsellers
sql_a = text('SELECT * from albumbestsellers')

# Fetch all the records through an explicit connection: Engine.execute() is
# deprecated in SQLAlchemy 1.4 and removed in 2.0, and the context manager
# returns the connection to the pool even if the query fails.
with engine.connect() as conn:
    result_album = conn.execute(sql_a).fetchall()
print(result_album)

[('Michael Jackson', 'Thriller', 1982, 'Pop, post-disco, funk, rock', '50.2 US: 34 million[9] JPN: 100,000[10] UK: 4.5 million[11] GER: 1.5 million[12] FRA: 1 million[13] CAN: 2 million[14] AUS: 1.12 million[15] MEX: 2.6 ... (82 characters truncated) ... WI: 300,000[21] AUT: 400,000[22] FIN: 119,061[23] NZ: 180,000[24][25] HKG: 15,000[26] DEN: 480,000[27] DEN: 60,000[28] HUN: 6,000[29] POR: 40,000[30]', 70), ('AC/DC', 'Back in Black', 1980, 'Hard rock', '29.5 US: 25 million[9] UK: 600,000[11] GER: 1 million[12] FRA: 600,000[13] CAN: 1 million[14] AUS: 840,000[15] ITA: 150,000[18] ARG: 180,000[31] SWI: 100,000[21] AUT: 50,000[22]', 50), ('Whitney Houston / various artists', 'The Bodyguard', 1992, 'R&B, soul, pop, soundtrack', '32.4 US: 18 million[9] JPN: 3 million[33] UK: 2.25 million[11] GER: 1.75 million[12] FRA: 1.72 million[13] CAN: 1.4 million[14] AUS: 350,000[15] BRA: ... (3 characters truncated) ... 2 million[34] NLD: 400,000[17] SWE: 350,000[19] SPA: 800,000[35] ARG: 350,000[31]

In [47]:
# Convert the masterlyrics rows to a DataFrame.
# Column names are assigned at construction instead of renaming the positional
# labels 0/1/2 in place: that rename silently does nothing when the frame does
# not have integer column labels (e.g. an empty result, or presumably a
# pandas/SQLAlchemy combination that picks up the row field names; verify).
# The third column keeps the name "lyric" (the table column is "lyrics").
lyrics_df = pd.DataFrame(result, columns=["artist", "song", "lyric"])
lyrics_df.head()

Unnamed: 0,artist,song,lyric
0,Carly Rae Jepsen,Cup Of Tea,"A cup of tea, a cup of tea I've been driving a..."
1,Carly Rae Jepsen,Everywhere You Look (The Fuller House Theme),"""Whatever happened to predictability? The milk..."
2,XXXTENTACION,The Explanation,"17 My collection of nightmares, thoughts, and ..."
3,Wiz Khalifa,The Last,Y'll already know what it is We the last... At...
4,Wiz Khalifa,This Time Around,"""Just the wind in my hair from the top 'cause ..."


In [None]:
# tokenize the data


In [25]:
# Release the engine's pooled database connections.
# No ORM Session named `session` is created in the cells visible here, so the
# original session.close() raises NameError on a fresh Restart & Run All.
engine.dispose()