# Notebook 02: Preprocessing

In [1]:
import json, time, re, requests, nltk, pickle
import pandas as pd
import psycopg2 as pg2

from psycopg2.extras import RealDictCursor, Json
from requests.packages.urllib3.util.retry import Retry
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
from nltk.tokenize import RegexpTokenizer
from nltk.stem import WordNetLemmatizer

In [2]:
# Execute the credential scripts in this kernel's namespace so their
# module-level names become notebook globals.
# NOTE(review): sql_cred.py presumably defines DBNAME, IP_ADDRESS, USER and
# PASSWORD (used by con_cur_to_db below) -- confirm against the asset files.
# Keep comments on their own lines: text after a %run path is passed to the
# script as arguments.
%run ../assets/sql_cred.py
%run ../assets/spotify_cred.py

In [3]:
def con_cur_to_db(dbname=DBNAME, dict_cur=None):
    """Open a PostgreSQL connection and create a cursor on it.

    Parameters
    ----------
    dbname : str
        Database to connect to; defaults to the notebook-level DBNAME.
    dict_cur : bool, optional
        When truthy the cursor is created with psycopg2's RealDictCursor
        factory; otherwise the driver's default cursor is used.

    Returns
    -------
    tuple
        ``(connection, cursor)``. The connection is left open; the caller
        is responsible for committing and closing it.
    """
    connection = pg2.connect(
        host=IP_ADDRESS,
        dbname=dbname,
        user=USER,
        password=PASSWORD,
    )
    # Only pass a cursor_factory when the dict-style cursor was requested.
    cursor_options = {'cursor_factory': RealDictCursor} if dict_cur else {}
    return connection, connection.cursor(**cursor_options)
    
def execute_query(query, dbname=DBNAME, dict_cur=None, command=False, params=None):
    """Run one SQL statement on a fresh connection and always close it.

    Parameters
    ----------
    query : str
        SQL text to execute.
    dbname : str
        Database to connect to; defaults to the notebook-level DBNAME.
    dict_cur : bool, optional
        Forwarded to con_cur_to_db; truthy selects a RealDictCursor.
    command : bool
        False (default): treat the statement as a read, fetch every row and
        return them. True: treat it as a write, commit and return None.
    params : sequence or mapping, optional
        Values for placeholders in ``query``, bound by the driver. Prefer
        this over formatting values into the SQL string. When None the
        query is executed exactly as given.

    Returns
    -------
    list or None
        All fetched rows when ``command`` is False, otherwise None.

    Raises
    ------
    Any exception raised while connecting, executing, fetching or
    committing propagates to the caller; the connection is closed first.
    """
    con, cur = con_cur_to_db(dbname, dict_cur)
    try:
        cur.execute(query, params)
        if command:
            con.commit()  # persist the change on the server
            return None
        return cur.fetchall()
    finally:
        # Runs on success and on error, so a failing query no longer
        # leaves an open server connection behind.
        con.close()

In [4]:
# Fetch every row of track_list and index the resulting frame by track_id.
query = '''SELECT * FROM track_list;'''
response = execute_query(query, dict_cur=True)
track_df = pd.DataFrame(response).set_index('track_id')
track_df.head()

Unnamed: 0_level_0,album_name,artist_name,lyrics,playlist_id,playlist_name,playlist_owner,track_name
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0h7TlF8gKb61aSm874s3cV,I Can't Tell You How Much It Hurts,moow,\n\nIf your needle is near\nNeedle is near\nYo...,37i9dQZF1DXarebqD2nAVg,Tender,spotify,You'r in My Head
6koowTu9pFHPEcZnACLKbK,Coming Home,Leon Bridges,\n\n[Verse 1]\nBrown skin girl on the other si...,37i9dQZF1DX4adj7PFEBwf,Wedding Bells,spotify,Brown Skin Girl
1JkhKUXAoNivi87ipmV3rp,Back To Love (Deluxe Version),Anthony Hamilton,"\n\n[Verse 1]\nIt's simple, I love it\nHaving ...",37i9dQZF1DX4adj7PFEBwf,Wedding Bells,spotify,Best of Me
51lPx6ZCSalL2kvSrDUyJc,The Search for Everything,John Mayer,\n\n[Intro: Whistling]\n\n[Verse 1]\nA great b...,37i9dQZF1DX4adj7PFEBwf,Wedding Bells,spotify,You're Gonna Live Forever in Me
3vqlZUIT3rEmLaYKDBfb4Q,Songs In The Key Of Life,Stevie Wonder,\n\n[Verse 1]\nIsn't she lovely\nIsn't she won...,37i9dQZF1DX4adj7PFEBwf,Wedding Bells,spotify,Isn't She Lovely


In [5]:
# Reorder the columns for readability; playlist_owner is not selected and is
# therefore dropped from track_df here.
track_df = track_df[[
    'track_name',
    'artist_name',
    'album_name',
    'playlist_name',
    'playlist_id',
    'lyrics',
]]
track_df.head()

Unnamed: 0_level_0,track_name,artist_name,album_name,playlist_name,playlist_id,lyrics
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0h7TlF8gKb61aSm874s3cV,You'r in My Head,moow,I Can't Tell You How Much It Hurts,Tender,37i9dQZF1DXarebqD2nAVg,\n\nIf your needle is near\nNeedle is near\nYo...
6koowTu9pFHPEcZnACLKbK,Brown Skin Girl,Leon Bridges,Coming Home,Wedding Bells,37i9dQZF1DX4adj7PFEBwf,\n\n[Verse 1]\nBrown skin girl on the other si...
1JkhKUXAoNivi87ipmV3rp,Best of Me,Anthony Hamilton,Back To Love (Deluxe Version),Wedding Bells,37i9dQZF1DX4adj7PFEBwf,"\n\n[Verse 1]\nIt's simple, I love it\nHaving ..."
51lPx6ZCSalL2kvSrDUyJc,You're Gonna Live Forever in Me,John Mayer,The Search for Everything,Wedding Bells,37i9dQZF1DX4adj7PFEBwf,\n\n[Intro: Whistling]\n\n[Verse 1]\nA great b...
3vqlZUIT3rEmLaYKDBfb4Q,Isn't She Lovely,Stevie Wonder,Songs In The Key Of Life,Wedding Bells,37i9dQZF1DX4adj7PFEBwf,\n\n[Verse 1]\nIsn't she lovely\nIsn't she won...


In [7]:
# Discard every row that has a missing value in any column.
track_df = track_df.dropna(axis=0)

In [11]:
# Sanity check: (rows, columns) currently held in track_df.
track_df.shape

(2276, 6)

In [8]:
# Working frame for text preprocessing: just the identifying fields and the
# lyrics. The explicit .copy() makes lyrics_df independent of track_df, so
# adding or modifying columns on it later is unambiguous and does not raise
# SettingWithCopyWarning.
lyrics_df = track_df[['track_name', 'artist_name', 'lyrics']].copy()

In [9]:
# Preview the first rows of the lyrics-only frame.
lyrics_df.head()

Unnamed: 0_level_0,track_name,artist_name,lyrics
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0h7TlF8gKb61aSm874s3cV,You'r in My Head,moow,\n\nIf your needle is near\nNeedle is near\nYo...
6koowTu9pFHPEcZnACLKbK,Brown Skin Girl,Leon Bridges,\n\n[Verse 1]\nBrown skin girl on the other si...
1JkhKUXAoNivi87ipmV3rp,Best of Me,Anthony Hamilton,"\n\n[Verse 1]\nIt's simple, I love it\nHaving ..."
51lPx6ZCSalL2kvSrDUyJc,You're Gonna Live Forever in Me,John Mayer,\n\n[Intro: Whistling]\n\n[Verse 1]\nA great b...
3vqlZUIT3rEmLaYKDBfb4Q,Isn't She Lovely,Stevie Wonder,\n\n[Verse 1]\nIsn't she lovely\nIsn't she won...


In [10]:
# Look at one raw lyrics string (second row) to see what needs cleaning,
# e.g. bracketed section tags such as [Verse 1] and newline characters.
lyrics_df['lyrics'].iloc[1]

"\n\n[Verse 1]\nBrown skin girl on the other side of the room\nBrown skin girl staring with her brown eyes\nOoh, baby, don't you know you're a cutie pie?\n\n[Chorus]\nPrincess, little honey with the polka-dot dress on\nRuby-lipped lady whose name I don't know\nLet me tell you, darling\nOoh ooh ooh\nOoh whoo-ooh\nOoh ooh ooh\nOoh whoo-ooh, OK\n\n[Verse 2]\nBrown skin girl with the white pearls 'round her neck\nBrown skin girl, let me hold you close under the white moon\nCause baby, I'm ready and you know that I'm waiting on you\n\n[Chorus]\nPrincess, little honey with the polka-dot dress on\nRuby-lipped lady whose name I don't know\nLet me tell you, darling\nOoh ooh ooh\nOoh whoo-ooh\nOoh ooh ooh\nOoh whoo-ooh\nOoh ooh ooh\nOoh whoo-ooh\nOoh ooh ooh\nOoh whoo-ooh\n\n"