# Load the data

In [1]:
# import necessary tools
import pandas as pd
import json
import os
import glob

In [2]:
# folder holding the lyric JSON files, plus an empty frame with the target columns
path = './lyrics_files'
lyrics_df = pd.DataFrame(
    columns=['name', 'album', 'year', 'lyrics'],
)

In [3]:
# add data to dataframe
# Every song is collected as a plain dict and the frame is built once at the
# end. DataFrame.append copied the whole frame on every call (quadratic cost)
# and was removed in pandas 2.0, so the row-by-row version no longer runs.
song_records = []
for filename in glob.glob(os.path.join(path, '*.json')):  # only process .json files in folder
    with open(filename, encoding='utf-8', mode='r') as json_file:
        data = json.load(json_file)
    # a file without a 'songs' entry contributes no rows instead of raising
    for song in data.get('songs') or []:
        song_records.append({
            'name': song.get('title'),
            'album': song.get('album'),
            'year': song.get('year'),
            'lyrics': song.get('lyrics'),
        })

# Building the frame from scratch (instead of appending to the existing one)
# keeps this cell idempotent: re-running it no longer duplicates every row.
lyrics_df = pd.DataFrame(song_records, columns=['name', 'album', 'year', 'lyrics'])

# Data overview

In [4]:
# preview the first ten rows of the loaded frame
lyrics_df.head(10)

Unnamed: 0,name,album,year,lyrics
0,Good Ones Go (Interlude),Take Care,2011-11-15,"[Intro]\nOh yeah\nOh yeah, oh yeah, oh\nOh yea..."
1,Keep the Family Close,Views,2016-04-29,[Intro]\nIt's a little chilly out there\nHow y...
2,Using Me,,,Yeah\n6 shit\n\nBeen with so many women I can’...
3,Light Up,Thank Me Later,2010-06-15,"[Verse 1: Drake]\nUh, I've been up for four da..."
4,Catch No Feelings,,,"[Drake]\nI told you from the jump, me and you ..."
5,These Days,,2016-03-29,[Verse 1]\nI've been out walking\nI don't do t...
6,Overdose On Life,,2008-04-30,And I'm a leave on one more note\nOne more mot...
7,Talk Up,Scorpion,2018-06-29,"[Intro: Drake]\nDJ Paul\nTWhy\nAyy, ayy\nLot o..."
8,Cannonball,Heartbreak Drake,,"[Chorus: Colin Munroe]\nHow could I fly, but l..."
9,Thank You Note to 40,,2016-04-28,Before we do this I just wanna say thanks man....


In [6]:
# per-column non-null counts and dtypes; shows how many songs lack an album or year
lyrics_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 392 entries, 0 to 391
Data columns (total 4 columns):
name      392 non-null object
album     241 non-null object
year      280 non-null object
lyrics    392 non-null object
dtypes: object(4)
memory usage: 12.3+ KB


# Data cleaning

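We will only use officially released songs (songs that are in an album) for this project; therefore, we remove every row that has a null value in any column. Note that this drops songs without an album as well as songs that have an album but no release date.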

In [7]:
# keep only fully populated rows (dropna's defaults: row-wise, drop on any null)
lyrics_df = lyrics_df.dropna()

In [8]:
# re-check the non-null counts after dropping incomplete rows
lyrics_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 221 entries, 0 to 390
Data columns (total 4 columns):
name      221 non-null object
album     221 non-null object
year      221 non-null object
lyrics    221 non-null object
dtypes: object(4)
memory usage: 8.6+ KB


In [9]:
# preview the first ten remaining rows (the index keeps its original labels, so gaps are expected)
lyrics_df.head(10)

Unnamed: 0,name,album,year,lyrics
0,Good Ones Go (Interlude),Take Care,2011-11-15,"[Intro]\nOh yeah\nOh yeah, oh yeah, oh\nOh yea..."
1,Keep the Family Close,Views,2016-04-29,[Intro]\nIt's a little chilly out there\nHow y...
3,Light Up,Thank Me Later,2010-06-15,"[Verse 1: Drake]\nUh, I've been up for four da..."
7,Talk Up,Scorpion,2018-06-29,"[Intro: Drake]\nDJ Paul\nTWhy\nAyy, ayy\nLot o..."
10,Juice,Born Successful,2009-12-11,[Produced by Boi-1da]\n\n[Intro]\nBoi-1da... D...
13,Come Thru,Nothing Was the Same,2013-09-24,[Intro]\nYeah\nWe had the type of nights where...
14,Summer Games,Scorpion,2018-06-29,[Chorus]\nI think you're changin' your mind\nS...
15,9,Views,2016-04-29,"[Intro]\nYeah, for the dogs dem, you know\nMan..."
17,Glow,More Life,2017-03-18,"[Chorus: Kanye West]\nWatch out for me, I'm bo..."
19,Hotline Bling,Views,2015-07-25,[Intro]\nYou used to call me on my\nYou used t...


The 'year' column currently holds full release dates; let's reduce it to just the year:

In [11]:
# parse the release-date strings, then keep only the four-digit year (as text)
release_dates = pd.to_datetime(lyrics_df['year'])
lyrics_df['year'] = release_dates.dt.strftime('%Y')
lyrics_df.head(10)

Unnamed: 0,name,album,year,lyrics
0,Good Ones Go (Interlude),Take Care,2011,"[Intro]\nOh yeah\nOh yeah, oh yeah, oh\nOh yea..."
1,Keep the Family Close,Views,2016,[Intro]\nIt's a little chilly out there\nHow y...
3,Light Up,Thank Me Later,2010,"[Verse 1: Drake]\nUh, I've been up for four da..."
7,Talk Up,Scorpion,2018,"[Intro: Drake]\nDJ Paul\nTWhy\nAyy, ayy\nLot o..."
10,Juice,Born Successful,2009,[Produced by Boi-1da]\n\n[Intro]\nBoi-1da... D...
13,Come Thru,Nothing Was the Same,2013,[Intro]\nYeah\nWe had the type of nights where...
14,Summer Games,Scorpion,2018,[Chorus]\nI think you're changin' your mind\nS...
15,9,Views,2016,"[Intro]\nYeah, for the dogs dem, you know\nMan..."
17,Glow,More Life,2017,"[Chorus: Kanye West]\nWatch out for me, I'm bo..."
19,Hotline Bling,Views,2015,[Intro]\nYou used to call me on my\nYou used t...


Sort the rows alphabetically by album name:

In [12]:
# order the rows by album title (ascending); the old index labels are kept for now
lyrics_df = lyrics_df.sort_values(by='album')
lyrics_df.head(10)

Unnamed: 0,name,album,year,lyrics
155,Right to Left,Born Successful,2009,[Intro]\n[?]\n\n[Chorus: Drake]\nBlue green je...
200,Forever (Born Successful),Born Successful,2009,[Chorus: Drake]\nIt may not mean nothing to y'...
189,The Winner,Born Successful,2009,[Verse 1]\nI’m performing tonight you know tha...
181,I Do This,Born Successful,2009,"[Verse 1: Drake]\nUh, shit's all good\nThe dea..."
120,Fallen,Born Successful,2009,"[Intro: Drake]\nYeah, it's Drake, KC\n\n[Verse..."
328,Do It Now,Born Successful,2009,"[Intro]\nUh, yeah, alright\nUh, well alright\n..."
286,The Search,Born Successful,2009,(Saukrates intro)\nThey say we killin' 'em all...
10,Juice,Born Successful,2009,[Produced by Boi-1da]\n\n[Intro]\nBoi-1da... D...
203,Man of the Year,Comeback Season,2007,"[Verse 1: Drake]\nDamn, I done walked in here\..."
168,Give Ya,Comeback Season,2007,"[Verse 1: Drake]\nCheck, look\nAnd I ain't try..."


Finally, we reset the index:

In [13]:
# replace the shuffled index labels with a fresh 0..n-1 index; drop=True discards
# the old labels instead of keeping them as a new column
lyrics_df = lyrics_df.reset_index(drop=True)
lyrics_df.head(10)

Unnamed: 0,name,album,year,lyrics
0,Right to Left,Born Successful,2009,[Intro]\n[?]\n\n[Chorus: Drake]\nBlue green je...
1,Forever (Born Successful),Born Successful,2009,[Chorus: Drake]\nIt may not mean nothing to y'...
2,The Winner,Born Successful,2009,[Verse 1]\nI’m performing tonight you know tha...
3,I Do This,Born Successful,2009,"[Verse 1: Drake]\nUh, shit's all good\nThe dea..."
4,Fallen,Born Successful,2009,"[Intro: Drake]\nYeah, it's Drake, KC\n\n[Verse..."
5,Do It Now,Born Successful,2009,"[Intro]\nUh, yeah, alright\nUh, well alright\n..."
6,The Search,Born Successful,2009,(Saukrates intro)\nThey say we killin' 'em all...
7,Juice,Born Successful,2009,[Produced by Boi-1da]\n\n[Intro]\nBoi-1da... D...
8,Man of the Year,Comeback Season,2007,"[Verse 1: Drake]\nDamn, I done walked in here\..."
9,Give Ya,Comeback Season,2007,"[Verse 1: Drake]\nCheck, look\nAnd I ain't try..."


Looks good! Now we save the data as a csv file and move on.

In [15]:
# write the cleaned frame to a CSV file, leaving out the index column
lyrics_path = "./lyrics.csv"
lyrics_df.to_csv(path_or_buf=lyrics_path, index=False)