In [None]:
#######################################
############ DOCUMENTATION ############
#######################################
# How to Get started with Genius API: https://towardsdatascience.com/song-lyrics-genius-api-dcc2819c29
# Genius API documentation: https://docs.genius.com/#referents-h2
# Official lyricsgenius documentation: https://lyricsgenius.readthedocs.io/en/master/index.html

#######################################
############ PROJECT README ###########
#######################################
# The following code is used to obtain English Translated lyrics of non-english songs
# Pre-Requisites: A list of the 'English Translated' Album titles
# These can be found on genius.com (e.g. https://genius.com/albums/Genius-english-translations/Bts-love-yourself-her-english-translation)

# Note: I didn't find an automated way to filter artist songs on keywords. It did not seem that the
#     English translated songs were returned with genius.search_artist

In [1]:
# install lyricsgenius
!pip install lyricsgenius

Collecting lyricsgenius
  Downloading lyricsgenius-3.0.1-py3-none-any.whl (59 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 kB[0m [31m856.1 kB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
Installing collected packages: lyricsgenius
Successfully installed lyricsgenius-3.0.1


In [16]:
import lyricsgenius as lg
genius = lg.Genius('your_Client_Access_Token_Goes_Here',verbose=False, skip_non_songs=False, remove_section_headers=False)

In [166]:
# Retrieve a list of songs associated with a specific artist; can also sort by 'popularity'
song_search=(genius.search_artist("BTS", max_songs=100, sort='title')).songs

In [167]:
# Iterate through object to retrieve song names found
song_name_list=[song.title for song in song_search]

In [168]:
# List of song titles that you can now filter or search for specific keywords
song_name_list[0:5]

['00:00 (Zero O’Clock)',
 '134340',
 '2018 United Nations General Assembly Speech',
 '21세기 소녀 (21st Century Girl)',
 '2학년 (2nd Grade / Second Grade / So 4 More)']

In [137]:
len(song_name_list)

312

In [170]:
# Search for a keyword in all the list elements
# Tried to filter song list to only the English Translated songs but no song titles with English were returned
import re
english_songs=[x for x in song_name_list if re.findall("English",x)]

In [171]:
english_songs[0:5]

[]

In [169]:
# Testing to see if English Translated songs exist and it looks like they do
english_songs=genius.search_song("BTS - Go (Go Go) (English Translation)", "BTS")

In [141]:
english_songs.title

'BTS - 고민보다 Go (Go Go) (English Translation)'

In [165]:
# Final approach: search for the English Translated album which will retrieve all the English translated songs
# Good approach for a finite and specific list of albums
# Get album names from manual search on genius.com

album = genius.search_album("BTS- Love Yourself ‘Her’ (English Translation)", "BTS")
print(album.name)

'BTS- Love Yourself 承 ‘Her’ (English Translation)'

In [173]:
# Retreieve song titles on the album to confirm they are English Translated
album_tracks=album.tracks
[track.song.title for track in album_tracks]

['BTS - Intro: Serendipity (세렌디피티) (English Translation)',
 'BTS - DNA (English Translation)',
 'BTS - Best Of Me (English Translation)',
 'BTS - 보조개 (\u200bDimple) (English Translation)',
 'BTS - Pied Piper (English Translation)',
 'BTS - MIC Drop (English Translation)',
 'BTS - 고민보다 Go (Go Go) (English Translation)',
 'BTS - Outro: Her (English Translation)',
 'BTS - 바다 (Sea) (English Translation)']

In [190]:
# Save lyrics of all tracks on album into JSON file (will save to working directory)
album.save_lyrics()

Lyrics_BTSLoveYourself承HerEnglishTranslation.json already exists. Overwrite?
(y/n): n
Skipping file save.



In [184]:
# View of JSON file data
data

{'_type': 'album',
 'api_path': '/albums/443067',
 'cover_art_thumbnail_url': 'https://images.genius.com/9c2a9724c862553826e624d4782085c6.300x300x1.png',
 'cover_art_url': 'https://images.genius.com/9c2a9724c862553826e624d4782085c6.1000x1000x1.png',
 'full_title': 'BTS- Love Yourself 承 ‘Her’ (English Translation) by Genius English Translations',
 'id': 443067,
 'name': 'BTS- Love Yourself 承 ‘Her’ (English Translation)',
 'name_with_artist': 'BTS- Love Yourself 承 ‘Her’ (English Translation) (artist: Genius English Translations)',
 'release_date_components': {'year': 2017, 'month': 9, 'day': 18},
 'url': 'https://genius.com/albums/Genius-english-translations/Bts-love-yourself-her-english-translation',
 'artist': {'_type': 'artist',
  'api_path': '/artists/196943',
  'header_image_url': 'https://images.genius.com/38f8fc30844014c124b4e4a28a39ec40.39x35x1.png',
  'id': 196943,
  'image_url': 'https://images.genius.com/03131b2068adac6028ecefed2c657d97.222x222x1.png',
  'index_character': 'g'

In [201]:
# Extract JSON data into dataframe
from pandas.io.json import json_normalize
import json

with open('Lyrics_BTSLoveYourself承HerEnglishTranslation.json') as data_file:    
    d= json.load(data_file)  

df = json_normalize(d, 'tracks')
df['album']=d['name']
df.head()

  df = json_normalize(d, 'tracks')


Unnamed: 0,number,song._type,song.annotation_count,song.api_path,song.artist_names,song.full_title,song.header_image_thumbnail_url,song.header_image_url,song.id,song.instrumental,...,song.primary_artist.image_url,song.primary_artist.index_character,song.primary_artist.is_meme_verified,song.primary_artist.is_verified,song.primary_artist.name,song.primary_artist.slug,song.primary_artist.url,song.artist,song.lyrics,album
0,1,song,8,/songs/3821249,Genius English Translations,BTS - Intro: Serendipity (세렌디피티) (English Tran...,https://images.genius.com/9c2a9724c862553826e6...,https://images.genius.com/9c2a9724c862553826e6...,3821249,False,...,https://images.genius.com/03131b2068adac6028ec...,g,False,False,Genius English Translations,Genius-english-translations,https://genius.com/artists/Genius-english-tran...,Genius English Translations,BTS - Intro: Serendipity (세렌디피티) (English Tran...,BTS- Love Yourself 承 ‘Her’ (English Translation)
1,2,song,4,/songs/3804151,Genius English Translations,BTS - DNA (English Translation) by Genius Engl...,https://images.genius.com/defe85d4b2185eb430b3...,https://images.genius.com/defe85d4b2185eb430b3...,3804151,False,...,https://images.genius.com/03131b2068adac6028ec...,g,False,False,Genius English Translations,Genius-english-translations,https://genius.com/artists/Genius-english-tran...,Genius English Translations,BTS - DNA (English Translation) Lyrics[Verse 1...,BTS- Love Yourself 承 ‘Her’ (English Translation)
2,3,song,1,/songs/3821250,Genius English Translations,BTS - Best Of Me (English Translation) by Geni...,https://images.genius.com/9c2a9724c862553826e6...,https://images.genius.com/9c2a9724c862553826e6...,3821250,False,...,https://images.genius.com/03131b2068adac6028ec...,g,False,False,Genius English Translations,Genius-english-translations,https://genius.com/artists/Genius-english-tran...,Genius English Translations,BTS - Best Of Me (English Translation) Lyrics[...,BTS- Love Yourself 承 ‘Her’ (English Translation)
3,4,song,3,/songs/3821251,Genius English Translations,BTS - 보조개 (​Dimple) (English Translation) by G...,https://images.genius.com/9c2a9724c862553826e6...,https://images.genius.com/9c2a9724c862553826e6...,3821251,False,...,https://images.genius.com/03131b2068adac6028ec...,g,False,False,Genius English Translations,Genius-english-translations,https://genius.com/artists/Genius-english-tran...,Genius English Translations,BTS - 보조개 (​Dimple) (English Translation) Lyri...,BTS- Love Yourself 承 ‘Her’ (English Translation)
4,5,song,8,/songs/3808586,Genius English Translations,BTS - Pied Piper (English Translation) by Geni...,https://images.genius.com/9c2a9724c862553826e6...,https://images.genius.com/9c2a9724c862553826e6...,3808586,False,...,https://images.genius.com/03131b2068adac6028ec...,g,False,False,Genius English Translations,Genius-english-translations,https://genius.com/artists/Genius-english-tran...,Genius English Translations,BTS - Pied Piper (English Translation) Lyrics[...,BTS- Love Yourself 承 ‘Her’ (English Translation)


In [222]:
# Inspect Lyrics field
# NOTE: there's noise in the lyrics such as Related Songs24Embed', 'Related Songs', '26Embed' 
# that will need to be removed
# create a flag in our final dataset to see what songs have this noise

pd.set_option('display.max_colwidth', None)
df['song.lyrics'][0]

'BTS - Good Day (English Translation) Lyrics[Chorus: Jung Kook, Jimin]\nI said it\'s alright, oh yeah\nOne day it\'ll be a good day, for sure\nYou won\'t be alone, always\nI\'ll be by your side, we\'ll be okay\nIf we\'re connected, (sunny light) surely it\'ll be brighter tomorrow\n\n[Verse 1: SUGA, j-hope, Jung Kook]\nAs you share happiness, it multiplies\nBut if you share sadness also, it multiplies too\nSo always laugh\nAnd rest on my shoulder\nWe may argue sometimes, but we make up quickly\nI understand your feelings too, well~ it\'s written on your face\nThings like appearance, hmm hmm it\'s as if it doesn\'t matter\nIt has no relationship with our "relationship"\nIt\'s finе even if it\'s evеryday\nShowing off at this point, no thanks\nEven without money, just seeing you is OK\nSo don\'t worry, let\'s laugh again\nYep, bro (Yeo, bro), always the same\nYep, bro (Yep, bro), by your side\n(Yeah) At anytime, we happy together\nThis unchanging feeling is the same as from back then (Oh, 

In [205]:
pd.reset_option('display.max_colwidth')

In [189]:
# List of columns in dataframe
for col in df.columns:
    print(col)

number
song._type
song.annotation_count
song.api_path
song.artist_names
song.full_title
song.header_image_thumbnail_url
song.header_image_url
song.id
song.instrumental
song.lyrics_owner_id
song.lyrics_state
song.lyrics_updated_at
song.path
song.pyongs_count
song.relationships_index_url
song.release_date_components.year
song.release_date_components.month
song.release_date_components.day
song.release_date_for_display
song.song_art_image_thumbnail_url
song.song_art_image_url
song.stats.unreviewed_annotations
song.stats.hot
song.stats.pageviews
song.title
song.title_with_featured
song.updated_by_human_at
song.url
song.featured_artists
song.primary_artist._type
song.primary_artist.api_path
song.primary_artist.header_image_url
song.primary_artist.id
song.primary_artist.image_url
song.primary_artist.index_character
song.primary_artist.is_meme_verified
song.primary_artist.is_verified
song.primary_artist.name
song.primary_artist.slug
song.primary_artist.url
song.artist
song.lyrics


In [215]:
#########################################
##### Loop through all BTS Albums #######
#########################################
# Use above process to get all BTS English Translated song lyrics

album_list=['BTS - Love Yourself 結 ‘Answer’ (English Translation)',
           'BTS - MAP OF THE SOUL: 7 (English Translation)',
           'BTS - Love Yourself 轉 ‘Tear’ (English Translation)',
           'BTS - You Never Walk Alone (English Translation)',
           'BTS - 화양연화 Young Forever (The Most Beautiful Moment In Life: Young Forever) (English Translation)',
           'BTS - MAP OF THE SOUL: PERSONA (English Translation)',
           'BTS - WINGS (English Translation)',
           'BTS - BE (English Translation)',
           'BTS - BTS, The Best (English Translation)',
           'BTS - 화양연화 pt.2 (The Most Beautiful Moment In Life, Pt.2) (English Translation)',
           'BTS - Skool Luv Affair (English Translation)',
           'BTS - Dark&Wild (English Translation)',
           'BTS - MAP OF THE SOUL: 7 ~ The Journey ~ (English Translation)',
           'BTS - 2 Cool 4 Skool (English Translation)',
           'BTS - Face Yourself (English Translation)',
           'BTS - O!RUL8,2? (English Translation)',
           'BTS - YOUTH (English Translation)']

bts_songs_df=pd.DataFrame()
counter=0
for album in album_list:
    album = genius.search_album(album, "BTS")   #search by album name and artist
    file_name='lyrics_bts_album'+str(counter)+'.json'
    album.save_lyrics(filename=file_name) #save album info into JSON
    
    with open(file_name) as data_file:    
        d= json.load(data_file)
    
    df = json_normalize(d, 'tracks')
    df['album']=d['name']
    bts_songs_df=bts_songs_df.append(df)
    counter+=1

Wrote lyrics_bts_album0.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album1.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album2.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album3.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album4.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album5.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album6.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album7.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album8.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album9.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album10.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album11.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album12.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album13.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album14.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album15.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


Wrote lyrics_bts_album16.json.


  df = json_normalize(d, 'tracks')
  bts_songs_df=bts_songs_df.append(df)


In [219]:
# Inspect dataframe
bts_songs_df['song.release_date_for_display'].tail()

8    September 11, 2013
9    September 11, 2013
0     September 7, 2016
1     September 7, 2016
2     September 7, 2016
Name: song.release_date_for_display, dtype: object

In [220]:
len(bts_songs_df)

201

In [221]:
bts_songs_df.to_pickle("bts_lyrics_dataset.pkl")