In [56]:
import pandas as pd
from datetime import timedelta
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt

import lyricsgenius
from fuzzywuzzy import fuzz

In [57]:
#function to convert strings to dates :)
def string_to_datetime(string):
    result = datetime.strptime(string, "%Y-%m-%d")       
    return result

In [61]:
#read in radio, streaming, sales data
radio = pd.read_csv('subcharts - Radio_Kpop.csv')
streaming = pd.read_csv('subcharts - Streaming_Kpop.csv')
sales = pd.read_csv('subcharts - Sales_Kpop.csv')
hot100 = pd.read_csv('BBSHRK - Spotify_BBH100_Kpop.csv')

In [62]:
##standardize artist and song names 
for chart in [sales, streaming, radio, hot100]:
    #drop random unnamed columns
    drop_columns = [c for c in chart.columns if 'Unnamed' in c ]
    chart.drop(columns=drop_columns, inplace=True)

    #remove wiki citations, standardize whitespace, remove quotation marks
    #regex patterns and replacement strings
    regex_pats = [[r"(\[\d+\])", ""], [r"(\s)", " "], [r"(\")", ""]]
    for pat in regex_pats:
        chart["artist"] = chart["artist"].str.replace(pat[0], pat[1])
        chart["song_title"] = chart["song_title"].str.replace(pat[0], pat[1])
        
    #separate out main artist: split by feat. and and
    chart['main_artist'] = chart['artist'].str.replace(r"(feat.*)", '')
    #handle suga/agust d specifically because this errror comes up a lot
    chart['main_artist'] = ['Agust D' if 'Agust D' in a else a for a in chart['main_artist']]
    chart['main_artist'] = chart['main_artist'].str.replace(r"(and.*)", '')
    chart['main_artist'] = chart['main_artist'].str.replace(r"(,.*)", '')
    chart['main_artist'] = chart['main_artist'].str.replace(r"(&.*)", '')
    chart['main_artist'] = chart['main_artist'].str.replace(r"(:.*)", '')

    #remove parentheticals from song name and artist name
    chart['search_title'] = chart['search_title'].str.replace(r"(\(.*\))", "")
    chart['main_artist'] = chart['main_artist'].str.replace(r"(\(.*\))", "")

    #strip out whitespace
    chart['search_title'] = chart['search_title'].str.strip()
    chart['main_artist'] = chart['main_artist'].str.strip()

    #convert chart_date to datetime object
    #chart["chart_date"] = [string_to_datetime(d) for d in chart["chart_date"]]

    #standardize peak positions (remove parentheticals) + remove trailing whitespace
    chart["peak_position"] = chart["peak_position"].str.replace(r"(\(.*)", "")
    chart["peak_position"] = chart["peak_position"].str.strip()

#sales.rename(columns={'chart_date':'chart_date_sales', 'peak_position':'peak_position_sales', 'chart_weeks':'chart_weeks_sales'}, inplace=True)
#streaming.rename(columns={'chart_date':'chart_date_streaming', 'peak_position':'peak_position_streaming', 'chart_weeks':'chart_weeks_streaming'}, inplace=True)
#radio.rename(columns={'chart_date':'chart_date_radio','peak_position':'peak_position_radio', 'chart_weeks':'chart_weeks_radio'}, inplace=True)

In [63]:
## merge other charts into the hot 100 data
hot100_radio = hot100.merge(radio, how='outer', on=['search_title', 'main_artist', 'artist'])
hot100_radio_streaming = hot100_radio.merge(streaming, how='outer', on=['search_title', 'main_artist', 'artist'])
hot100_radio_streaming_sales = hot100_radio_streaming.merge(sales, how='outer', on=['search_title', 'main_artist', 'artist'])

In [67]:
##fill in info for songs not on hot 100 (dont already have lyrics and other info identified)

#identify songs with missing info
hot100_radio_streaming_sales['missing_lyrics'] = pd.isna(hot100_radio_streaming_sales['original_lyrics'])
to_fill = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['missing_lyrics'] == True]

In [123]:
### scrape lyrics from genius

genius = lyricsgenius.Genius('')

for artist_name in to_fill['main_artist'].unique():
    print(artist_name)
    #locate all songs the artist has in the top songs
    artist_songs = to_fill.loc[to_fill['main_artist'] == artist_name]

    try: #search for artist on genius, skip if not found
        #search for top songs for artist on genius, sorted by popularity -- should mostly match song chart positions
        artist_genius = genius.search_artist(artist_name, max_songs=1, sort="popularity")

        ##search for each song on genius and store lyrics
        for i, r in artist_songs.iterrows():
            print(r['search_title'])
            song_genius = artist_genius.song(r['search_title'])

            #store song lyrics if possible + exceeds similarity threshold, otherwise skip
            try:
                #partial ratio similarity, lowercase everything to avoid case mismatches
                similarity = fuzz.partial_ratio(r['search_title'].lower(), song_genius.title.lower())
                print(r['main_artist'], r['search_title'], " | ", song_genius.title, " | ", similarity)
        
                #if similarity is above threshold, store lyrics + url
                if similarity > 60:
                    lyrics_raw = song_genius.lyrics
                    print(lyrics_raw[:30])
                    hot100_radio_streaming_sales.at[i, 'original_lyrics'] = lyrics_raw
                    hot100_radio_streaming_sales.at[i, 'Original URL'] = song_genius.url
            except:
                print('song genius error')
    except:
        print('artist genius error')

K/DA
Searching for songs by K/DA...

Song 1: "POP/STARS"

Reached user-specified song limit (1).
Done. Found 1 songs.
Pop/Stars
Searching for "Pop/Stars" by K/DA...
Done.
K/DA Pop/Stars  |  POP/STARS  |  100
[Intro: Jaira Burns, Miyeon, M
The Baddest
Searching for "The Baddest" by K/DA...
Done.
K/DA The Baddest  |  THE BADDEST  |  100
[Intro: Wolftyla]
Baddest do w
BTS
Searching for songs by BTS...

Changing artist name to 'BTS (방탄소년단)'
Song 1: "Dynamite"

Reached user-specified song limit (1).
Done. Found 1 songs.
2! 3!
Searching for "2! 3!" by BTS (방탄소년단)...
Done.
BTS 2! 3!  |  둘! 셋! (그래도 좋은 날이 더 많기를) [2! 3! (Still Wishing For More Good Days)]  |  100
방탄소년단의 둘! 셋! (그래도 좋은 날이 더 많기를)
Make It Right
Searching for "Make It Right" by BTS (방탄소년단)...
Done.
BTS Make It Right  |  Make It Right  |  100
[Verse 1: V]
내가 날 눈치챘던 순간
떠나야만
Make It Right remix
Searching for "Make It Right remix" by BTS (방탄소년단)...
Done.
BTS Make It Right remix  |  Make It Right (Remix)  |  95
[Verse 1: Lauv]
Yeah, I was

In [77]:
### scrape translated lyrics from genius

## access artist object for all genius songs translated into english
translations = genius.search_artist('Genius English Translations', sort='popularity', max_songs=1)

## search for each song on the english translations page
for i, r in to_fill.iterrows():
    title = r['search_title']
    artist = r['main_artist']
    #only run for songs that don't have translated lyrics stored (for re-runs)
    print(title)
    song_trans = translations.song(artist + ' - ' + title)

    #only run if results turn something up and song object isn't empty
    if song_trans:
        #partial ratio similarity, lowercase everything to avoid case mismatches
        similarity = fuzz.partial_ratio((artist + ' - ' + title).lower(), song_trans.title.lower())
        print(artist, title, " | ", song_trans.title, " | ", similarity)

        #if similarity is above threshold, store translated lyrics
        if similarity > 60:
            hot100_radio_streaming_sales.at[i, 'translated_lyrics'] = song_trans.lyrics
            hot100_radio_streaming_sales.at[i, 'English Translation URL'] = song_trans.url

Searching for songs by Genius English Translations...

Song 1: "Luis Fonsi & Daddy Yankee - Despacito (Remix) ft. Justin Bieber (English Translation)"

Reached user-specified song limit (1).
Done. Found 1 songs.
Pop/Stars
Searching for "K/DA - Pop/Stars" by Genius English Translations...
Done.
K/DA Pop/Stars  |  K/DA - POP/STARS (English Translation)  |  100
2! 3!
Searching for "BTS - 2! 3!" by Genius English Translations...
Done.
BTS 2! 3!  |  BTS - 둘! 셋! (그래도 좋은 날이 더 많기를) [2! 3! (Still Wishing For More Good Days)] (English Translation)  |  82
Make It Right
Searching for "BTS - Make It Right" by Genius English Translations...
Done.
BTS Make It Right  |  BTS - Make It Right (English Translation)  |  100
Make It Right remix
Searching for "BTS - Make It Right remix" by Genius English Translations...
Done.
BTS Make It Right remix  |  BTS - Make It Right (Remix) ft. Lauv (English Translation)  |  96
Mikrokosmos
Searching for "BTS - Mikrokosmos" by Genius English Translations...
Done.
BTS M

In [84]:
### scrape musical element data from spotify
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import time

##setup spotify api interface
client_id = ''
client_secret = ''

client_credentials_manager = SpotifyClientCredentials(client_id, client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

#new column forr spotify id
hot100_radio_streaming_sales['spotify_id'] = ['' for s in hot100_radio_streaming_sales['artist']]

#rerun for all songs because I want to store spotify ids now
for i, r in hot100_radio_streaming_sales.iterrows():
    song = r['search_title']
    artist = r['main_artist']
    print(song, artist)

    #search spotify
    spotify_search = sp.search('track:'+song+' artist:'+artist+'', type='track')
    time.sleep(6)

    #if it successfully finds a song, print out validation info
    if len(spotify_search['tracks']['items']) > 0:
        #print song id for test query - just take top return
        this_id = spotify_search['tracks']['items'][0]['id']
        this_name = spotify_search['tracks']['items'][0]['name']
        this_artist = spotify_search['tracks']['items'][0]['album']['artists'][0]['name']
        print(this_name, this_artist)

        this_features = sp.audio_features(this_id)[0]

        hot100_radio_streaming_sales.at[i, 'valence'] = this_features['valence']
        hot100_radio_streaming_sales.at[i, 'danceability'] = this_features['danceability']
        hot100_radio_streaming_sales.at[i, 'spotify_id'] = this_id

    else:
        print('No results :(')
        hot100_radio_streaming_sales.at[i, 'valence'] = np.nan
        hot100_radio_streaming_sales.at[i, 'danceability'] = np.nan

    print('___________')

Baby Shark Pinkfong
Baby Shark Pinkfong
___________
Nobody Wonder Girls
Nobody Wonder Girls
___________
Gangnam Style Psy
Gangnam Style (강남스타일) PSY
___________
Gentleman Psy
Gentleman Various Artists
___________
Hangover Psy
Hangover PSY
___________
Daddy Psy
DADDY PSY
___________
Lifted CL
You're Love Lifted Me Clarence Carter
___________
DNA BTS
DNA BTS
___________
MIC Drop BTS
MIC Drop (Steve Aoki Remix) [Full Length Edition] BTS
___________
Fake Love BTS
FAKE LOVE BTS
___________
Ddu-Du Ddu-Du Blackpink
DDU-DU DDU-DU BLACKPINK
___________
Idol BTS
IDOL BTS
___________
Kiss and Make Up Dua Lipa
Kiss and Make Up Dua Lipa
___________
Waste It on Me Steve Aoki
Waste It On Me Steve Aoki
___________
Kill This Love Blackpink
Kill This Love BLACKPINK
___________
Boy with Luv BTS
Boy With Luv (feat. Halsey) BTS
___________
Make It Right  Make It Right remix BTS
Make It Right (feat. Lauv) [Acoustic Remix] BTS
___________
Chicken Noodle Soup J-Hope
Chicken Noodle Soup (feat. Becky G) j-hope
_

In [91]:
#there are a few unsuccessful searches and mismatches i will correct manually

#### spotify mistakes
##mismatched on by bts and life goes on by bts
on_features = sp.audio_features('2QyuXBcV1LJ2rq01KhreMF')[0]
on_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'On'].index[0]
hot100_radio_streaming_sales.at[on_index, 'valence'] = on_features['valence']
hot100_radio_streaming_sales.at[on_index, 'danceability'] = on_features['danceability']
hot100_radio_streaming_sales.at[on_index, 'spotify_id'] = '2QyuXBcV1LJ2rq01KhreMF'

##mismatched lifted by cl and You're Love Lifted Me Clarence Carter
lifted_features = sp.audio_features('2DHWM8Bm4zuzuH7UOXscJw')[0]
lifted_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'Lifted'].index[0]
hot100_radio_streaming_sales.at[lifted_index, 'valence'] = lifted_features['valence']
hot100_radio_streaming_sales.at[lifted_index, 'danceability'] = lifted_features['danceability']
hot100_radio_streaming_sales.at[lifted_index, 'spotify_id'] = '2DHWM8Bm4zuzuH7UOXscJw'

In [125]:
#### original lyrics mistakes

## missed sweet night by v
sn_genius = genius.search_song("Sweet Night", "V")
sn_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'Sweet Night'].index[0]
hot100_radio_streaming_sales.at[sn_index, 'Original URL'] = sn_genius.url
hot100_radio_streaming_sales.at[sn_index, 'original_lyrics'] = sn_genius.lyrics

Searching for "Sweet Night" by V...
Done.


In [127]:
## missed Dream Glow by BTS and Charli XCX
dg_genius = genius.search_song("Dream Glow", "BTS and Charli XCX")
dg_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'Dream Glow'].index[0]
hot100_radio_streaming_sales.at[dg_index, 'Original URL'] = dg_genius.url
hot100_radio_streaming_sales.at[dg_index, 'original_lyrics'] = dg_genius.lyrics

Searching for "Dream Glow" by BTS and Charli XCX...
Done.


In [128]:
## missed A Brand New Day by BTS and Zara Larsson	
bnd_genius = genius.search_song("A Brand New Day", "BTS and Zara Larsson")
bnd_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'A Brand New Day'].index[0]
hot100_radio_streaming_sales.at[bnd_index, 'Original URL'] = bnd_genius.url
hot100_radio_streaming_sales.at[bnd_index, 'original_lyrics'] = bnd_genius.lyrics

Searching for "A Brand New Day" by BTS and Zara Larsson...
Done.


In [129]:
## missed All Night by BTS and Juice Wrld	
an_genius = genius.search_song("All Night", "BTS and Juice Wrld")
an_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'All Night'].index[0]
hot100_radio_streaming_sales.at[an_index, 'Original URL'] = an_genius.url
hot100_radio_streaming_sales.at[an_index, 'original_lyrics'] = an_genius.lyrics

Searching for "All Night" by BTS and Juice Wrld...
Done.


In [118]:
#### translated lyrics mistakes

## sweet night is entirely in english
sn_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'Sweet Night'].index[0]
hot100_radio_streaming_sales.at[sn_index, 'English Translation URL'] = hot100_radio_streaming_sales.at[sn_index, 'Original URL']
hot100_radio_streaming_sales.at[sn_index, 'translated_lyrics'] = hot100_radio_streaming_sales.at[sn_index, 'original_lyrics']

## who is also entirely in english
who_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'Who'].index[0]
hot100_radio_streaming_sales.at[who_index, 'English Translation URL'] = hot100_radio_streaming_sales.at[who_index, 'Original URL']
hot100_radio_streaming_sales.at[who_index, 'translated_lyrics'] = hot100_radio_streaming_sales.at[who_index, 'original_lyrics']

In [132]:
## missed translation for strange
strange_genius = genius.search_song("Agust D - 이상하지않은가 (Strange) ft. RM (English Translation)", "Genius English Translations")
strange_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'Strange'].index[0]
hot100_radio_streaming_sales.at[strange_index, 'English Translation URL'] = strange_genius.url
hot100_radio_streaming_sales.at[strange_index, 'translated_lyrics'] = strange_genius.lyrics

Searching for "Agust D - 이상하지않은가 (Strange) ft. RM (English Translation)" by Genius English Translations...
Done.


In [134]:
## missed translation for blueberry eyes
bbe_genius = genius.search_song("MAX - Blueberry Eyes ft. SUGA (English Translation)", "Genius English Translations")
bbe_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'Blueberry Eyes'].index[0]
hot100_radio_streaming_sales.at[bbe_index, 'English Translation URL'] = bbe_genius.url
hot100_radio_streaming_sales.at[bbe_index, 'translated_lyrics'] = bbe_genius.lyrics

Searching for "MAX - Blueberry Eyes ft. SUGA (English Translation)" by Genius English Translations...
Done.


In [139]:
## genius does not have english translaltion of gbtb
gbtb_index = hot100_radio_streaming_sales.loc[hot100_radio_streaming_sales['search_title'] == 'G.B.T.B.'].index[0]
hot100_radio_streaming_sales.at[gbtb_index, 'English Translation URL'] = 'https://lyricstranslate.com/en/gbtb-gbtb.html'
hot100_radio_streaming_sales.at[gbtb_index, 'translated_lyrics'] = open("Manual Lyrics/GBTB.txt", "r").read()

In [167]:
#### re-process data now that it is all collected

#scale spotify valence values
hot100_radio_streaming_sales['scaled_valence'] = [s*2 - 1 for s in hot100_radio_streaming_sales['valence']]

##process text 
#remove romanized lyrics that are in some genius pages
hot100_radio_streaming_sales["original_lyrics"] = [lyrics.split("Romanization")[0] for lyrics in hot100_radio_streaming_sales["original_lyrics"]]

#get rid of bracketed sections like verse indicators
hot100_radio_streaming_sales["original_lyrics"] = hot100_radio_streaming_sales["original_lyrics"].str.replace(r"(\[.*\])", "")
hot100_radio_streaming_sales["translated_lyrics"] = hot100_radio_streaming_sales["translated_lyrics"].str.replace(r"(\[.*\])", "")

#delete trailing whitespace
hot100_radio_streaming_sales["original_lyrics"] = hot100_radio_streaming_sales["original_lyrics"].str.strip()
hot100_radio_streaming_sales["translated_lyrics"] = hot100_radio_streaming_sales["translated_lyrics"].str.strip()

#clean spacing: add space before linebreaks (so theyre counted separately)
hot100_radio_streaming_sales["translated_lyrics"] = hot100_radio_streaming_sales["translated_lyrics"].str.replace(r"(\n+)", " \n")
hot100_radio_streaming_sales["translated_lyrics"] = hot100_radio_streaming_sales["translated_lyrics"].str.replace(r"(\s+)", " ") #restandardize whitespace
hot100_radio_streaming_sales["translated_lyrics"] = hot100_radio_streaming_sales["translated_lyrics"].str.replace(r"(\s[, | \' | \? | \! | \. | \-]+)", " ")  #delete hanging punctuation
hot100_radio_streaming_sales["original_lyrics"] = hot100_radio_streaming_sales["original_lyrics"].str.replace(r"(\n+)", " \n")
hot100_radio_streaming_sales["original_lyrics"] = hot100_radio_streaming_sales["original_lyrics"].str.replace(r"(\s+)", " ") #restandardize whitespace
hot100_radio_streaming_sales["original_lyrics"] = hot100_radio_streaming_sales["original_lyrics"].str.replace(r"(\s[, | ' | \? | \! | \. | \-]+)", " ")  #delete hanging punctuation

#only roman characters and white space and punctuation
hot100_radio_streaming_sales["english_lyrics"] = hot100_radio_streaming_sales["original_lyrics"].str.replace(r"([^a-z | A-Z | \s | , | ' | \? | \! | \. | \- ])", " ")
hot100_radio_streaming_sales["english_lyrics"] = hot100_radio_streaming_sales["english_lyrics"].str.replace(r"(\s+)", " ") #restandardize whitespace
hot100_radio_streaming_sales["english_lyrics"] = hot100_radio_streaming_sales["english_lyrics"].str.replace(r"(\s[, | ' | \? | \! | \. | \-]+)", " ")  #delete hanging punctuation

##count number of english lyrics and total lyrics
hot100_radio_streaming_sales['english_count'] = [len(lyr.split(" ")) for lyr in hot100_radio_streaming_sales['english_lyrics']]
hot100_radio_streaming_sales['total_count'] = [len(lyr.split(" ")) for lyr in hot100_radio_streaming_sales['original_lyrics']]

##count number of unique english and total lyrics
hot100_radio_streaming_sales['english_unique'] = [len(set(lyr.split(" "))) for lyr in hot100_radio_streaming_sales['english_lyrics']]
hot100_radio_streaming_sales['total_unique'] = [len(set(lyr.split(" "))) for lyr in hot100_radio_streaming_sales['original_lyrics']]

In [168]:
hot100_radio_streaming_sales

Unnamed: 0,chart_date,artist,song_title_x,Original URL,original_lyrics,English Translation URL,translated_lyrics,peak_position,chart_weeks,english_lyrics,...,chart_weeks_streaming,chart_date_sales,song_title_y,peak_position_sales,chart_weeks_sales,missing_lyrics,spotify_id,missing_lyrics_post,english_unique,total_unique
0,2019-01-12,Pinkfong,Baby Shark,https://genius.com/Pinkfong-baby-shark-lyrics,"Baby shark, doo doo doo doo doo doo Baby shark...",https://genius.com/Pinkfong-baby-shark-lyrics,"Baby shark, doo doo doo doo doo doo Baby shark...",32.0,20.0,"Baby shark, doo doo doo doo doo doo Baby shark...",...,6 (R) 59,,,,,False,5ygDXis42ncn6kYG14lEVG,False,24,23
1,2009-10-31,Wonder Girls,Nobody (English release),https://genius.com/Wonder-girls-nobody-english...,You know I still love you baby And it will nev...,https://genius.com/Wonder-girls-nobody-english...,You know I still love you baby And it will nev...,76.0,1.0,You know I still love you baby And it will nev...,...,,,,,,False,0TBNg82Zqi0k1VslEe2ooq,False,119,123
2,2012-09-22,Psy,Gangnam Style,https://genius.com/Psy-gangnam-style-lyrics,Hangul 오빤 강남 스타일 강남 스타일 낮에는 따사로운 인간적인 여자 커피 한잔...,https://genius.com/Genius-english-translations...,Oppa Gangnam Style Gangnam style Warm human wo...,2.0,3.0,Hangul Hey Hey Hey Hey sexy lady sexy lady Hey...,...,,2012-10-06,Gangnam Style,1,32,False,03UrZgTINDqvnUMbbIMhql,False,12,83
3,2013-04-27,Psy,Gentleman,https://genius.com/Psy-gentleman-lyrics,알랑가몰라 왜 화끈해야 하는건지 알랑가몰라 왜 말끔해야 하는건지 알랑가몰라 아리까리...,https://lyricstranslate.com/en/gentleman-gentl...,I don’t know if you know why it needs to be ho...,5.0,1.0,"We like We-we-we like party Damn, girl You so...",...,,2013-05-04,Gentleman,20,4,False,26spalP6x2gUAab8rwB8dQ,False,37,75
4,2014-06-28,Psy feat. Snoop Dogg,Hangover,https://genius.com/Psy-hangover-lyrics,"Hangover, hangover, hangover, hangover-over-ov...",https://colorcodedlyrics.com/2014/06/psy-ssai-...,"Hangover, hangover, hangover, hangover-over-ov...",26.0,1.0,"Hangover, hangover, hangover, hangover-over-ov...",...,,,,,,False,4ngypPwfcqpHBxQjJIOgvr,False,170,211
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,,BTS,,https://genius.com/Bts-stay-gold-lyrics,Ooh ooh In a world where you feel cold You got...,https://genius.com/Genius-english-translations...,"Oh, woah, oh In a world where you feel cold Yo...",,,Ooh ooh In a world where you feel cold You got...,...,,2020-07-04,Stay Gold,6,1,True,3Ys2PYl1wyPKQIwyqhP9cQ,False,41,91
74,,BTS,,https://genius.com/Bts-your-eyes-tell-lyrics,何故、こんなにも 涙が溢れるの ねえ、側にいて そして笑ってよ 君のいない未来は 色のない世...,https://genius.com/Genius-english-translations...,"Why are my eyes filled with tears? Hey, stay b...",,,So beautiful follow me I ll find you So color...,...,,2020-07-25,Your Eyes Tell,12,1,True,3TZ7NHkMT82AhwuYsd00Hz,False,16,57
75,,K/DA: (G)I-dle & Wolftyla feat. Bea Miller,,https://genius.com/K-da-the-baddest-lyrics,Baddest do what the baddest do (Hey) The badde...,https://genius.com/Genius-english-translations...,Baddest do what the baddest do (Hey) The badde...,,,Baddest do what the baddest do Hey The baddest...,...,,2020-09-12,The Baddest,28,1,True,2V4Fx72svQRxrFvNT1eq5f,False,180,220
76,,Max Schneider feat. Suga,,https://genius.com/Max-blueberry-eyes-lyrics,"Mmm, mmm Da-da, ooh woah-oh, yeah MAX, baby Le...",https://genius.com/Genius-english-translations...,"Mmm, mmm Da-da, ooh woah-oh, yeah MAX, baby Le...",,,"Mmm, mmm Da-da, ooh woah-oh, yeah MAX, baby Le...",...,,2020-09-26,Blueberry Eyes,22,1,True,5dn6QANKbf76pANGjMBida,True,87,131


In [157]:
#### get sentiment data for all songs
from textblob import TextBlob

for i, r in hot100_radio_streaming_sales.iterrows():
    print(r['search_title'])

    eng_blob = TextBlob(r['english_lyrics'])
    print('english', eng_blob.sentiment.polarity)
    hot100_radio_streaming_sales.at[i, 'english_sentiment'] = eng_blob.sentiment.polarity

    trans_blob = TextBlob(r['translated_lyrics'])
    print('translated', trans_blob.sentiment.polarity)
    hot100_radio_streaming_sales.at[i, 'translated_sentiment'] = trans_blob.sentiment.polarity

Baby Shark
english 0.25
translated 0.25
Nobody
english 5.153576582148497e-05
translated 5.153576582148497e-05
Gangnam Style
english 0.5
translated 0.3921874999999999
Gentleman
english 0.11999999999999993
translated 0.08669270833333328
Hangover
english -0.18954575517075511
translated -0.16963059163059158
Daddy
english 0.48333333333333334
translated 0.2841036414565826
Lifted
english -0.13373015873015875
translated -0.13373015873015875
DNA
english 0.5349999999999999
translated 0.15014880952380955
MIC Drop
english -0.07285889785889788
translated -0.046976063152533776
Fake Love
english -0.07193877551020408
translated 0.046141041162227586
Ddu-Du Ddu-Du
english -0.2954036458333333
translated 0.13778582317073174
Idol
english 0.438425925925926
translated 0.4078088578088578
Kiss and Make Up
english -0.17391304347826084
translated -0.16296296296296292
Waste It on Me
english -0.008143939393939384
translated -0.008143939393939384
Kill This Love
english 0.268031746031746
translated 0.216068660022148

In [169]:
#write to csv
#radio.to_csv('subcharts - Radio_Kpop.csv')
#streaming.to_csv('subcharts - Streaming_Kpop.csv')
#sales.to_csv('subcharts - Sales_Kpop.csv')
hot100_radio_streaming_sales.to_csv('hot100_radio_streaming_sales_Kpop.csv')