In [1]:
import pandas as pd
import json
import numpy as np

LYRIC_FOLDER_PATH = './data/lyric-fetching-datasets'
IS_INSTRUMENTAL_RETURN_FLAG = 'THIS_IS_AN_INSTRUMENTAL'

# 1. Load the latest data

In [2]:
lyrics_df = pd.read_csv(f'{LYRIC_FOLDER_PATH}/{"2.3 OUTPUT of lyrics by song id with urls"}.csv')
lyrics_df

Unnamed: 0,song_id,lyrics,urls
0,(I Can't Get No) SatisfactionThe Rolling Stones,(I Can’t Get No) Satisfaction Lyrics[Instrumen...,"[""https://genius.com/The-rolling-stones-i-cant..."
1,(I Just) Died In Your ArmsCutting Crew,(I Just) Died in Your Arms Lyrics[Intro]\nOh I...,"[""https://genius.com/Cutting-crew-i-just-died-..."
2,(I Know) I'm Losing YouThe Temptations,(I Know) I’m Losing You Lyrics[Intro]\nOoh\nYo...,"[""https://genius.com/The-temptations-i-know-im..."
3,(I've Had) The Time Of My LifeBill Medley & Je...,(I’ve Had) The Time of My Life Lyrics[Chorus: ...,"[""https://genius.com/Bill-medley-and-jennifer-..."
4,1 ThingAmerie,1 Thing Lyrics[Intro]\nWoo!\nUh\nWoo!\nNa-na-n...,"[""https://genius.com/Amerie-1-thing-lyrics"", ""..."
...,...,...,...
5002,"Saving Forever For You (From ""Beverly Hills, 9...",Saving Forever For You Lyrics[Verse 1]\nI've n...,"[""https://genius.com/Shanice-saving-forever-fo..."
5003,"Shakedown (From ""Beverly Hills Cop II"")Bob Seger",Shakedown Lyrics[Verse 1]\nNo matter what you ...,"[""https://genius.com/Bob-seger-shakedown-lyrics""]"
5004,Superstar/Bless The Beasts And ChildrenCarpenters,Superstar Lyrics[Verse 1]\nLong ago and oh so ...,"[""https://genius.com/Carpenters-superstar-lyri..."
5005,Travelin' Band/Who'll Stop The RainCreedence C...,Travelin’ Band Lyrics[Verse 1]\n737 coming out...,"[""https://genius.com/Creedence-clearwater-revi..."


# 2. Drop any new instrumentals found

In [3]:
NEW_INSTRUMENTALS_FOUND = [
    'Our Winter LoveBill Pursell'
]

lyrics_sans_instrumentals = lyrics_df[
    lyrics_df.song_id.apply(lambda song_id: song_id not in NEW_INSTRUMENTALS_FOUND)
]
lyrics_sans_instrumentals

Unnamed: 0,song_id,lyrics,urls
0,(I Can't Get No) SatisfactionThe Rolling Stones,(I Can’t Get No) Satisfaction Lyrics[Instrumen...,"[""https://genius.com/The-rolling-stones-i-cant..."
1,(I Just) Died In Your ArmsCutting Crew,(I Just) Died in Your Arms Lyrics[Intro]\nOh I...,"[""https://genius.com/Cutting-crew-i-just-died-..."
2,(I Know) I'm Losing YouThe Temptations,(I Know) I’m Losing You Lyrics[Intro]\nOoh\nYo...,"[""https://genius.com/The-temptations-i-know-im..."
3,(I've Had) The Time Of My LifeBill Medley & Je...,(I’ve Had) The Time of My Life Lyrics[Chorus: ...,"[""https://genius.com/Bill-medley-and-jennifer-..."
4,1 ThingAmerie,1 Thing Lyrics[Intro]\nWoo!\nUh\nWoo!\nNa-na-n...,"[""https://genius.com/Amerie-1-thing-lyrics"", ""..."
...,...,...,...
5002,"Saving Forever For You (From ""Beverly Hills, 9...",Saving Forever For You Lyrics[Verse 1]\nI've n...,"[""https://genius.com/Shanice-saving-forever-fo..."
5003,"Shakedown (From ""Beverly Hills Cop II"")Bob Seger",Shakedown Lyrics[Verse 1]\nNo matter what you ...,"[""https://genius.com/Bob-seger-shakedown-lyrics""]"
5004,Superstar/Bless The Beasts And ChildrenCarpenters,Superstar Lyrics[Verse 1]\nLong ago and oh so ...,"[""https://genius.com/Carpenters-superstar-lyri..."
5005,Travelin' Band/Who'll Stop The RainCreedence C...,Travelin’ Band Lyrics[Verse 1]\n737 coming out...,"[""https://genius.com/Creedence-clearwater-revi..."


# 3. Apply manually fixed songs

In [4]:
fixes_raw = pd.read_csv(f'{LYRIC_FOLDER_PATH}/{"lyric_corrections_by_hand_on_v0.7_nomic_embeddings"}.csv')
fixes = fixes_raw.copy()

fixes['song_id'] = fixes.song + fixes.performer

fixes['correct_url'] = fixes.correct_genius_url
fixes = fixes[['song_id', 'correct_url']]
fixes

Unnamed: 0,song_id,correct_url
0,PeachesJustin Bieber Featuring Daniel Caesar &...,https://genius.com/Justin-bieber-peaches-lyrics
1,In The MoodErnie Fields & Orch.,https://genius.com/Ernie-fields-in-the-mood-ly...
2,Willie And The Hand JiveThe Johnny Otis Show,https://genius.com/Johnny-otis-willie-and-the-...
3,"Sittin' Up In My Room (From ""Waiting To Exhale...",https://genius.com/Brandy-sittin-up-in-my-room...
4,"Streets Of Philadelphia (From ""Philadelphia"")B...",https://genius.com/Lil-wayne-dont-cry-lyrics
5,Don't CryLil Wayne Featuring XXXTENTACION,https://genius.com/Lil-wayne-dont-cry-lyrics
6,Lucky YouEminem Featuring Joyner Lucas,https://genius.com/Eminem-lucky-you-lyrics
7,Wild OnesFlo Rida Featuring Sia,https://genius.com/Flo-rida-wild-ones-lyrics
8,LoyalChris Brown Featuring Lil Wayne & French ...,https://genius.com/Chris-brown-loyal-lyrics
9,ZEZEKodak Black Featuring Travis Scott & Offset,https://genius.com/Kodak-black-zeze-lyrics


In [5]:
import lyricsgenius
GENIUS_CLIEN_API_KEY = '8ZiCNVQqbSF6NM0nwVnVv6Y0EBR7BsVQ9HaVgPdqJOZR1l6XlymR-pUUi6z3GHPx' # be nice :)
genius = lyricsgenius.Genius(GENIUS_CLIEN_API_KEY)

In [6]:
def _get_song(url):
    try: 
        print(url, '...')
        return [genius.lyrics(song_url=url), [url]]
    except:
        print(f'OOPS: {url}')
        
def refetch_lyrics_from_url(songs_subset_df):
    return songs_subset_df.apply(
        lambda row: _get_song(row['correct_url']),
        axis=1
    )

# fetched = refetch_lyrics_from_url(fixes)
# fetched

In [7]:
def convert_fetched_lyrics_to_df(fetched_lyrics_and_url_options_arrays, original_df):
    just_lyrics = [x and x[0] for x in fetched_lyrics_and_url_options_arrays]
    just_stringified_urls = [x and json.dumps(x[1]) for x in fetched_lyrics_and_url_options_arrays]
    save_to_disk = pd.DataFrame({
        'song_id': original_df.song_id, 'lyrics': just_lyrics, 'urls': just_stringified_urls
    })
    return save_to_disk

# fixes_fetched_df = convert_fetched_lyrics_to_df(fetched, fixes)
# fixes_fetched_df

In [8]:
def save_lyrics(lyrics_subset, file_name):
    lyrics_subset.to_csv(f'./data/lyric-fetching-datasets/{file_name}.csv', index=False)
    return lyrics_subset

In [9]:
FIXES_TITLE = 'FETCHED LYRICS from lyric_corrections_by_hand_on_v0.7_nomic_embeddings'
# save_lyrics(fixes_fetched_df, FIXES_TITLE)

fixes_fetched_df = pd.read_csv(f'{LYRIC_FOLDER_PATH}/{FIXES_TITLE}.csv')
fixes_fetched_df

Unnamed: 0,song_id,lyrics,urls
0,PeachesJustin Bieber Featuring Daniel Caesar &...,TranslationsTürkçeEspañolPortuguês日本語ItalianoD...,"[""https://genius.com/Justin-bieber-peaches-lyr..."
1,In The MoodErnie Fields & Orch.,In The Mood LyricsWho's the livin' dolly with ...,"[""https://genius.com/Ernie-fields-in-the-mood-..."
2,Willie And The Hand JiveThe Johnny Otis Show,Willie and the Hand Jive LyricsI know a cat na...,"[""https://genius.com/Johnny-otis-willie-and-th..."
3,"Sittin' Up In My Room (From ""Waiting To Exhale...",Sittin’ Up In My Room Lyrics[Verse 1]\nSeems l...,"[""https://genius.com/Brandy-sittin-up-in-my-ro..."
4,"Streets Of Philadelphia (From ""Philadelphia"")B...",TranslationsTürkçeРусскийFrançaisPortuguêsDon’...,"[""https://genius.com/Lil-wayne-dont-cry-lyrics""]"
5,Don't CryLil Wayne Featuring XXXTENTACION,TranslationsTürkçeРусскийFrançaisPortuguêsDon’...,"[""https://genius.com/Lil-wayne-dont-cry-lyrics""]"
6,Lucky YouEminem Featuring Joyner Lucas,TranslationsTürkçeEminem - Lucky You ft. Joyne...,"[""https://genius.com/Eminem-lucky-you-lyrics""]"
7,Wild OnesFlo Rida Featuring Sia,TranslationsPortuguêsWild Ones Lyrics[Chorus: ...,"[""https://genius.com/Flo-rida-wild-ones-lyrics""]"
8,LoyalChris Brown Featuring Lil Wayne & French ...,"Loyal Lyrics[Intro: Lil Wayne, Chris Brown]\nY...","[""https://genius.com/Chris-brown-loyal-lyrics""]"
9,ZEZEKodak Black Featuring Travis Scott & Offset,TranslationsPortuguêsZEZE Lyrics[Intro]\nD.A. ...,"[""https://genius.com/Kodak-black-zeze-lyrics""]"


In [10]:
JUST_SONG_ID_AND_LYRICS_COLS = ['song_id', 'lyrics']
SONG_ID = 'song_id'

def remove_overlap(base_df, overlap_df_to_remove):
    return base_df[
        base_df.song_id.apply(
            lambda i: i not in overlap_df_to_remove.song_id.values
        )
    ]

In [11]:
# sanity check
fixes_fetched_df[
    fixes_fetched_df.song_id.apply(lambda i: i not in lyrics_sans_instrumentals.song_id.values)
]

Unnamed: 0,song_id,lyrics,urls


### swap back into main dataset

In [12]:
lyrics_sans_ins_with_fixes = pd.concat([
    remove_overlap(lyrics_sans_instrumentals, fixes_fetched_df),
    fixes_fetched_df
]).reset_index(drop=True)
lyrics_sans_ins_with_fixes

Unnamed: 0,song_id,lyrics,urls
0,(I Can't Get No) SatisfactionThe Rolling Stones,(I Can’t Get No) Satisfaction Lyrics[Instrumen...,"[""https://genius.com/The-rolling-stones-i-cant..."
1,(I Just) Died In Your ArmsCutting Crew,(I Just) Died in Your Arms Lyrics[Intro]\nOh I...,"[""https://genius.com/Cutting-crew-i-just-died-..."
2,(I Know) I'm Losing YouThe Temptations,(I Know) I’m Losing You Lyrics[Intro]\nOoh\nYo...,"[""https://genius.com/The-temptations-i-know-im..."
3,(I've Had) The Time Of My LifeBill Medley & Je...,(I’ve Had) The Time of My Life Lyrics[Chorus: ...,"[""https://genius.com/Bill-medley-and-jennifer-..."
4,1 ThingAmerie,1 Thing Lyrics[Intro]\nWoo!\nUh\nWoo!\nNa-na-n...,"[""https://genius.com/Amerie-1-thing-lyrics"", ""..."
...,...,...,...
5001,MickeyToni Basil,"Mickey Lyrics[Refrain]\nOh, Mickey\nYou're so ...","[""https://genius.com/Toni-basil-mickey-lyrics""]"
5002,LightersBad Meets Evil Featuring Bruno Mars,Lighters Lyrics[Chorus: Bruno Mars]\nThis one'...,"[""https://genius.com/Bad-meets-evil-lighters-l..."
5003,Somebody That I Used To KnowGotye Featuring Ki...,TranslationsTürkçeSomebody That I Used to Know...,"[""https://genius.com/Gotye-somebody-that-i-use..."
5004,Maria MariaSantana Featuring The Product G&B,Maria Maria Lyrics[Intro: Wyclef Jean]\nLadies...,"[""https://genius.com/Santana-maria-maria-lyrics""]"


# 4. Refetching lyrics that are likely wrong

### Here: we demonstrate how `/` songs are usually older tracks that had a physical A & B side when sold, & break when trying to be fetched from genius.com

In [13]:
lyrics_sans_ins_with_fixes[
    lyrics_sans_ins_with_fixes.song_id.str.contains('/') 
#     lyrics_sans_ins_with_fixes.song_id.str.contains("Lookin' Out My Back Door/Long As I Can See The Light")
#     lyrics_sans_ins_with_fixes.lyrics.str.contains("Greatest Songs")
    
]

Unnamed: 0,song_id,lyrics,urls
70,Dear Mama/Old School2Pac,TranslationsPortuguêsDeutschEnglish2Pac - Dear...,"[""https://genius.com/2pac-dear-mama-lyrics"", ""..."
155,How Do U Want It/California Love2Pac Featuring...,Hot Rap Songs Chart 25th Anniversary: Top 100 ...,"[""https://genius.com/Billboard-hot-rap-songs-c..."
190,I Am...I Said/Done Too SoonNeil Diamond,Songs of Eulogy and Bereavement LyricsListed i...,"[""https://genius.com/Genius-lists-songs-of-eul..."
226,I Don't Wanna Live Forever (Fifty Shades Darke...,New Music Friday 12/16/16 LyricsZayn & Taylor ...,"[""https://genius.com/Spotify-new-music-friday-..."
245,I Got Id/Long RoadPearl Jam,"Even Flow - Live in Seattle, WA 06-December-20...","[""https://genius.com/Pearl-jam-even-flow-live-..."
...,...,...,...
4975,Down On The Corner/Fortunate SonCreedence Clea...,Down on the Corner Lyrics[Verse 1]\nEarly in t...,"[""https://genius.com/Creedence-clearwater-revi..."
4978,Rainy Night In Georgia/Rubberneckin'Brook Benton,Rainy Night in Georgia Lyrics[Verse 1]\nHoveri...,"[""https://genius.com/Brook-benton-rainy-night-..."
4981,Superstar/Bless The Beasts And ChildrenCarpenters,Superstar Lyrics[Verse 1]\nLong ago and oh so ...,"[""https://genius.com/Carpenters-superstar-lyri..."
4982,Travelin' Band/Who'll Stop The RainCreedence C...,Travelin’ Band Lyrics[Verse 1]\n737 coming out...,"[""https://genius.com/Creedence-clearwater-revi..."


In [14]:
import json

In [15]:
has_url = lyrics_sans_ins_with_fixes[
    lyrics_sans_ins_with_fixes.urls.notna()
].copy().reset_index(drop=True)
has_url['url'] = has_url.urls.apply(lambda url: url and json.loads(url)[0])
has_url

Unnamed: 0,song_id,lyrics,urls,url
0,(I Can't Get No) SatisfactionThe Rolling Stones,(I Can’t Get No) Satisfaction Lyrics[Instrumen...,"[""https://genius.com/The-rolling-stones-i-cant...",https://genius.com/The-rolling-stones-i-cant-g...
1,(I Just) Died In Your ArmsCutting Crew,(I Just) Died in Your Arms Lyrics[Intro]\nOh I...,"[""https://genius.com/Cutting-crew-i-just-died-...",https://genius.com/Cutting-crew-i-just-died-in...
2,(I Know) I'm Losing YouThe Temptations,(I Know) I’m Losing You Lyrics[Intro]\nOoh\nYo...,"[""https://genius.com/The-temptations-i-know-im...",https://genius.com/The-temptations-i-know-im-l...
3,(I've Had) The Time Of My LifeBill Medley & Je...,(I’ve Had) The Time of My Life Lyrics[Chorus: ...,"[""https://genius.com/Bill-medley-and-jennifer-...",https://genius.com/Bill-medley-and-jennifer-wa...
4,1 ThingAmerie,1 Thing Lyrics[Intro]\nWoo!\nUh\nWoo!\nNa-na-n...,"[""https://genius.com/Amerie-1-thing-lyrics"", ""...",https://genius.com/Amerie-1-thing-lyrics
...,...,...,...,...
4933,MickeyToni Basil,"Mickey Lyrics[Refrain]\nOh, Mickey\nYou're so ...","[""https://genius.com/Toni-basil-mickey-lyrics""]",https://genius.com/Toni-basil-mickey-lyrics
4934,LightersBad Meets Evil Featuring Bruno Mars,Lighters Lyrics[Chorus: Bruno Mars]\nThis one'...,"[""https://genius.com/Bad-meets-evil-lighters-l...",https://genius.com/Bad-meets-evil-lighters-lyrics
4935,Somebody That I Used To KnowGotye Featuring Ki...,TranslationsTürkçeSomebody That I Used to Know...,"[""https://genius.com/Gotye-somebody-that-i-use...",https://genius.com/Gotye-somebody-that-i-used-...
4936,Maria MariaSantana Featuring The Product G&B,Maria Maria Lyrics[Intro: Wyclef Jean]\nLadies...,"[""https://genius.com/Santana-maria-maria-lyrics""]",https://genius.com/Santana-maria-maria-lyrics


In [16]:
songs_df = pd.read_csv('./data/1 DONE RIGHT OUTPUT unique songs.csv')

In [17]:
songs_df

Unnamed: 0,song_id,chart_position,chart_debut,song,performer,artist_array,generic_genre
0,#9 DreamJohn Lennon,9,1974-12-21,#9 Dream,John Lennon,['John Lennon'],ROCK
1,'03 Bonnie & ClydeJay-Z Featuring Beyonce Knowles,4,2002-10-26,'03 Bonnie & Clyde,Jay-Z Featuring Beyonce Knowles,"['Jay-Z', 'Beyonce Knowles']",HIP_HOP
2,'65 Love AffairPaul Davis,6,1982-02-27,'65 Love Affair,Paul Davis,['Paul Davis'],ROCK
3,('til) I Kissed YouThe Everly Brothers,4,1959-08-15,('til) I Kissed You,The Everly Brothers,['The Everly Brothers'],POP
4,(Can't Live Without Your) Love And AffectionNe...,1,1990-07-07,(Can't Live Without Your) Love And Affection,Nelson,['Nelson'],COUNTRY
...,...,...,...,...,...,...,...
5061,everything i wantedBillie Eilish,8,2019-11-23,everything i wanted,Billie Eilish,['Billie Eilish'],POP
5062,iSpyKYLE Featuring Lil Yachty,4,2017-01-14,iSpy,KYLE Featuring Lil Yachty,"['KYLE', 'Lil Yachty']",POP
5063,interludeJ. Cole,8,2021-05-22,interlude,J. Cole,['J. Cole'],HIP_HOP
5064,"my.lifeJ. Cole, 21 Savage & Morray",2,2021-05-29,my.life,"J. Cole, 21 Savage & Morray","['J. Cole', '21 Savage', 'Morray']",HIP_HOP


### Here, find songs that are `likely_wrong` simply because they don't end with `-lyrics`

In [18]:
likely_wrong = has_url[
    ~has_url.url.str.endswith('-lyrics')
]
likely_wrong = likely_wrong.merge(on='song_id', how='left', right=songs_df)[['song_id', 'song', 'performer']]
likely_wrong

Unnamed: 0,song_id,song,performer
0,"Hungry Eyes (From ""Dirty Dancing"")Eric Carmen","Hungry Eyes (From ""Dirty Dancing"")",Eric Carmen
1,HungryPaul Revere & The Raiders Featuring Mark...,Hungry,Paul Revere & The Raiders Featuring Mark Lindsay
2,I Am...I Said/Done Too SoonNeil Diamond,I Am...I Said/Done Too Soon,Neil Diamond
3,"I Believe In You And Me (From ""The Preacher's ...","I Believe In You And Me (From ""The Preacher's ...",Whitney Houston
4,I Can't Stand ItEric Clapton And His Band,I Can't Stand It,Eric Clapton And His Band
...,...,...,...
174,"In The Still Of The Nite (From ""The Jacksons"")...","In The Still Of The Nite (From ""The Jacksons"")",Boyz II Men
175,Junior's Farm/Sally GPaul McCartney And Wings,Junior's Farm/Sally G,Paul McCartney And Wings
176,"Keep On, Keepin' On (From ""Sunset Park"")MC Lyt...","Keep On, Keepin' On (From ""Sunset Park"")",MC Lyte Featuring Xscape
177,O-o-h Child/Dear PrudenceThe 5 Stairsteps,O-o-h Child/Dear Prudence,The 5 Stairsteps


In [19]:
BANNED_CHARACTERS = ['[', ']', '"', "'", '+', ',', '.', '(', ')', '/', '\\', '-', '&', '?', '!']
def clean_it(s):
    cleaned = s
    for naughty_char in BANNED_CHARACTERS:
        cleaned = cleaned.replace(naughty_char, '')
    return cleaned.strip()

def clean_artists(artist_str):
    s = artist_str.split('(')[0]
    s = s.split(' and ')[0]
    s = s.split(' with ')[0]
    s = s.split(' With ')[0]
    s = s.split(' And ')[0]
    s = s.split(' Featuring ')[0]
    return clean_it(s)

def get_search_term(_song, _performer):
    song = clean_it(_song.split('(')[0].split('/')[0])
    performer = clean_artists(_performer)
    return f'{performer} {song}'

likely_wrong.apply(lambda row: get_search_term(row['song'], row['performer']), axis=1).values

array(['Eric Carmen Hungry Eyes', 'Paul Revere  The Raiders Hungry',
       'Neil Diamond I AmI Said',
       'Whitney Houston I Believe In You And Me',
       'Eric Clapton I Cant Stand It',
       'Archie Bell  The Drells I Cant Stop Dancing',
       'Paul Anka I Dont Like To Sleep Alone',
       'Zayn  Taylor Swift I Dont Wanna Live Forever',
       'The Black Eyed Peas I Gotta Feeling', 'P Diddy I Need A Girl',
       'The Partridge Family Starring Shirley Jones I Think I Love You',
       'Paul Anka ', 'Logic 18002738255',
       'Ariana Grande Feat Doja Cat  Megan Thee Stallion 3435',
       'Cher  Peter Cetera After All', 'Don McLean American Pie',
       'The Guess Who American Woman', 'Paul McCartney Another Day',
       'Adam Wade As If I Didnt Know',
       'Justin Bieber As Long As You Love Me', 'Inner Circle Bad Boys',
       'Machine Gun Kelly x Camila Cabello Bad Things',
       'Monica Before You Walk Out Of My Life', 'Chicago Beginnings',
       'Jon Bon Jovi Blaze Of 

## Refetch likely wrong songs, but more smartly

In [20]:
def search_song_chose_first_then_get_lyrics(song, performer):
    try: 
        print(get_search_term(song, performer))
        songs = genius.search_songs(get_search_term(song, performer))
        urls = [song['result']['url'] for song in songs['hits']]
        print(urls)
        return [genius.lyrics(song_url=urls[0]), urls]
    except:
        print(f'OOPS: {performer} {song}')
        
        
def fetch_lyrics_and_urls(songs_subset_df):
    return songs_subset_df.apply(
        lambda row: search_song_chose_first_then_get_lyrics(song=row['song'], performer=row['performer']),
        axis=1
    )

In [21]:
# refetch_res = fetch_lyrics_and_urls(likely_wrong)

In [22]:
# _refetched_df = convert_fetched_lyrics_to_df(refetch_res, likely_wrong)
_TITLE = 'initial refetch of 179 from full that did not end in -lyrics'
# save_lyrics(_refetched_df, _TITLE)

_refetched_df = pd.read_csv(f'{LYRIC_FOLDER_PATH}/{_TITLE}.csv')
def convert_to_url(urls):
    try:
        return json.loads(urls)
    except:
        return None
_refetched_df['urls'] = _refetched_df.urls.apply(lambda urls: convert_to_url(urls))
_refetched_df

Unnamed: 0,song_id,lyrics,urls
0,"Hungry Eyes (From ""Dirty Dancing"")Eric Carmen",Hungry Eyes Lyrics[Verse 1]\nI've been meaning...,[https://genius.com/Eric-carmen-hungry-eyes-ly...
1,HungryPaul Revere & The Raiders Featuring Mark...,"Hungry Lyrics[Verse 1]\nGirl, you got this nee...",[https://genius.com/Paul-revere-and-the-raider...
2,I Am...I Said/Done Too SoonNeil Diamond,"I Am... I Said Lyrics[Verse 1]\nL.A.'s fine, t...",[https://genius.com/Neil-diamond-i-am-i-said-l...
3,"I Believe In You And Me (From ""The Preacher's ...",I Believe In You and Me Lyrics[Verse 1]\nI bel...,[https://genius.com/Whitney-houston-i-believe-...
4,I Can't Stand ItEric Clapton And His Band,I Can’t Stand It Lyrics[Verse 1]\nYou've been ...,[https://genius.com/Eric-clapton-i-cant-stand-...
...,...,...,...
174,"In The Still Of The Nite (From ""The Jacksons"")...",In the Still of the Nite (I’ll Remember) Lyric...,[https://genius.com/Boyz-ii-men-in-the-still-o...
175,Junior's Farm/Sally GPaul McCartney And Wings,"Junior’s Farm (DJ edit) LyricsAh, you should h...",[https://genius.com/Paul-mccartney-juniors-far...
176,"Keep On, Keepin' On (From ""Sunset Park"")MC Lyt...",Keep On Keepin’ On Lyrics[Verse 1: MC Lyte]\nB...,[https://genius.com/Mc-lyte-keep-on-keepin-on-...
177,O-o-h Child/Dear PrudenceThe 5 Stairsteps,The 500 Greatest Songs of All Time Lyrics1. Li...,[https://genius.com/Rolling-stone-the-500-grea...


## Filter out *just* the `probably_correct_now` data: (1) fetched succeeded (2) URL ends with `-lyrics`

In [23]:
_worked = _refetched_df[
    _refetched_df.urls.notna()
]
probably_correct_now = _worked[
    _worked.urls.apply(lambda urls: urls and urls[0].endswith('-lyrics'))
]
probably_correct_now

Unnamed: 0,song_id,lyrics,urls
0,"Hungry Eyes (From ""Dirty Dancing"")Eric Carmen",Hungry Eyes Lyrics[Verse 1]\nI've been meaning...,[https://genius.com/Eric-carmen-hungry-eyes-ly...
1,HungryPaul Revere & The Raiders Featuring Mark...,"Hungry Lyrics[Verse 1]\nGirl, you got this nee...",[https://genius.com/Paul-revere-and-the-raider...
2,I Am...I Said/Done Too SoonNeil Diamond,"I Am... I Said Lyrics[Verse 1]\nL.A.'s fine, t...",[https://genius.com/Neil-diamond-i-am-i-said-l...
3,"I Believe In You And Me (From ""The Preacher's ...",I Believe In You and Me Lyrics[Verse 1]\nI bel...,[https://genius.com/Whitney-houston-i-believe-...
4,I Can't Stand ItEric Clapton And His Band,I Can’t Stand It Lyrics[Verse 1]\nYou've been ...,[https://genius.com/Eric-clapton-i-cant-stand-...
...,...,...,...
173,"I'd Die Without You (From ""Boomerang"")P.M. Dawn",I’d Die Without You Lyrics[Verse 1]\nIs it my ...,[https://genius.com/Pm-dawn-id-die-without-you...
174,"In The Still Of The Nite (From ""The Jacksons"")...",In the Still of the Nite (I’ll Remember) Lyric...,[https://genius.com/Boyz-ii-men-in-the-still-o...
175,Junior's Farm/Sally GPaul McCartney And Wings,"Junior’s Farm (DJ edit) LyricsAh, you should h...",[https://genius.com/Paul-mccartney-juniors-far...
176,"Keep On, Keepin' On (From ""Sunset Park"")MC Lyt...",Keep On Keepin’ On Lyrics[Verse 1: MC Lyte]\nB...,[https://genius.com/Mc-lyte-keep-on-keepin-on-...


## ... now merge in JUST the songs that we (likely) corrected succesfully

In [24]:
ready_for_manual_fixes = pd.concat([
    remove_overlap(lyrics_sans_ins_with_fixes, likely_wrong),
    probably_correct_now
]).reset_index(drop=True)
ready_for_manual_fixes

Unnamed: 0,song_id,lyrics,urls
0,(I Can't Get No) SatisfactionThe Rolling Stones,(I Can’t Get No) Satisfaction Lyrics[Instrumen...,"[""https://genius.com/The-rolling-stones-i-cant..."
1,(I Just) Died In Your ArmsCutting Crew,(I Just) Died in Your Arms Lyrics[Intro]\nOh I...,"[""https://genius.com/Cutting-crew-i-just-died-..."
2,(I Know) I'm Losing YouThe Temptations,(I Know) I’m Losing You Lyrics[Intro]\nOoh\nYo...,"[""https://genius.com/The-temptations-i-know-im..."
3,(I've Had) The Time Of My LifeBill Medley & Je...,(I’ve Had) The Time of My Life Lyrics[Chorus: ...,"[""https://genius.com/Bill-medley-and-jennifer-..."
4,1 ThingAmerie,1 Thing Lyrics[Intro]\nWoo!\nUh\nWoo!\nNa-na-n...,"[""https://genius.com/Amerie-1-thing-lyrics"", ""..."
...,...,...,...
4945,"I'd Die Without You (From ""Boomerang"")P.M. Dawn",I’d Die Without You Lyrics[Verse 1]\nIs it my ...,[https://genius.com/Pm-dawn-id-die-without-you...
4946,"In The Still Of The Nite (From ""The Jacksons"")...",In the Still of the Nite (I’ll Remember) Lyric...,[https://genius.com/Boyz-ii-men-in-the-still-o...
4947,Junior's Farm/Sally GPaul McCartney And Wings,"Junior’s Farm (DJ edit) LyricsAh, you should h...",[https://genius.com/Paul-mccartney-juniors-far...
4948,"Keep On, Keepin' On (From ""Sunset Park"")MC Lyt...",Keep On Keepin’ On Lyrics[Verse 1: MC Lyte]\nB...,[https://genius.com/Mc-lyte-keep-on-keepin-on-...


### We'll deal with these below:

In [25]:
ready_for_manual_fixes.urls.isna().sum()

68

## Inspect the "fishy" non `-lyric` ending urls

In [26]:
_non_lyrics_word_url_ending = remove_overlap(_worked, probably_correct_now)
_non_lyrics_word_url_ending
# [print(x) for x in _non_lyrics_word_url_ending.apply(lambda row: row['song_id'] + '\n' + row['urls'][0] + '\n', axis=1).values]
# _non_lyrics_word_url_ending.urls.apply(lambda urls: urls[0]).values

Unnamed: 0,song_id,lyrics,urls
5,I Can't Stop DancingArchie Bell & The Drells,"I Can’t Stop Dancing Lyrics(Oh, no, I just can...",[https://genius.com/Archie-bell-and-the-drells...
8,I Gotta FeelingThe Black Eyed Peas,Soundtrack Of My Life Lyrics1. I am born (1998...,[https://genius.com/Celedon-cq-soundtrack-of-m...
10,I Think I Love YouThe Partridge Family Starrin...,June 2021 Singles Release Calendar Lyrics6/1\n...,[https://genius.com/Genius-june-2021-singles-r...
13,34+35Ariana Grande Feat. Doja Cat & Megan Thee...,Doja Cat [Discography List] LyricsStudio Album...,[https://genius.com/Doja-cat-doja-cat-discogra...
18,As If I Didn't KnowAdam Wade,Lady Chatterley’s Lover (Chap. 16) LyricsConni...,[https://genius.com/D-h-lawrence-lady-chatterl...
21,Bad ThingsMachine Gun Kelly x Camila Cabello,This Is Machine Gun Kelly LyricsUpdate Playlis...,[https://genius.com/Spotify-this-is-machine-gu...
26,Boom Boom PowThe Black Eyed Peas,Nostalgic Songs LyricsElementary School and Yo...,[https://genius.com/Formerly-vinylzombie-mcr-n...
33,Convention '72The Delegates,State of the Union 1858 Lyrics ...,[https://genius.com/James-buchanan-state-of-th...
39,Do We Have A Problem?Nicki Minaj X Lil Baby,Today’s Top Hits 2/4/22 (ft. Lil Nas X) Lyrics...,[https://genius.com/Spotify-todays-top-hits-2-...
40,Doesn't Somebody Want To Be WantedThe Partridg...,Angela’s Ashes LyricsAngela's Ashes\n\nA Memoi...,[https://genius.com/Frank-mccourt-angelas-ashe...


#### Export them so my girl Giana can fix them (🥹 )

In [27]:
_2for_giana_to_correct = _non_lyrics_word_url_ending.merge(
    on='song_id', 
    right=songs_df[['song_id', 'song', 'performer', 'chart_debut']],
).drop(columns=['lyrics', 'song_id'])
_2for_giana_to_correct
save_lyrics(_2for_giana_to_correct, 'TEMP: for Giana to correct 2')

Unnamed: 0,urls,song,performer,chart_debut
0,[https://genius.com/Archie-bell-and-the-drells...,I Can't Stop Dancing,Archie Bell & The Drells,1968-07-20
1,[https://genius.com/Celedon-cq-soundtrack-of-m...,I Gotta Feeling,The Black Eyed Peas,2009-06-27
2,[https://genius.com/Genius-june-2021-singles-r...,I Think I Love You,The Partridge Family Starring Shirley Jones Fe...,1970-10-10
3,[https://genius.com/Doja-cat-doja-cat-discogra...,34+35,Ariana Grande Feat. Doja Cat & Megan Thee Stal...,2021-01-02
4,[https://genius.com/D-h-lawrence-lady-chatterl...,As If I Didn't Know,Adam Wade,1961-07-22
5,[https://genius.com/Spotify-this-is-machine-gu...,Bad Things,Machine Gun Kelly x Camila Cabello,2016-11-05
6,[https://genius.com/Formerly-vinylzombie-mcr-n...,Boom Boom Pow,The Black Eyed Peas,2009-03-28
7,[https://genius.com/James-buchanan-state-of-th...,Convention '72,The Delegates,1972-10-21
8,[https://genius.com/Spotify-todays-top-hits-2-...,Do We Have A Problem?,Nicki Minaj X Lil Baby,2022-02-19
9,[https://genius.com/Frank-mccourt-angelas-ashe...,Doesn't Somebody Want To Be Wanted,The Partridge Family Starring Shirley Jones Fe...,1971-02-13


# 5.  Manually (thanks, Giana!) fix missing / problematic URLs

### We found 179 likely wrong (cuz of url). POST RE-FETCH: 123 likely right now => 56 not covered. 44 "non `-lyrics`" urls, 12 failed

### ALSO: in the larger dataset there are 68 just missing URLS in general

### SO: (A) have Giana help with the 80 URL-less songs (+ 4 I manually found) (B) have her do the 44 "non `-lyrics`" URLS   

## A. Fix the 80 URL-less songs (existing + failed fetch)

### Ok, so these ones *could* be right... they just don't have a URL...

In [28]:
never_got_assigned_urls = lyrics_sans_instrumentals[
    lyrics_sans_instrumentals.urls.isna()
]
never_got_assigned_urls

Unnamed: 0,song_id,lyrics,urls
4574,Drop It Like It's HotSnoop Dogg Featuring Phar...,TranslationsPortuguêsDrop It Like It’s Hot Lyr...,
4575,Flashdance...What A FeelingIrene Cara,Flashdance... What a Feeling Lyrics[Intro]\nFi...,
4576,I Love ItIcona Pop Featuring Charli XCX,TranslationsPortuguêsI Love It Lyrics[Chorus: ...,
4577,Let Me Blow Ya MindEve Featuring Gwen Stefani,"Let Me Blow Ya Mind Lyrics[Intro: Eve]\nUh, uh...",
4578,Moves Like JaggerMaroon 5 Featuring Christina ...,Moves Like Jagger Lyrics[Intro: Adam Levine]\n...,
...,...,...,...
4637,We Are Youngfun. Featuring Janelle Monae,We Are Young Lyrics[Verse 1: Nate Ruess]\nGive...,
4638,We Found LoveRihanna Featuring Calvin Harris,TranslationsPortuguêsEnglishFrançaisWe Found L...,
4639,We R Who We RKe$ha,TranslationsPortuguêsWe R Who We R Lyrics[Vers...,
4640,Young ForeverJay-Z + Mr. Hudson,Young Forever Lyrics[Intro: Mr. Hudson]\nLet's...,


### These DEFINITELY need URLs cuz the fetch just failed on them

In [29]:
_refetched_df.urls.isna().sum()

12

In [30]:
_refetch_completely_failed = _refetched_df[
    _refetched_df.lyrics.isna()
]
_refetch_completely_failed.reset_index(drop=True)

Unnamed: 0,song_id,lyrics,urls
0,1-800-273-8255Logic Featuring Alessia Cara & K...,,
1,American Pie (Parts I & II)Don McLean,,
2,American Woman/No Sugar TonightThe Guess Who,,
3,As Long As You Love MeJustin Bieber Featuring ...,,
4,FancyIggy Azalea Featuring Charli XCX,,
5,"It Must Have Been Love (From ""Pretty Woman"")Ro...",,
6,Love Machine (Part 1)The Miracles,,
7,Mood24kGoldn Featuring iann dior,,
8,One DanceDrake Featuring WizKid & Kyla,,
9,Say Say SayPaul McCartney And Michael Jackson,,


### What I want Giana to correct part A

In [31]:
_for_giana_to_correct = pd.concat([
    never_got_assigned_urls,
    _refetch_completely_failed
]).drop_duplicates(subset=['song_id']).merge(
    on='song_id', 
    right=songs_df[['song_id', 'song', 'performer', 'chart_debut']],
).drop(columns=['lyrics', 'song_id'])
_for_giana_to_correct
# save_lyrics(_for_giana_to_correct, 'TEMP: for Giana to correct')

Unnamed: 0,urls,song,performer,chart_debut
0,,Drop It Like It's Hot,Snoop Dogg Featuring Pharrell,2004-10-02
1,,Flashdance...What A Feeling,Irene Cara,1983-04-02
2,,I Love It,Icona Pop Featuring Charli XCX,2013-02-16
3,,Let Me Blow Ya Mind,Eve Featuring Gwen Stefani,2001-04-28
4,,Moves Like Jagger,Maroon 5 Featuring Christina Aguilera,2011-07-09
...,...,...,...,...
75,,Mood,24kGoldn Featuring iann dior,2020-08-22
76,,One Dance,Drake Featuring WizKid & Kyla,2016-04-23
77,,Say Say Say,Paul McCartney And Michael Jackson,1983-10-15
78,,"Take My Breath Away (Love Theme From ""Top Gun"")",Berlin,1986-06-21


### ✅ (A) NEW URLS ADDED BY GIANA. Now download the lyrics & merge them!

Note: I found 80 problematic URLs progamatically, and added 7 found via viz inspection

### note... WE'RE DROPPING some gems: 
# NEW NOTE: actually I'm going to add these back in a later notebook lol
- "Convention '72" is this hilarious mashup of a political inteview & random / irreverent snippets of songs lol https://www.youtube.com/watch?v=gofKPwPDAg8

In [32]:
urls_fixed_by_giana_RAW = pd.read_csv(f'{LYRIC_FOLDER_PATH}/{"urls corrected by Giana Ded 14 2022"}.csv')

urls_fixed_by_giana_RAW['song_id'] = urls_fixed_by_giana_RAW.song + urls_fixed_by_giana_RAW.performer
urls_fixed_by_giana_RAW = urls_fixed_by_giana_RAW.drop(
        columns=['song', 'performer', 'chart_debut']
    ).rename(
        columns={'PASTE LYRICS URL HERE': 'urls'}
    )

urls_fixed_by_giana = urls_fixed_by_giana_RAW[
    urls_fixed_by_giana_RAW.urls != IS_INSTRUMENTAL_RETURN_FLAG
]
urls_fixed_by_giana

Unnamed: 0,urls,song_id
0,https://genius.com/Snoop-dogg-drop-it-like-its...,Drop It Like It's HotSnoop Dogg Featuring Phar...
1,https://genius.com/Irene-cara-flashdance-what-...,Flashdance...What A FeelingIrene Cara
2,https://genius.com/Icona-pop-i-love-it-lyrics,I Love ItIcona Pop Featuring Charli XCX
3,https://genius.com/Eve-let-me-blow-ya-mind-lyrics,Let Me Blow Ya MindEve Featuring Gwen Stefani
4,https://genius.com/Maroon-5-moves-like-jagger-...,Moves Like JaggerMaroon 5 Featuring Christina ...
...,...,...
82,https://genius.com/The-four-seasons-ive-got-yo...,"The 4 Seasons Featuring the ""Sound of Frankie ..."
83,https://genius.com/Linda-ronstadt-and-james-in...,"Somewhere Out There (From ""An American Tail"")L..."
84,https://genius.com/Dram-broccoli-lyrics,BroccoliD.R.A.M. Featuring Lil Yachty
85,https://genius.com/Fabolous-into-you-lyrics,Into YouFabolous Featuring Tamia Or Ashanti


In [33]:
def _fetch_lyrics_for_song(urls):
    try: 
        print(urls[0])
        return [genius.lyrics(song_url=urls[0]), urls]
    except:
        print(f'OOPS!')
        
        
def _fetch_lyrics_via_url(songs_subset_df):
    return songs_subset_df.apply(
        lambda row: _fetch_lyrics_for_song(urls=[row['urls']]),
        axis=1
    )

In [34]:
# _4giana_refetch = _fetch_lyrics_via_url(urls_fixed_by_giana)

In [35]:
# fetched_giana_urls = convert_fetched_lyrics_to_df(_4giana_refetch, urls_fixed_by_giana)
# fetched_giana_urls

In [36]:
# save_lyrics(fetched_giana_urls, 'URLs from Giana, with the lyrics fetched for them')
fetched_giana_urls1 = pd.read_csv(f'{LYRIC_FOLDER_PATH}/{"URLs from Giana, with the lyrics fetched for them"}.csv')
fetched_giana_urls1

Unnamed: 0,song_id,lyrics,urls
0,Drop It Like It's HotSnoop Dogg Featuring Phar...,TranslationsPortuguêsDrop It Like It’s Hot Lyr...,"[""https://genius.com/Snoop-dogg-drop-it-like-i..."
1,Flashdance...What A FeelingIrene Cara,Flashdance... What a Feeling Lyrics[Intro]\nFi...,"[""https://genius.com/Irene-cara-flashdance-wha..."
2,I Love ItIcona Pop Featuring Charli XCX,TranslationsPortuguêsI Love It Lyrics[Chorus: ...,"[""https://genius.com/Icona-pop-i-love-it-lyrics""]"
3,Let Me Blow Ya MindEve Featuring Gwen Stefani,"Let Me Blow Ya Mind Lyrics[Intro: Eve]\nUh, uh...","[""https://genius.com/Eve-let-me-blow-ya-mind-l..."
4,Moves Like JaggerMaroon 5 Featuring Christina ...,Moves Like Jagger Lyrics[Intro: Adam Levine]\n...,"[""https://genius.com/Maroon-5-moves-like-jagge..."
...,...,...,...
79,"The 4 Seasons Featuring the ""Sound of Frankie ...",I’ve Got You Under My Skin LyricsI've got you ...,"[""https://genius.com/The-four-seasons-ive-got-..."
80,"Somewhere Out There (From ""An American Tail"")L...",Somewhere Out There Lyrics[Verse 1: Linda Rons...,"[""https://genius.com/Linda-ronstadt-and-james-..."
81,BroccoliD.R.A.M. Featuring Lil Yachty,Broccoli Lyrics[Intro: D.R.A.M.]\nAin't no tel...,"[""https://genius.com/Dram-broccoli-lyrics""]"
82,Into YouFabolous Featuring Tamia Or Ashanti,Into You Lyrics[Intro: Fabolous & Ashanti]\nBa...,"[""https://genius.com/Fabolous-into-you-lyrics""]"


## PART (B): refetch lyrics (using Giana's correct URLs 2) for the 44 songs whose URL did *not* previously end in `-lyrics`

In [37]:
urls2_fixed_by_giana_RAW = pd.read_csv(f'{LYRIC_FOLDER_PATH}/{"urls corrected by Giana 2 Dec 15 2022"}.csv')

urls2_fixed_by_giana_RAW['song_id'] = urls2_fixed_by_giana_RAW.song + urls2_fixed_by_giana_RAW.performer
urls2_fixed_by_giana_RAW = urls2_fixed_by_giana_RAW.drop(
        columns=['song', 'performer', 'chart_debut']
    ).rename(
        columns={'ADD URL HERE': 'urls'}
    )

urls2_fixed_by_giana = urls2_fixed_by_giana_RAW[urls2_fixed_by_giana_RAW.urls != IS_INSTRUMENTAL_RETURN_FLAG]
urls2_fixed_by_giana

Unnamed: 0,urls,song_id
0,https://genius.com/Archie-bell-i-cant-stop-dan...,I Can't Stop DancingArchie Bell & The Drells
1,https://genius.com/Black-eyed-peas-i-gotta-fee...,I Gotta FeelingThe Black Eyed Peas
2,https://genius.com/The-partridge-family-i-thin...,I Think I Love YouThe Partridge Family Starrin...
3,https://genius.com/Ariana-grande-34-35-lyrics,34+35Ariana Grande Feat. Doja Cat & Megan Thee...
4,https://genius.com/Adam-wade-as-if-i-didnt-kno...,As If I Didn't KnowAdam Wade
5,https://genius.com/Machine-gun-kelly-and-camil...,Bad ThingsMachine Gun Kelly x Camila Cabello
6,https://genius.com/Black-eyed-peas-boom-boom-p...,Boom Boom PowThe Black Eyed Peas
8,https://genius.com/Nicki-minaj-and-lil-baby-do...,Do We Have A Problem?Nicki Minaj X Lil Baby
9,https://genius.com/The-partridge-family-doesnt...,Doesn't Somebody Want To Be WantedThe Partridg...
10,https://genius.com/Imagine-dragons-and-jid-ene...,EnemyImagine Dragons X JID


In [38]:
# _giana2_refetch = _fetch_lyrics_via_url(urls2_fixed_by_giana)

In [39]:
# fetched2_giana_urls = convert_fetched_lyrics_to_df(_giana2_refetch, urls2_fixed_by_giana)
# fetched2_giana_urls

In [40]:
# save_lyrics(fetched2_giana_urls, 'URLs from Giana 2, with the lyrics fetched for them')
fetched_giana_urls2 = pd.read_csv(f'{LYRIC_FOLDER_PATH}/{"URLs from Giana 2, with the lyrics fetched for them"}.csv')
fetched_giana_urls2

Unnamed: 0,song_id,lyrics,urls
0,I Can't Stop DancingArchie Bell & The Drells,"I Can’t Stop Dancing Lyrics(Oh, no, I just can...","[""https://genius.com/Archie-bell-i-cant-stop-d..."
1,I Gotta FeelingThe Black Eyed Peas,TranslationsEnglishI Gotta Feeling Lyrics[Chor...,"[""https://genius.com/Black-eyed-peas-i-gotta-f..."
2,I Think I Love YouThe Partridge Family Starrin...,"I Think I Love You Lyrics[Intro]\nBaaa, ba ba ...","[""https://genius.com/The-partridge-family-i-th..."
3,34+35Ariana Grande Feat. Doja Cat & Megan Thee...,TranslationsPolskiTürkçeEspañolHrvatskiPortugu...,"[""https://genius.com/Ariana-grande-34-35-lyrics""]"
4,As If I Didn't KnowAdam Wade,As If I Didn’t Know Lyrics[Verse 1]\nYou swore...,"[""https://genius.com/Adam-wade-as-if-i-didnt-k..."
5,Bad ThingsMachine Gun Kelly x Camila Cabello,Bad Things Lyrics[Chorus: Camila Cabello]\nAm ...,"[""https://genius.com/Machine-gun-kelly-and-cam..."
6,Boom Boom PowThe Black Eyed Peas,Boom Boom Pow Lyrics[Album Intro]\nWelcome\nWe...,"[""https://genius.com/Black-eyed-peas-boom-boom..."
7,Do We Have A Problem?Nicki Minaj X Lil Baby,TranslationsTürkçeEspañolРусскийPortuguêsDo We...,"[""https://genius.com/Nicki-minaj-and-lil-baby-..."
8,Doesn't Somebody Want To Be WantedThe Partridg...,Doesn’t Somebody Want To Be Wanted Lyrics[Intr...,"[""https://genius.com/The-partridge-family-does..."
9,EnemyImagine Dragons X JID,TranslationsEspañolEnglishDeutschTagalog / ᜆᜄᜎ...,"[""https://genius.com/Imagine-dragons-and-jid-e..."


# 7. Merge Giana-assisted fixes into the main dataset! (and update the list of instrumentals)

## First, identify the instrumentals from this notebook, update total record

In [41]:
NEW_INSTRUMENTALS_FOUND

['Our Winter LoveBill Pursell']

In [42]:
instrumentals_from_giana = urls_fixed_by_giana_RAW[urls_fixed_by_giana_RAW.urls == IS_INSTRUMENTAL_RETURN_FLAG].song_id.values


In [43]:
instrumentals2_from_giana = urls2_fixed_by_giana_RAW[urls2_fixed_by_giana_RAW.urls == IS_INSTRUMENTAL_RETURN_FLAG].song_id.values


In [44]:
ALL_NEW_INSTRUMENTALS = [*NEW_INSTRUMENTALS_FOUND, *list(instrumentals_from_giana), *list(instrumentals2_from_giana)]
ALL_NEW_INSTRUMENTALS

['Our Winter LoveBill Pursell',
 'Near YouRoger Williams',
 'An Open Letter To My Teenage SonVictor Lundberg',
 'Peter GunnRay Anthony and His Orchestra',
 "Convention '72The Delegates",
 'MoreKai Winding & Orchestra',
 'Quiet VillageThe Exotic Sounds of Martin Denny',
 'Stranger On The ShoreMr. Acker Bilk',
 'The Theme From Hill Street BluesMike Post featuring Larry Carlton']

In [45]:
new_instrumentals_df = pd.DataFrame({'song_id': ALL_NEW_INSTRUMENTALS, 'lyrics': IS_INSTRUMENTAL_RETURN_FLAG, 'urls': None})
new_instrumentals_df


Unnamed: 0,song_id,lyrics,urls
0,Our Winter LoveBill Pursell,THIS_IS_AN_INSTRUMENTAL,
1,Near YouRoger Williams,THIS_IS_AN_INSTRUMENTAL,
2,An Open Letter To My Teenage SonVictor Lundberg,THIS_IS_AN_INSTRUMENTAL,
3,Peter GunnRay Anthony and His Orchestra,THIS_IS_AN_INSTRUMENTAL,
4,Convention '72The Delegates,THIS_IS_AN_INSTRUMENTAL,
5,MoreKai Winding & Orchestra,THIS_IS_AN_INSTRUMENTAL,
6,Quiet VillageThe Exotic Sounds of Martin Denny,THIS_IS_AN_INSTRUMENTAL,
7,Stranger On The ShoreMr. Acker Bilk,THIS_IS_AN_INSTRUMENTAL,
8,The Theme From Hill Street BluesMike Post feat...,THIS_IS_AN_INSTRUMENTAL,


In [46]:
INSTR_FILE = "INSTRUMENTAL SONGS final"
_existing_istrumentals = pd.read_csv(f'{LYRIC_FOLDER_PATH}/{INSTR_FILE}.csv')
updated_instr = pd.concat([_existing_istrumentals, new_instrumentals_df]).drop_duplicates()
updated_instr
save_lyrics(updated_instr, INSTR_FILE)

Unnamed: 0,song_id,lyrics,urls
0,95.southJ. Cole,THIS_IS_AN_INSTRUMENTAL,"[""https://genius.com/8-bit-arcade-95-south-8-b..."
1,A Fifth Of BeethovenWalter Murphy & The Big Ap...,THIS_IS_AN_INSTRUMENTAL,"[""https://genius.com/Walter-murphy-a-fifth-of-..."
2,Axel FHarold Faltermeyer,THIS_IS_AN_INSTRUMENTAL,"[""https://genius.com/Harold-faltermeyer-axel-f..."
3,Can't Hold UsMacklemore & Ryan Lewis Featuring...,THIS_IS_AN_INSTRUMENTAL,"[""https://genius.com/Cant-hold-us-cant-hold-us..."
4,Classical GasMason Williams,THIS_IS_AN_INSTRUMENTAL,"[""https://genius.com/Mason-williams-classical-..."
...,...,...,...
64,Quiet VillageThe Exotic Sounds of Martin Denny,THIS_IS_AN_INSTRUMENTAL,
65,Stranger On The ShoreMr. Acker Bilk,THIS_IS_AN_INSTRUMENTAL,
66,The Theme From Hill Street BluesMike Post feat...,THIS_IS_AN_INSTRUMENTAL,
67,Percolator (Twist)Billy Joe & The Checkmates,THIS_IS_AN_INSTRUMENTAL,


## Second, merge the fixed data in

In [47]:
giana_updates_all = pd.concat([fetched_giana_urls1, fetched_giana_urls2]).drop_duplicates()
giana_updates_all

Unnamed: 0,song_id,lyrics,urls
0,Drop It Like It's HotSnoop Dogg Featuring Phar...,TranslationsPortuguêsDrop It Like It’s Hot Lyr...,"[""https://genius.com/Snoop-dogg-drop-it-like-i..."
1,Flashdance...What A FeelingIrene Cara,Flashdance... What a Feeling Lyrics[Intro]\nFi...,"[""https://genius.com/Irene-cara-flashdance-wha..."
2,I Love ItIcona Pop Featuring Charli XCX,TranslationsPortuguêsI Love It Lyrics[Chorus: ...,"[""https://genius.com/Icona-pop-i-love-it-lyrics""]"
3,Let Me Blow Ya MindEve Featuring Gwen Stefani,"Let Me Blow Ya Mind Lyrics[Intro: Eve]\nUh, uh...","[""https://genius.com/Eve-let-me-blow-ya-mind-l..."
4,Moves Like JaggerMaroon 5 Featuring Christina ...,Moves Like Jagger Lyrics[Intro: Adam Levine]\n...,"[""https://genius.com/Maroon-5-moves-like-jagge..."
...,...,...,...
34,"Theme From ""Greatest American Hero"" (Believe I...",Believe It or Not LyricsLook at what's happene...,"[""https://genius.com/Joey-scarbury-believe-it-..."
35,Where Is The Love?The Black Eyed Peas,TranslationsPortuguêsEnglishWhere Is the Love?...,"[""https://genius.com/Black-eyed-peas-where-is-..."
36,Will You Love Me TomorrowThe Shirelles,Will You Still Love Me Tomorrow Lyrics[Verse 1...,"[""https://genius.com/The-shirelles-will-you-st..."
37,Yellow BirdArthur Lyman Group,"Yellow Bird LyricsYellow bird, yellow bird\nI'...","[""https://genius.com/Arthur-lyman-yellow-bird-..."


In [48]:
_merged_giana = pd.concat([
    remove_overlap(ready_for_manual_fixes, giana_updates_all),
    giana_updates_all
]).reset_index(drop=True)

ready_for_export = _merged_giana[
    _merged_giana.song_id.apply(lambda song_id: song_id not in updated_instr.song_id.values)
].drop_duplicates(subset='song_id') # to prove it to ourselves

ready_for_export['urls'] = ready_for_export['urls'].apply(
    lambda urls: urls if isinstance(urls, list) else json.loads(urls)
)
ready_for_export

Unnamed: 0,song_id,lyrics,urls
0,(I Can't Get No) SatisfactionThe Rolling Stones,(I Can’t Get No) Satisfaction Lyrics[Instrumen...,[https://genius.com/The-rolling-stones-i-cant-...
1,(I Just) Died In Your ArmsCutting Crew,(I Just) Died in Your Arms Lyrics[Intro]\nOh I...,[https://genius.com/Cutting-crew-i-just-died-i...
2,(I Know) I'm Losing YouThe Temptations,(I Know) I’m Losing You Lyrics[Intro]\nOoh\nYo...,[https://genius.com/The-temptations-i-know-im-...
3,(I've Had) The Time Of My LifeBill Medley & Je...,(I’ve Had) The Time of My Life Lyrics[Chorus: ...,[https://genius.com/Bill-medley-and-jennifer-w...
4,1 ThingAmerie,1 Thing Lyrics[Intro]\nWoo!\nUh\nWoo!\nNa-na-n...,"[https://genius.com/Amerie-1-thing-lyrics, htt..."
...,...,...,...
4996,"Theme From ""Greatest American Hero"" (Believe I...",Believe It or Not LyricsLook at what's happene...,[https://genius.com/Joey-scarbury-believe-it-o...
4997,Where Is The Love?The Black Eyed Peas,TranslationsPortuguêsEnglishWhere Is the Love?...,[https://genius.com/Black-eyed-peas-where-is-t...
4998,Will You Love Me TomorrowThe Shirelles,Will You Still Love Me Tomorrow Lyrics[Verse 1...,[https://genius.com/The-shirelles-will-you-sti...
4999,Yellow BirdArthur Lyman Group,"Yellow Bird LyricsYellow bird, yellow bird\nI'...",[https://genius.com/Arthur-lyman-yellow-bird-l...


# Sanity checks:

In [49]:
#sanity check
ready_for_export.lyrics.isna().sum() + ready_for_export.urls.isna().sum()

0

### Manual inspection of these non `-lyrics` URLS shows they are correct 😊  

In [50]:
ready_for_export[
    ready_for_export.urls.apply(lambda url: not url[0].endswith('-lyrics'))
].urls.apply(lambda urls: urls[0]).values

array(['https://genius.com/Tom-jones-ill-never-fall-in-love-again-annotated',
       'https://genius.com/Major-harris-love-wont-let-me-wait-annotated'],
      dtype=object)

# Export

In [51]:
LYRICS_DATASET = '2.4 OUTPUT lyrics for all songs sans instrumentals'
save_lyrics(ready_for_export, LYRICS_DATASET)

Unnamed: 0,song_id,lyrics,urls
0,(I Can't Get No) SatisfactionThe Rolling Stones,(I Can’t Get No) Satisfaction Lyrics[Instrumen...,[https://genius.com/The-rolling-stones-i-cant-...
1,(I Just) Died In Your ArmsCutting Crew,(I Just) Died in Your Arms Lyrics[Intro]\nOh I...,[https://genius.com/Cutting-crew-i-just-died-i...
2,(I Know) I'm Losing YouThe Temptations,(I Know) I’m Losing You Lyrics[Intro]\nOoh\nYo...,[https://genius.com/The-temptations-i-know-im-...
3,(I've Had) The Time Of My LifeBill Medley & Je...,(I’ve Had) The Time of My Life Lyrics[Chorus: ...,[https://genius.com/Bill-medley-and-jennifer-w...
4,1 ThingAmerie,1 Thing Lyrics[Intro]\nWoo!\nUh\nWoo!\nNa-na-n...,"[https://genius.com/Amerie-1-thing-lyrics, htt..."
...,...,...,...
4996,"Theme From ""Greatest American Hero"" (Believe I...",Believe It or Not LyricsLook at what's happene...,[https://genius.com/Joey-scarbury-believe-it-o...
4997,Where Is The Love?The Black Eyed Peas,TranslationsPortuguêsEnglishWhere Is the Love?...,[https://genius.com/Black-eyed-peas-where-is-t...
4998,Will You Love Me TomorrowThe Shirelles,Will You Still Love Me Tomorrow Lyrics[Verse 1...,[https://genius.com/The-shirelles-will-you-sti...
4999,Yellow BirdArthur Lyman Group,"Yellow Bird LyricsYellow bird, yellow bird\nI'...",[https://genius.com/Arthur-lyman-yellow-bird-l...
