In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from IPython.display import display, Markdown

import pandas as pd
import numpy as np

import json

np.__version__

'1.26.3'

# Pull in data

In [4]:
PREVIOUS_OUTPUT_PATH = './data/22.2-v2-OUTPUT-data-through-sep-2023-ALL-SONGS-LABELLED-WITH-FINAL-VERSION.csv'
prev_df = pd.read_csv(PREVIOUS_OUTPUT_PATH)
prev_df['song_id'] = prev_df['song'] + prev_df['performer']
prev_df

Unnamed: 0,song_id,chart_debut,song,performer,date_as_decimal,popularity_score,total_weeks_in_top_10,love_song_sub_type
0,#9 DreamJohn Lennon,1975-02-15,#9 Dream,John Lennon,1975.13,3,2,
1,'03 Bonnie & ClydeJay-Z Featuring Beyonce Knowles,2002-12-07,'03 Bonnie & Clyde,Jay-Z Featuring Beyonce Knowles,2002.93,56,11,It's Complicated
2,'65 Love AffairPaul Davis,1982-04-24,'65 Love Affair,Paul Davis,1982.31,23,7,
3,('til) I Kissed YouThe Everly Brothers,1959-09-07,('til) I Kissed You,The Everly Brothers,1959.68,44,7,Serenade
4,(Can't Live Without Your) Love And AffectionNe...,1990-09-08,(Can't Live Without Your) Love And Affection,Nelson,1990.69,39,7,Longing & Heartbreak
...,...,...,...,...,...,...,...,...
5136,amariJ. Cole,2021-05-29,amari,J. Cole,2021.41,6,1,
5137,iSpyKYLE Featuring Lil Yachty,2017-04-01,iSpy,KYLE Featuring Lil Yachty,2017.25,31,8,Sexual Conquest
5138,interludeJ. Cole,2021-05-22,interlude,J. Cole,2021.39,3,1,
5139,"my.lifeJ. Cole, 21 Savage & Morray",2021-05-29,my.life,"J. Cole, 21 Savage & Morray",2021.41,9,1,


# Pull out the data which needs labeling: all `Good Riddance` songs

In [5]:
# get a df of songs where song_id is in songs_df but not in prev_df
new_songs_df = prev_df[
    prev_df.love_song_sub_type == 'Good Riddance'
]
new_songs_df = new_songs_df.sort_values(by='date_as_decimal')

new_songs_df

Unnamed: 0,song_id,chart_debut,song,performer,date_as_decimal,popularity_score,total_weeks_in_top_10,love_song_sub_type
3325,Poor Little FoolRicky Nelson,1958-08-04,Poor Little Fool,Ricky Nelson,1958.59,43,6,Good Riddance
3807,Smoke Gets In Your EyesThe Platters,1958-12-15,Smoke Gets In Your Eyes,The Platters,1958.95,85,10,Good Riddance
4142,Tell Him NoTravis & Bob,1959-04-27,Tell Him No,Travis & Bob,1959.32,6,2,Good Riddance
2555,Lipstick On Your CollarConnie Francis,1959-06-22,Lipstick On Your Collar,Connie Francis,1959.47,25,6,Good Riddance
3232,Paper RosesAnita Bryant,1960-05-23,Paper Roses,Anita Bryant,1960.39,23,6,Good Riddance
...,...,...,...,...,...,...,...,...
2381,Kill BillSZA,2022-12-24,Kill Bill,SZA,2022.98,209,28,Good Riddance
601,"Bzrp Music Sessions, Vol. 53Bizarrap & Shakira",2023-01-28,"Bzrp Music Sessions, Vol. 53",Bizarrap & Shakira,2023.08,2,1,Good Riddance
538,"Boy's A Liar, Pt. 2PinkPantheress & Ice Spice",2023-02-25,"Boy's A Liar, Pt. 2",PinkPantheress & Ice Spice,2023.15,55,12,Good Riddance
4087,TQGKarol G x Shakira,2023-03-11,TQG,Karol G x Shakira,2023.19,4,1,Good Riddance


# Fetch new labels using GPT4

In [6]:
import os
from openai import OpenAI

# First, activate the './.env' file which sets the OPENAI_API_KEY environment variable using python-dotenv
from dotenv import load_dotenv
load_dotenv()

client = OpenAI(
    # This is the default and can be omitted
    api_key=os.getenv("OPENAI_API_KEY"),
)

In [7]:
import ast

def eval_this_str(s):
    return ast.literal_eval(s)

In [23]:
LOVE_SONG_LABELING_PROMPT = """
You are an expert pop music critic, able to label pop songs as different kinds of love songs using subtle context, emotions, & relational implications.

For each Billboard Top 10 hit in the form [song, performer] in the larger input array, I want you to respond with a valid JSON array of:

[performer, song, justification, love_song_subtype_label]

The justification is a short snippet explaining why the song best fits in the chosen category.

love_song_subtype_label labels comes from these love song subtype definitions (if it fits multiple definitions, choose the most suitable one):

"Serenade": A song directed at someone the singer loves, expressing their deep affection, undying love, infatuation, or praising someone as a soulmate.

"Sexual Conquest": Songs that are less about the object of affection and more about the singer's own prowess or success in romantic pursuits.

"Love Song for the Self": Songs that express self-love and affirmation, whether for one's character or physical qualities, sometimes following a period of growth or learning from past relationships.

"Good Riddance": when heartbreak has resurrected into righteous power & anger. typically after a breakup with someone who now seems unsuitable. songs about having had someone's love, but being glad now you've moved on (or realizing you should), often involving self-empowerment, anger, revenge, relief, or bitterness, often because of betrayal or potentially cheating.

"Longing & Heartbreak": songs where you love or miss someone, but they are unable to return your love. Either because you broke up and the relationship is over, or because you were never together and the love was unrequited. COuld be full of regret, longing, frustration, sadness, hopelessness, even grief.

"Courtship & Anticipation": the speaker is courting or seeking someone's romantic or sexual love, but they don't have it yet. The love interest does not yet know the speaker well, but the speaker hopes they will want to get to know them better and or please them the way they want.

"It's Complicated": Songs about complicated relationships, often with conflicting emotions like love, hate, frustration, and desire intermingling, whether light-hearted or profound in tone.

"Platonic Love": Songs about non-romantic yet strong and meaningful relationships, such as friendships or familial bonds.

"NA": song is not a love song at all. This would include any non-love song, but also instrumentals, for example.

Here's an example input:

```json
[
	["'N Sync", "This I Promise You"],
	["John Legend", "All of Me"],
	["Beyonce", "Halo"],
	["T-Pain Featuring Lil Wayne", "Can't Believe It"],
	["Adele", "Someone Like You"],
	["Mariah Carey", "Without You"],
	["Dolly Parton", "Jolene"],
	["Bill Withers", "Ain't No Sunshine"],
	["The Rolling Stones", "Miss You"],
	["Whitney Houston", "I Will Always Love You"],
	["Taylor Swift", "Teardrops on My Guitar"],
	["Lesley Gore", "She's A Fool"],
	["Taylor Swift", "You Belong With Me"],
	["Justin Timberlake", "Suit & Tie"],
	["T-Pain Featuring Mike Jones", "I'm N Luv (Wit A Stripper)"],
	["Rihanna", "Only Girl (In the World)"],
	["Britney Spears", "Hold It Against Me"],
	["T-Pain", "Buy U A Drank (Shawty Snappin')"],
	["2Pac", "Dear Mama"],
	["Saweetie", "Best Friend (feat. Doja Cat)"],
	["James Taylor", "You've Got a Friend"],
	["Bill Withers", "Lean On Me"],
	["Little Mix and Jason Derulo", "Secret Love Song"],
	["Katy Perry", "Hot N Cold"],
	["Pat Benatar", "Love Is a Battlefield"],
	["Lauryn Hill", "Ex-Factor"],
	["24kGoldn", "Mood"],
	["T-Pain", "I'm Sprung"],
	["Kelly Clarkson", "Since U Been Gone"],
	["Blu Cantrell", "Hit 'Em Up Style (Oops!)"],
	["Selena Gomez", "Look at Her Now"],
	["Taylor Swift", "Picture To Burn"],
	["Christina Aguilera", "Fighter"],
	["Jamie Foxx Featuring T-Pain", "Blame It"],
	["6ix9ine Featuring Nicki Minaj & Murda Beatz", "FEFE"],
	["The Beach Boys", "I Get Around"],
	["Robin Thicke ft. Pharrell & T.I.", "Blurred Lines"],
	["112", "Peaches & Cream"],
	["Lady Gaga", "Poker Face"],
	["Ariana Grande", "Thank U, Next"],
	["Lizzo", "Good As Hell"],
	["Christina Aguilera", "Beautiful"],
	["Britney Spears", "Womanizer"],
	["TLC", "No Scrubs"],
	["Meghan Trainor", "NO"],
	["Radiohead", "Creep"],
	["Rick Springfield", "Jessie's Girl"],
	["21 Savage & Metro Boomin", "Runnin"],
	["twenty one pilots", "Heathens"],
	["will.i.am & Britney Spears", "Scream & Shout"],
    ["3OH!3 Featuring Ke$ha","My First Kiss"],["Anna Kendrick","Cups (Pitch Perfect\'s When I\'m Gone)"],["Ava Max","Sweet But Psycho"],["Bruno Mars & Cardi B","Finesse"],["Calvin Harris","Summer"],["Calvin Harris Featuring Rihanna","This Is What You Came For"],["Camila Cabello Featuring Young Thug","Havana"],["Capital Cities","Safe And Sound"],["Chris Brown","Don\'t Wake Me Up"],["Cobra Starship Featuring Sabi","You Make Me Feel..."],["Elton John & Dua Lipa","Cold Heart (PNAU Remix)"],["Fetty Wap","Trap Queen"],["Harry Styles","As It Was"],["Hozier","Take Me To Church"],["Katy Perry Featuring Kanye West","E.T."],["Kendrick Lamar & SZA","All The Stars"],["Lil Nas X","Panini"],["Lil Wayne","How To Love"],["Lil Wayne Featuring Kendrick Lamar","Mona Lisa"],["Michael Jackson & Justin Timberlake","Love Never Felt So Good"],["Owl City","Fireflies"],["P!nk","Try"],["Pitbull Featuring Ne-Yo, Afrojack & Nayer","Give Me Everything"],["Soko","We Might Be Dead By Tomorrow"],["Taylor Swift","Blank Space"],["The Chainsmokers","Paris"]
    ["The Weeknd & Ariana Grande","Save Your Tears"],
    ["You Don't Bring Me Flowers", "Barbra Streisand & Neil Diamond"],
    ["Heartbreaker", "Mariah Carey ft Jay-Z"],
    ["Kill Bill","SZA"],
    ["What Kind Of Fool","Barbera Streisand & Barry Gibb"],
    ["Stoney End","Barbra Streisand"],
]
```

Here's the sample corresponding output:

```json
{"output":[
    ["'N Sync","This I Promise You","promising to always love someone","Serenade"],["John Legend","All of Me","praises lover, who's affection makes any struggle worth it, promises to give all of them","Serenade"],["Beyonce","Halo","celebrates the angelic quality of a lover and testifies to the positive changes as a result of this love","Serenade"],["T-Pain Featuring Lil Wayne","Can't Believe It","praises love interest, telling her of all the things he'll do for her","Serenade"],["Adele","Someone Like You","bemoans that a lover has moved on to a happy life with another partner while the speaker struggles to move on","Longing & Heartbreak"],["Mariah Carey","Without You","at the moment of a breakup, she expresses despair over everything she will lose","Longing & Heartbreak"],["Dolly Parton","Jolene","her heart is breaking because she fears her male lover will be easily stolen by another woman","Longing & Heartbreak"],["Bill Withers","Ain't No Sunshine","when a lover is away, he expresses that his world seems to become darker","Longing & Heartbreak"],["The Rolling Stones","Miss You","expresses how deeply he misses the lover, and wishes he didn't have to wait","Longing & Heartbreak"],["Whitney Houston","I Will Always Love You","wishes her now gone lover well, reminisces, will never forget","Longing & Heartbreak"],["Taylor Swift","Teardrops on My Guitar","she's in love with a friend who's in love with someone else, she has to pretend she doesn't love him but secretly she's suffering","Longing & Heartbreak"],["Lesley Gore","She's A Fool","her love is with another woman who doesn't appreciate his virtues, longs to some day be noticed by him","Longing & Heartbreak"],["Taylor Swift","You Belong With Me","she's making an empassioned argument that her love object should be with her instead of who he's currently with","Courtship & Anticipation"],["Justin Timberlake","Suit & Tie","looking forward to a night of dancing with his beloved","Courtship & Anticipation"],["T-Pain Featuring Mike Jones","I'm N Luv (Wit A Stripper)","he has strong romantic and sexual feelings for a stripper who doesn't yet know him personally","Courtship & Anticipation"],["Rihanna","Only Girl (In the World)","telling her new love how she wants him to make her feel","Courtship & Anticipation"],["Britney Spears","Hold It Against Me","she's interested in a stragner and approaches them hoping to see if they might get together","Courtship & Anticipation"],["T-Pain","Buy U A Drank (Shawty Snappin')","he spots a woman at the bar and offers to buy her a drink as a prelude to potentially getting together","Courtship & Anticipation"],["2Pac","Dear Mama","love and appreciation for his mother","Platonic Love"],["Saweetie","Best Friend (feat. Doja Cat)","boasts about a close friend who is succesful and strong","Platonic Love"],["James Taylor","You've Got a Friend","promises a close friend they'll be there thick and thin","Platonic Love"],["Bill Withers","Lean On Me","invites a friend to rely on him when they are struggling, as everyone needs platonic support at times","Platonic Love"],["Little Mix and Jason Derulo","Secret Love Song","the speakers long to be united with their lover in public, but must remain in secret","It's Complicated"],["Katy Perry","Hot N Cold","about navigating a lover who cannot make up their mind and is inconsistent","It's Complicated"],["Pat Benatar","Love Is a Battlefield","the relationship she is in alternates between being loving and painful and she doesn't feel like she can leave","It's Complicated"],["Lauryn Hill","Ex-Factor","she feels trapped in a relationship where she feels she's giving more than she's recieving, but whenever she tries to leave, she can't","It's Complicated"],["24kGoldn","Mood","bemoans why his female love interest is always difficult, moody, and inconsistent","It's Complicated"],["T-Pain","I'm Sprung","knows his love interest doesn't deserve him, yet he can't stay away, he vascilates between feelings of deep love and wanting to get away as fast as possible","It's Complicated"],["Kelly Clarkson","Since U Been Gone","ending a bad relationship leaves her feeling better and more in control","Good Riddance"],["Blu Cantrell","Hit 'Em Up Style (Oops!)","reacts to being cheated on by gleefully spending her ex-lover's money, as an act of revenge","Good Riddance"],["Selena Gomez","Look at Her Now","after being cheated on, she has now moved on and is better than ever","Good Riddance"],["Taylor Swift","Picture To Burn","after realizing how self-centered and unsuitable her ex-lover is, she vents about all of his negative qualities","Good Riddance"],["Christina Aguilera","Fighter","she thanks the lover because his abuse actually ended up making her stronger and wiser","Good Riddance"],["Jamie Foxx Featuring T-Pain","Blame It","speaker brags about his sexual power & alure, confident his love interest will want to hook up with him","Sexual Conquest"],["6ix9ine Featuring Nicki Minaj & Murda Beatz","FEFE","about sexual prowess with many sexual references","Sexual Conquest"],["The Beach Boys","I Get Around","brag about constantly seeking out new women and never being rejected","Sexual Conquest"],["Robin Thicke ft. Pharrell & T.I.","Blurred Lines","speaker brags about being about to 'get nasty' with a good girl who he claims wants sex","Sexual Conquest"],["112","Peaches & Cream","about sex & being addicted to a tasty woman","Sexual Conquest"],["Lady Gaga","Poker Face","brags about a male sex partner who does not know she is sleeping with a women as well","Sexual Conquest"],["Ariana Grande","Thank U, Next","brags about being unfased and even improved by past relationships ending, and in fact is now thriving in life","Love Song for the Self"],["Lizzo","Good As Hell","an anthem for leaving behidn negativity or men who don't love you anymore, and instead enjoying who you are with pride","Love Song for the Self"],["Christina Aguilera","Beautiful","an anthem proclaiming ones beauty, even in the face of deragatory words or spiteful ex-lovers","Love Song for the Self"],["Britney Spears","Womanizer","in her prowess, she recognizes a womanizer and rejects his advances","Love Song for the Self"],["TLC","No Scrubs","speakers asserts that they will reject so-called scrubs because she wants a higher quality man","Love Song for the Self"],["Meghan Trainor","NO","an anthem for women to confidently reject unwanted male advances, because the men aren't needed for the women to be in the zone","Love Song for the Self"],["Radiohead","Creep","in contrast to his love interest's angelic nature, he feels ugly and invisible, and hates on himself, which does not fit in Longing & Hearbreak or even Good Riddance","It's Complicated"],["Rick Springfield","Jessie's Girl","praises his friends lover and expresses he's confused she isn't into him too, and he's jealous: themes to unusual to fit into the given love song subtypes.","It's Complicated"],["21 Savage & Metro Boomin","Runnin","a gangster rap, not romance","NA"],["twenty one pilots","Heathens","about social outcasts, not romance","NA"],["will.i.am & Britney Spears","Scream & Shout","about dancing in the club, with no real elements of romance directed toward a specific love interest","NA"],
    ["3OH!3 Featuring Ke$ha","My First Kiss","reminiscing about a first kiss, which is a fond memory but not necessarily a song professing love","Courtship & Anticipation"],["Anna Kendrick","Cups (Pitch Perfect\'s When I\'m Gone)","a song about leaving and moving on to pursue one\'s dreams, ambiguous enough that it doesn\'t quite capture a specific romantic mood","NA"],["Ava Max","Sweet But Psycho","portrays a woman in love who is misunderstood to be \'psycho\'","It\'s Complicated"],["Bruno Mars & Cardi B","Finesse","a song celebrating mutual attraction and good times, though not directly focused on love or relationship depth","Love Song for the Self"],["Calvin Harris","Summer","captures a summertime feeling of freedom and excitement, but not expressly a love song","It\'s Complicated"],["Calvin Harris Featuring Rihanna","This Is What You Came For","a party song about an electric connection with someone, possibly romantic, but undefined","Courtship & Antisipation"],["Camila Cabello Featuring Young Thug","Havana","a narrative about being romantically entangled with a suave character named \'East Atlanta\', though not deeply emotional","Longing & Heartbreak"],["Capital Cities","Safe And Sound","an upbeat melody offering support and security, can be interpreted as platonic or romantic","Serenade"],["Chris Brown","Don\'t Wake Me Up","talks about a dream of a perfect relationship he doesn\'t want to end","Serenade"],["Cobra Starship Featuring Sabi","You Make Me Feel...","about the exhilarating feeling someone gives you, could be interpreted as a romantic song","Courtship & Anticipation"],["Elton John & Dua Lipa","Cold Heart (PNAU Remix)","mixes lyrics from previous Elton John\'s songs, dealing with longing for warmth in a cold-hearted world","NA"],["Fetty Wap","Trap Queen","about a woman helping her man in his drug business, love within a criminal context","Serenade"],["Harry Styles","As It Was","reflects on changes and how someone fits into the singer\'s life","NA"],["Hozier","Take Me To Church","metaphorically compares loving a person to religion, and critiques institutions that oppose his love","Serenade"],["Katy Perry Featuring Kanye West","E.T.","compares a lover to an alien, otherworldly love interest","Courtship & Anticipation"],["Kendrick Lamar & SZA","All The Stars","mentions a relationship but in the context of larger themes of life and legacy","NA"],["Lil Nas X","Panini","about fans\' attachment and his own personal journey, not directly a love song","NA"],["Lil Wayne","How To Love","a narrative about a woman\\u2019s life and her struggle to understand love","Courtship & Anticipation"],["Lil Wayne Featuring Kendrick Lamar","Mona Lisa","describes using women to set up a robbery, trust and love are manipulated for personal gain, which does not fit cleanly into defined love song subcategories","NA"],["Michael Jackson & Justin Timberlake","Love Never Felt So Good","remembers when love was good and pure, sort of a celebration of past love","Serenade"],["Owl City","Fireflies","about wonder at the world more than a love song, though could suggest a sense of platonic connection","NA"],["P!nk","Try","about the inevitability of getting hurt when looking for love but encourages to try again","It\'s Complicated"],["Pitbull Featuring Ne-Yo, Afrojack & Nayer","Give Me Everything","about seizing the moment, with a blend of party and romantic elements","Courtship & Anticipation"],["Soko","We Might Be Dead By Tomorrow","emphasizing living in the current moment with the person they care about, has a romantic undertone","Longing & Heartbreak"],["Taylor Swift","Blank Space","satirical view of the media\'s perception of her dating life, embodying a character","It\'s Complicated"],["The Chainsmokers","Paris","reflects on an escape with a partner, hinting at a deeper connection but not overtly romantic","Serenade"],
    ["The Weeknd & Ariana Grande","Save Your Tears","explores the conflict of not realizing how deeply someone had loved you, of not being sure why you run away, wishing you could love them but also feeling they deserve someone better","It's Complicated"],
    ["You Don't Bring Me Flowers", "Barbra Streisand & Neil Diamond", "a duet about a couple who have grown apart and no longer show affection, but still have feelings for each other and hope it could work", "It's Complicated"],
    ["Heartbreaker", "Mariah Carey ft Jay-Z", "her love interest is a heartbreaker, but she can't resist him, and she's torn between wanting to be with him and knowing he's bad for her", "It's Complicated"],
    ["Kill Bill","SZA","she still loves him, but she also wants to kill him, she's torn between love and murderous sociopathy","It's Complicated"],
    ["What Kind Of Fool","Barbera Streisand & Barry Gibb","a couple reflects that there were moments where they could have saved their relationship, but they let the relationship fall apart, and they are both sorry for what they've lost","Longing & Heartbreak"],
    ["Stoney End","Barbra Streisand","a song about her life struggles and journey, which briefly mentions love, but is not primarily about love","NA"]
    ]}
```

Ok, now generate the output for this input:


"""

In [24]:
# pretty print the JSON string
# display(Markdown(LOVE_SONG_LABELING_PROMPT))
# LOVE_SONG_LABELING_PROMPT

In [25]:
def get_love_song_labels_for_rows(row_subset_str):
    try: 
        completion = client.chat.completions.create(
          model="gpt-4o",
          response_format={'type': "json_object"},
          messages=[
            {"role": "user", "content": LOVE_SONG_LABELING_PROMPT},
            {"role": "user", "content": row_subset_str},
          ]
        )
        content = completion.choices[0].message.content
        # return content
        response_content = eval_this_str(content)['output']
        # print(f"Response contains {completion['usage']['completion_tokens']} tokens out of 4096 maximum allowed tokens.")
        return response_content
    except Exception as e:
        print('ERROR FETCHING', e)
        return []

### Fetch in batches

In [26]:

def fetch_all_data_in_batches(df, increment=25):
    gpt_labeled_result = []
    for i in range(101):
        subset = df[['performer', 'song']][i*increment : (i+1)*increment]
        if len(subset):
            subset_as_json_string = subset.to_json(orient='values')
            print([i*increment, (i+1)*increment])
            print(subset_as_json_string, '\n')
            fetched_and_formatted_res = get_love_song_labels_for_rows(subset_as_json_string)
            print(fetched_and_formatted_res)
            gpt_labeled_result = [*gpt_labeled_result, *fetched_and_formatted_res]
    return gpt_labeled_result

In [27]:
def get_file_path(postfix):
    return f'./data/22-TEMP-gpt4o-re-relabel-good-riddance-{postfix}.csv'

In [28]:
def save_to_temp_csv(gpt_labeled_result, run_num):
    first_label_pass_df = pd.DataFrame(
        gpt_labeled_result, 
        columns=['performer', 'song', 
                 'justification',
                 'love_song_sub_type']
    )
    first_label_pass_df.to_csv(get_file_path(run_num), index=False)

# (run a test call to be sure things look ok)
For 100 rows at once:
For GPT4 turbo: 3min, about 3400 tokens out of 4096 maximum allowed tokens 

In [29]:
new_songs_df[
            new_songs_df.performer.str.contains('Barbra Streisand')
        ]

Unnamed: 0,song_id,chart_debut,song,performer,date_as_decimal,popularity_score,total_weeks_in_top_10,love_song_sub_type
3954,Stoney EndBarbra Streisand,1971-01-09,Stoney End,Barbra Streisand,1971.02,9,4,Good Riddance
5030,You Don't Bring Me FlowersBarbra Streisand & N...,1978-11-18,You Don't Bring Me Flowers,Barbra Streisand & Neil Diamond,1978.88,76,10,Good Riddance
3061,No More Tears (Enough Is Enough)Barbra Streisa...,1979-11-03,No More Tears (Enough Is Enough),Barbra Streisand/Donna Summer,1979.84,53,9,Good Riddance
4782,What Kind Of FoolBarbra Streisand & Barry Gibb,1981-03-21,What Kind Of Fool,Barbra Streisand & Barry Gibb,1981.22,3,3,Good Riddance


In [30]:
%%time
TEST_POSTFIX = 'JUST A TEST, DELETE ME'
save_to_temp_csv(
    fetch_all_data_in_batches(
        # new_songs_df.iloc[0:3]
        new_songs_df[
            new_songs_df.performer.str.contains('Barbra Streisand')
        ]
    ), 
    TEST_POSTFIX
)

[0, 25]
[["Barbra Streisand","Stoney End"],["Barbra Streisand & Neil Diamond","You Don't Bring Me Flowers"],["Barbra Streisand\/Donna Summer","No More Tears (Enough Is Enough)"],["Barbra Streisand & Barry Gibb","What Kind Of Fool"]] 

[['Barbra Streisand', 'Stoney End', 'a song about her life struggles and journey, which briefly mentions love, but is not primarily about love', 'NA'], ['Barbra Streisand & Neil Diamond', "You Don't Bring Me Flowers", 'a duet about a couple who have grown apart and no longer show affection, but still have feelings for each other and hope it could work', "It's Complicated"], ['Barbra Streisand/Donna Summer', 'No More Tears (Enough Is Enough)', 'a powerful duet where two women declare that they have had enough of the disrespect in their relationships and are moving on', 'Good Riddance'], ['Barbra Streisand & Barry Gibb', 'What Kind Of Fool', "a couple reflects that there were moments where they could have saved their relationship, but they let the relations

In [32]:
pd.read_csv(get_file_path(TEST_POSTFIX))#.iloc[1].justification

Unnamed: 0,performer,song,justification,love_song_sub_type
0,Barbra Streisand,Stoney End,"a song about her life struggles and journey, w...",
1,Barbra Streisand & Neil Diamond,You Don't Bring Me Flowers,a duet about a couple who have grown apart and...,It's Complicated
2,Barbra Streisand/Donna Summer,No More Tears (Enough Is Enough),a powerful duet where two women declare that t...,Good Riddance
3,Barbra Streisand & Barry Gibb,What Kind Of Fool,a couple reflects that there were moments wher...,Longing & Heartbreak


# Ok, run the actual fetch

In [37]:
FILE_KEY = 'relabeling-all-good-riddance-songs'
save_to_temp_csv(
    fetch_all_data_in_batches(
        new_songs_df.iloc[0:1000],
        25
    ), 
    FILE_KEY
)

[0, 25]
[["Ricky Nelson","Poor Little Fool"],["The Platters","Smoke Gets In Your Eyes"],["Travis & Bob","Tell Him No"],["Connie Francis","Lipstick On Your Collar"],["Anita Bryant","Paper Roses"],["Bobby Vee","Rubber Ball"],["Del Shannon","Hats Off To Larry"],["Bobby Vee","Take Good Care Of My Baby"],["Ray Charles and his Orchestra","Hit The Road Jack"],["Dion","Runaround Sue"],["Bobby Vee","Run To Him"],["Ray Charles and his Orchestra","Unchain My Heart"],["The Marvelettes","Playboy"],["Joe Henderson","Snap Your Fingers"],["The 4 Seasons","Big Girls Don't Cry"],["Esther Phillips \"Little Esther\"","Release Me"],["Steve Lawrence","Go Away Little Girl"],["The 4 Seasons","Walk Like A Man"],["The Shirelles","Foolish Little Girl"],["Ray Charles","Take These Chains From My Heart"],["Lesley Gore","Judy's Turn To Cry"],["Peter, Paul & Mary","Don't Think Twice, It's All Right"],["Bobby Rydell","Forget Him"],["Lesley Gore","You Don't Own Me"],["The Tams","What Kind Of Fool (Do You Think I Am)"]]

# Process results (and correct mistakes where songs were rewritten wrong by GPT eg `'` instead of `"`)

In [124]:
love_song_subtypes_df = pd.concat([
    pd.read_csv(get_file_path(FILE_KEY)),
])

love_song_subtypes_df = love_song_subtypes_df.drop_duplicates(subset=['performer', 'song'])
love_song_subtypes_df['song_id'] = love_song_subtypes_df['song'] + love_song_subtypes_df['performer']

# replace "Platonic Love" with np.nan since we're ignoring this category
love_song_subtypes_df['love_song_sub_type'] = love_song_subtypes_df['love_song_sub_type'].replace('Platonic Love', np.nan)

assert(
    len(love_song_subtypes_df) == len(new_songs_df)
)

# weirdly there are non-overlapping songs... even though the lengths are the same... ?

# by merging on `song` and `performer`, show songs that are on new_songs_df but NOT in love_song_subtypes_df
missmatched_songs = new_songs_df.merge(
    love_song_subtypes_df,
    on=['song_id'],
    how='left',
    indicator=True
).query('_merge == "left_only"')

from Levenshtein import distance

# Iterate over each mismatched song and find the closest match in love_song_subtypes_df using Levenshtein distance
corrections = {}
for i, row in missmatched_songs.iterrows():
    # print(f"MISSING: {row['song_x']}")
    # print(f"Closest matches:")
    for j, row2 in love_song_subtypes_df.iterrows():
        dist = distance(row['song_x'], row2['song'])
        if dist < 5:
            # print(f"    {row2['song']}: {row['song_x']}")
            corrections[row2['song']] = row['song_x']

# update love_song_subtypes_df with the song corrections
for wrong_song, correct_song in corrections.items():
    love_song_subtypes_df.loc[love_song_subtypes_df.song == wrong_song, 'song'] = correct_song

assert(len(new_songs_df.merge(
    love_song_subtypes_df,
    on=['song', 'performer'],
    how='left',
    indicator=True
).query('_merge == "left_only"')) == 0)


In [125]:
love_song_subtypes_df.love_song_sub_type.value_counts()

love_song_sub_type
Good Riddance               125
Longing & Heartbreak         52
It's Complicated             30
Love Song for the Self       11
Courtship & Anticipation      8
Serenade                      2
Name: count, dtype: int64

In [126]:
# non love songs that were caught
love_song_subtypes_df[love_song_subtypes_df.love_song_sub_type.isnull()]

Unnamed: 0,performer,song,justification,love_song_sub_type,song_id
26,Roger Miller,Dang Me,a whimsical song about a man dealing with the ...,,Dang MeRoger Miller
27,The Newbeats,Bread And Butter,playful song about loving simple domestic plea...,,Bread And ButterThe Newbeats
39,Tommy Roe,Hooray For Hazel,a playful song about a girl who wins everyone'...,,Hooray For HazelTommy Roe
41,The Five Americans,Western Union,using a telegram metaphor to communicate feeli...,,Western UnionThe Five Americans
57,Bobby Sherman,"Easy Come, Easy Go",describes a carefree attitude toward relations...,,"Easy Come, Easy GoBobby Sherman"
59,Barbra Streisand,Stoney End,"about her life struggles and journey, which br...",,Stoney EndBarbra Streisand
93,Queen,Another One Bites The Dust,focuses on the end of various relationships as...,,Another One Bites The DustQueen
173,Kanye West Featuring Jamie Foxx,Gold Digger,warns men about women who are only interested ...,,Gold DiggerKanye West Featuring Jamie Foxx
213,Drake,Fake Love,about discovering false friendships and love a...,,Fake LoveDrake


# Merge a complete dataset using old & new fetched metadata

In [138]:
# Concat prev & new fetched results
NEEDED_COLS = ['performer', 'song', 'love_song_sub_type']
prev_metadata_df = prev_df[ # drop the rows that are in the new_songs_df
    prev_df.love_song_sub_type != 'Good Riddance'
]

merged_df = pd.concat([
    prev_metadata_df,
    love_song_subtypes_df
])[NEEDED_COLS]

merged_df['song_id'] = merged_df['song'] + merged_df['performer']

assert(len(prev_metadata_df) + len(new_songs_df) == len(merged_df))

# merged_df = merged_df.drop(columns=['justification'])
merged_df.love_song_sub_type.value_counts()
merged_df

Unnamed: 0,performer,song,love_song_sub_type,song_id
0,John Lennon,#9 Dream,,#9 DreamJohn Lennon
1,Jay-Z Featuring Beyonce Knowles,'03 Bonnie & Clyde,It's Complicated,'03 Bonnie & ClydeJay-Z Featuring Beyonce Knowles
2,Paul Davis,'65 Love Affair,,'65 Love AffairPaul Davis
3,The Everly Brothers,('til) I Kissed You,Serenade,('til) I Kissed YouThe Everly Brothers
4,Nelson,(Can't Live Without Your) Love And Affection,Longing & Heartbreak,(Can't Live Without Your) Love And AffectionNe...
...,...,...,...,...
232,SZA,Kill Bill,It's Complicated,Kill BillSZA
233,Bizarrap & Shakira,"Bzrp Music Sessions, Vol. 53",Good Riddance,"Bzrp Music Sessions, Vol. 53Bizarrap & Shakira"
234,PinkPantheress & Ice Spice,"Boy's A Liar, Pt. 2",It's Complicated,"Boy's A Liar, Pt. 2PinkPantheress & Ice Spice"
235,Karol G x Shakira,TQG,Good Riddance,TQGKarol G x Shakira


### Contrast the love song updates we've made:

In [139]:
prev_df.love_song_sub_type.value_counts()

love_song_sub_type
Serenade                    1040
Longing & Heartbreak         776
It's Complicated             456
Courtship & Anticipation     438
Sexual Conquest              311
Good Riddance                237
Platonic Love                 98
Love Song for the Self        98
Name: count, dtype: int64

In [140]:
merged_df.love_song_sub_type.value_counts()

love_song_sub_type
Serenade                    1042
Longing & Heartbreak         828
It's Complicated             486
Courtship & Anticipation     446
Sexual Conquest              311
Good Riddance                125
Love Song for the Self       109
Platonic Love                 98
Name: count, dtype: int64

In [141]:
# Print out the change in value counts for each love song subtype
for love_song_subtype in [*merged_df.love_song_sub_type.unique()]:
    prev_count = prev_df.love_song_sub_type.value_counts().get(love_song_subtype, 0)
    new_count = merged_df.love_song_sub_type.value_counts().get(love_song_subtype, 0)
    print(f'{love_song_subtype}: {new_count - prev_count} ({new_count} total now)')
    # NOTE: value_counts() ignores np.nan, so they don't show up in these counts

nan: 0 (0 total now)
It's Complicated: 30 (486 total now)
Serenade: 2 (1042 total now)
Longing & Heartbreak: 52 (828 total now)
Courtship & Anticipation: 8 (446 total now)
Sexual Conquest: 0 (311 total now)
Platonic Love: 0 (98 total now)
Love Song for the Self: 11 (109 total now)
Good Riddance: -112 (125 total now)


# export

In [143]:
WITH_LOVE_SONG_TYPES_PATH = './data/22.2-v3-OUTPUT-data-through-sep-2023-ALL-SONGS-LABELLED-with-good-riddance-corrections.csv'
merged_df.to_csv(WITH_LOVE_SONG_TYPES_PATH, index=False)