In [36]:
import pandas as pd
import os
import random
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" # needed for making torch work?
import torch
from transformers import DistilBertTokenizer, DistilBertModel
import numpy as np
from openai import OpenAI
from dotenv import load_dotenv

In [37]:
# store API tokens in environment variables
load_dotenv()

# Read authentication keys from environmental variables
_open_ai_tkn = os.environ.get('OPENAI_KEY')
_project_tkn = os.environ.get('OPENAI_PROJECT')
_organisation_tkn = os.environ.get('OPENAI_ORG')

In [38]:
# load test dataset
test_song_df = pd.read_csv('data/test_songs.csv')
test_song_df.head()

Unnamed: 0.1,Unnamed: 0,artist,song_name,lyrics,mood
0,0,Keane,Perfect Symmetry,I shake through the wreckage for signs of life...,sad
1,1,Creedence Clearwater Revival,Have_You_Ever_Seen_The_Rain,\nSomeone told me long ago\nThere's a calm bef...,sad
2,2,Britney Spears,out_from_under,"\nBreathe you out, breathe you in\nYou keep co...",sad
3,3,Blondie,Good Boys,\nSatellites are falling down tonight\nI see y...,sad
4,4,Backstreet Boys,Incomplete,\nEmpty spaces fill me up with holes\nDistant ...,sad


In [39]:
client = OpenAI(
  organization=_organisation_tkn,
  project=_project_tkn,
  api_key=_open_ai_tkn
)

## All the prompts

In [40]:
def prompt_song_1(test_song):
    test_song_lines = test_song.split('\n')
    test_song_lines = [s for s in test_song_lines if len(s) > 0]
    old_lines = []
    new_lines = []
    prompts = []
    for i in range(0, len(test_song_lines) - 7):
        question = "Please write an original one-line lyric between these two sequence of three consecutive lines: \n"
        group_1 = '\n'.join(test_song_lines[i:i+3])
        group_2 = '\n'.join(test_song_lines[i+4: i+7])
        prompt = f"{question}{group_1}\n\n{group_2}\n"
        #print(prompt)
        response = client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": prompt
            }],
            model="gpt-4o-mini",
            max_completion_tokens=100
        )
        new_line = response.choices[0].message.content.split('\n')[0] # parsing the output
        old_lines.append(test_song_lines[i+3])
        new_lines.append(new_line)
        prompts.append(prompt)
    return prompts, new_lines, old_lines

def prompt_song_2(test_song):
    test_song_lines = test_song.split('\n')
    test_song_lines = [s for s in test_song_lines if len(s) > 0]
    old_lines = []
    new_lines = []
    prompts = []
    for i in range(0, len(test_song_lines) - 7):
        question = "Please write an original one-line lyric to come directly after this sequence of six consecutive lines: \n"
        group_1 = '\n'.join(test_song_lines[i:i+6])
        prompt = f"{question}{group_1}\n"
        #print(prompt)
        response = client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": prompt
            }],
            model="gpt-4o-mini",
            max_completion_tokens=100
        )
        new_line = response.choices[0].message.content.split('\n')[0] # parsing the output
        #print(f"response: {new_line}")
        old_lines.append(test_song_lines[i+6])
        new_lines.append(new_line)
        prompts.append(prompt)
    return prompts, new_lines, old_lines

def prompt_song_3(test_song):
    test_song_lines = test_song.split('\n')
    test_song_lines = [s for s in test_song_lines if len(s) > 0]
    old_lines = []
    new_lines = []
    prompts = []
    for i in range(1, len(test_song_lines) - 6):
        question = "Please write an original one-line lyric to come directly before this sequence of six consecutive lines: \n"
        group_1 = '\n'.join(test_song_lines[i:i+6])
        prompt = f"{question}{group_1}\n"
        #print(prompt)
        response = client.chat.completions.create(
            messages=[{
                "role": "user",
                "content": prompt
            }],
            model="gpt-4o-mini",
            max_completion_tokens=100
        )
        new_line = response.choices[0].message.content.split('\n')[0] # parsing the output
        #print(f"response: {new_line}")
        old_lines.append(test_song_lines[i-1])
        new_lines.append(new_line)
        prompts.append(prompt)
    return prompts, new_lines, old_lines

## Collect all prompting results in a dataframe

In [45]:
results = pd.DataFrame({'artist': [], 'song_name': [], 'original_lyrics': [], 'generated_lyrics': [], 'prompt_id': [], 'prompt': [], 'mood': []})
i = 1
for row in test_song_df.itertuples():
    prompts_1, new_lines_1, old_lines_1 = prompt_song_1(row.lyrics)
    prompts_2, new_lines_2, old_lines_2 = prompt_song_2(row.lyrics)
    prompts_3, new_lines_3, old_lines_3 = prompt_song_3(row.lyrics)
    results_1 = pd.DataFrame({'artist': [row.artist] * len(prompts_1), 'song_name': [row.song_name] * len(prompts_1), 'original_lyrics': old_lines_1, 'generated_lyrics': new_lines_1, 'prompt_id': [1] * len(prompts_1), 'prompt': prompts_1, 'mood': [row.mood] * len(prompts_1)})
    results_2 = pd.DataFrame({'artist': [row.artist] * len(prompts_2), 'song_name': [row.song_name] * len(prompts_2), 'original_lyrics': old_lines_2, 'generated_lyrics': new_lines_2, 'prompt_id': [2] * len(prompts_2), 'prompt': prompts_2, 'mood': [row.mood] * len(prompts_2)})
    results_3 = pd.DataFrame({'artist': [row.artist] * len(prompts_3), 'song_name': [row.song_name] * len(prompts_3), 'original_lyrics': old_lines_3, 'generated_lyrics': new_lines_3, 'prompt_id': [3] * len(prompts_3), 'prompt': prompts_3, 'mood': [row.mood] * len(prompts_3)})
    results = pd.concat([results, results_1], ignore_index=True)
    results = pd.concat([results, results_2], ignore_index=True)
    results = pd.concat([results, results_3], ignore_index=True)
    print(f'built row {i} / {len(test_song_df)}')
    i += 1
results.head()

built row 1 / 20
built row 2 / 20
built row 3 / 20
built row 4 / 20
built row 5 / 20
built row 6 / 20
built row 7 / 20
built row 8 / 20
built row 9 / 20
built row 10 / 20
built row 11 / 20
built row 12 / 20
built row 13 / 20
built row 14 / 20
built row 15 / 20
built row 16 / 20
built row 17 / 20
built row 18 / 20
built row 19 / 20
built row 20 / 20


Unnamed: 0,artist,song_name,original_lyrics,generated_lyrics,prompt_id,prompt,mood
0,Keane,Perfect Symmetry,I wish I could make sense of what we do,Reaching for the echoes of a distant dream,1.0,Please write an original one-line lyric betwee...,sad
1,Keane,Perfect Symmetry,Burning down the capitals,Yet every pixel whispers stories lost anew,1.0,Please write an original one-line lyric betwee...,sad
2,Keane,Perfect Symmetry,The wisest of the animals,"Caught in the echoes of our laughter’s lore,",1.0,Please write an original one-line lyric betwee...,sad
3,Keane,Perfect Symmetry,Who are you? What are you living for?,Echoes of our choices ripple through the blue,1.0,Please write an original one-line lyric betwee...,sad
4,Keane,Perfect Symmetry,"Tooth for tooth, maybe we'll go one more",And dance beneath the ashes of the past.,1.0,Please write an original one-line lyric betwee...,sad


In [47]:
results.to_csv('results/results.csv')