In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, logging
import torch
import json
import os
from tqdm import tqdm
import gc         # garbage collect library


# Pick the compute device once for the whole notebook: use CUDA when PyTorch
# reports it as available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device:', device)

# Set up logging to display generation progress
# logging.set_verbosity_info()


  from .autonotebook import tqdm as notebook_tqdm


Device: cuda


In [2]:

# Instruction text placed at the start of every prompt (see generate_text).
# Built from adjacent literals so no leading/trailing whitespace has to be
# stripped afterwards.
DEFAULT_SYSTEM_PROMPT = (
    "You are a creative, world-famous expert lyricist. "
    "Write lyrics for a song, given just a title, artist name, possible genres, "
    "and any additional information provided."
)


def generate_input(song_name, artist, genres):
    """Build the one-sentence lyric-writing request for a song.

    Args:
        song_name: Title of the song; inserted between double quotes.
        artist: Performer name(s); inserted verbatim.
        genres: Genre names. Accepts an iterable of strings, a single string
            (treated as one genre), or an empty value / ``None`` for "no
            genre information".

    Returns:
        str: ``Write lyrics for a song titled "<song_name>" to be performed by
        <artist>[ using the following genres: <g1>, <g2>].`` The bracketed
        clause is omitted when no non-empty genre is given.
    """
    if genres is None:
        genre_names = []
    elif isinstance(genres, str):
        # A bare string is one genre; joining it directly would interleave
        # ", " between its characters.
        genre_names = [genres]
    else:
        genre_names = list(genres)

    # Drop empty entries so an empty list (or [""]) does not leave a dangling
    # "using the following genres: " clause in the prompt.
    genre_names = [name for name in genre_names if name]

    genre_info = ""
    if genre_names:
        genre_info = f" using the following genres: {', '.join(genre_names)}"

    return f"""Write lyrics for a song titled "{song_name}" to be performed by {artist}{genre_info}."""


# def generate_text(inputs, system_prompt=DEFAULT_SYSTEM_PROMPT):
#     return f"""### Instruction: {system_prompt}

# ### Input:
# {inputs}

# ### Response:
# """.strip() 


def generate_text(inputs, system_prompt=DEFAULT_SYSTEM_PROMPT):
    """Assemble the full model prompt.

    The result is the system prompt, a single space, the request text, a blank
    line, and then a ``### Lyrics:`` header line ending in a newline, which is
    where the model's continuation starts.

    Args:
        inputs: Request text (e.g. the sentence built by ``generate_input``).
        system_prompt: Instruction text placed first; defaults to
            ``DEFAULT_SYSTEM_PROMPT``.

    Returns:
        str: The assembled prompt. It is not stripped, so the trailing newline
        after the header is preserved.
    """
    prompt_template = "{} {}\n\n### Lyrics:\n"
    return prompt_template.format(system_prompt, inputs)

    


def generate_response(question, tokenizer, model, max_length=2048, bad_words=['Bad Lyrics.', 'Not Safe For Work']):
    """Generate a continuation of ``question`` with ``model`` and return it as text.

    Args:
        question: Prompt text to continue.
        tokenizer: Tokenizer providing ``encode`` and ``decode``.
        model: Model providing ``generate`` and a ``device`` attribute; the
            encoded prompt is moved to ``model.device`` before generation.
        max_length: Forwarded to ``model.generate`` as ``max_length``. In
            Hugging Face ``generate`` this bounds prompt tokens plus newly
            generated tokens, not new tokens alone.
        bad_words: Phrases that must not be generated. A list of strings, a
            single string, or an empty value / ``None`` to disable. The
            default list is only read, never mutated.

    Returns:
        str: The decoded first returned sequence with special tokens skipped.
        Because the whole sequence is decoded, the text includes the prompt.
    """
    # Each banned phrase needs its OWN token-id sequence. Joining all phrases
    # into one string and encoding that would only ban the full concatenation.
    bad_words_ids = None
    if bad_words:
        phrases = [bad_words] if isinstance(bad_words, str) else bad_words
        encoded_phrases = [
            tokenizer.encode(phrase, add_special_tokens=False) for phrase in phrases
        ]
        # Skip phrases that tokenize to nothing; pass None when nothing is left.
        bad_words_ids = [ids for ids in encoded_phrases if ids] or None

    # Encode the prompt with the tokenizer's default special-token handling
    # (no EOS is appended here) and place it on the model's device.
    input_ids = tokenizer.encode(question, return_tensors='pt').to(model.device)

    print('starting generation.')

    # Generate a single sequence.
    output = model.generate(input_ids, max_length=max_length, num_return_sequences=1,
                            no_repeat_ngram_size=2,
                            length_penalty=1.0,
                            bad_words_ids=bad_words_ids)
    print('generation finished')

    # Decode the first (only) sequence on the CPU and return it.
    return tokenizer.decode(output[0].to('cpu'), skip_special_tokens=True)

In [4]:

# Compare lyric generation between the base checkpoint and the fine-tuned one.
# For every model and every song in `test_set`, generate RESP_COUNT responses and
# collect them per song in `results`, then write everything to one JSON file.

# `is_finetuned` is index-aligned with `models_to_test`.
models_to_test = ["meta-llama/Llama-2-7b-hf", "grantsl/LyricaLlama"]
is_finetuned = [False, True]

CURRENT_IND = 0  # not read by the loop below; kept for single-model experiments

RESP_COUNT = 3              # responses generated per (model, song) pair
MAX_LEN = 500               # budget of NEW tokens per response
MAX_CONTEXT_TOKENS = 4096   # hard cap on prompt + generated tokens

# Songs to generate for. Defined once, outside the model loop, because the list
# does not depend on the model being evaluated.
test_set = [{'is_new':False, 'song':'All I Want For Christmas', 'artist':'Mariah Carey', 'genres':['Christmas']},
            {'is_new':False, 'song':'Blinding Lights', 'artist':'The Weeknd', 'genres':['Pop', 'Synth-Pop']},
            {'is_new':False, 'song':'Lover', 'artist':'Taylor Swift', 'genres':['Pop']},
            {'is_new':True, 'song':'Electric Touch', 'artist':'Taylor Swift and Fall Out Boy', 'genres':['Pop', 'Pop/Rock']},
            {'is_new':True, 'song':'Next Thing You Know', 'artist':'Jordan Davis', 'genres':['Country']},
            {'is_new':True, 'song':'Penthouse', 'artist':'Kelsea Ballerini', 'genres':['Pop', 'Country/Pop']},
           ]

# song title -> song metadata plus a 'results' list holding one entry per
# generated response (across all models).
results = dict()

for model_name, is_ft in zip(models_to_test, is_finetuned):
    model_ver = os.path.basename(model_name)
    print(model_ver)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

    for song_info in tqdm(test_set):
        song = song_info['song']
        artist = song_info['artist']
        genres = song_info['genres']

        # Reuse the entry created by an earlier model so that all models'
        # responses for a song end up in the same 'results' list.
        if song in results:
            res_song = results[song]
        else:
            res_song = song_info.copy()
            res_song['results'] = []

        inputs = generate_text(generate_input(song, artist, genres))

        # `max_length` in generate() counts TOKENS (prompt + new), so the budget
        # must be derived from the tokenized prompt, not its character count.
        prompt_token_len = len(tokenizer.encode(inputs))
        max_total_tokens = min(MAX_LEN + prompt_token_len, MAX_CONTEXT_TOKENS)

        # The decoded response starts with the prompt text; it is removed below
        # by character count, which assumes decoding reproduces the prompt
        # verbatim.
        prompt_char_len = len(inputs)

        for _ in range(RESP_COUNT):
            response = generate_response(inputs, tokenizer, model, max_total_tokens)
            res_song['results'].append({
                'model_name': model_name,
                'prompt': inputs,
                'responses': response[prompt_char_len:],
                'is_finetuned': is_ft,
            })

        results[song] = res_song

    # Release this model before the next checkpoint is loaded.
    del model

    gc.collect()
    torch.cuda.empty_cache()

with open('./combined_results.json', 'w') as f:
    json.dump(results, f, indent=4)

    
    

loading file tokenizer.model from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-hf/snapshots/8cca527612d856d7d32bd94f8103728d614eb852/tokenizer.model
loading file tokenizer.json from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-hf/snapshots/8cca527612d856d7d32bd94f8103728d614eb852/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-hf/snapshots/8cca527612d856d7d32bd94f8103728d614eb852/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-hf/snapshots/8cca527612d856d7d32bd94f8103728d614eb852/tokenizer_config.json


Llama-2-7b-hf


loading configuration file config.json from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-hf/snapshots/8cca527612d856d7d32bd94f8103728d614eb852/config.json
Model config LlamaConfig {
  "_name_or_path": "meta-llama/Llama-2-7b-hf",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "max_position_embeddings": 4096,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.32.1",
  "use_cache": true,
  "vocab_size": 32000
}

loading weights file model.safetensors from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--meta-llama--Llama-2-7b-hf/snapshots/8cca527612d856d7d32bd94f8103728d6

starting generation.
generation finished
starting generation.
generation finished
starting generation.


 17%|█▋        | 1/6 [00:21<01:46, 21.32s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


 33%|███▎      | 2/6 [01:57<04:20, 65.25s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


 50%|█████     | 3/6 [03:06<03:21, 67.05s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


 67%|██████▋   | 4/6 [04:18<02:17, 68.89s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


 83%|████████▎ | 5/6 [05:08<01:02, 62.01s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


100%|██████████| 6/6 [06:39<00:00, 66.55s/it]

generation finished
LyricaLlama



loading file tokenizer.model from cache at None
loading file tokenizer.json from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--grantsl--LyricaLlama/snapshots/d96fa37b770ee629fa9fede3ce000a76ecfee2bb/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--grantsl--LyricaLlama/snapshots/d96fa37b770ee629fa9fede3ce000a76ecfee2bb/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--grantsl--LyricaLlama/snapshots/d96fa37b770ee629fa9fede3ce000a76ecfee2bb/tokenizer_config.json
loading configuration file config.json from cache at /home/jupyter-grantsl/.cache/huggingface/hub/models--grantsl--LyricaLlama/snapshots/d96fa37b770ee629fa9fede3ce000a76ecfee2bb/config.json
Model config LlamaConfig {
  "_name_or_path": "grantsl/LyricaLlama",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "bos_token_id

starting generation.
generation finished
starting generation.
generation finished
starting generation.


 17%|█▋        | 1/6 [01:37<08:06, 97.23s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


 33%|███▎      | 2/6 [03:13<06:26, 96.69s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


 50%|█████     | 3/6 [04:48<04:47, 95.75s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


 67%|██████▋   | 4/6 [06:26<03:13, 96.74s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


 83%|████████▎ | 5/6 [08:03<01:36, 96.73s/it]

generation finished
starting generation.
generation finished
starting generation.
generation finished
starting generation.


100%|██████████| 6/6 [09:39<00:00, 96.59s/it]

generation finished





In [None]:
# Single-prompt demo using placeholder song details.
#
# The comparison loop above runs `del model` after every checkpoint, so on a
# fresh "Restart & Run All" no `model` exists when this cell is reached. Load a
# checkpoint here when that is the case instead of relying on leftover kernel
# state.
DEMO_MODEL_NAME = "grantsl/LyricaLlama"
if "model" not in globals():
    tokenizer = AutoTokenizer.from_pretrained(DEMO_MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(DEMO_MODEL_NAME).to(device)

# inputs = generate_input('All I Want For Christmas', 'Mariah Carey', ['Christmas'])
inputs = generate_input('Song_Title', 'Artist_Name', ['Genre(s)'])

question = generate_text(inputs)

print('Prompt:', question)
# Generate and print the response (the returned text includes the prompt).
response1 = generate_response(question, tokenizer, model, max_length=500)
print('Results:\n', response1)