In [None]:
# Mount Google Drive at /content/drive/ so files stored there can be opened by path.
from google.colab import drive

drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
!pip install transformers==4.28.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers==4.28.0
  Downloading transformers-4.28.0-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m73.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0 (from transformers==4.28.0)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.28.0)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.15.1 tokenizers-0.13.3 transform

In [None]:
import re
import torch
import pandas as pd
from transformers import TrainingArguments, Trainer, GPTNeoForCausalLM, GPT2TokenizerFast

In [None]:
# Drive folder that contains one fine-tuned model directory per genre.
MODELS_FOLDER = "/content/drive/MyDrive/NLP Finetuning/Finetuned Models/"

In [None]:
import random
import numpy as np

In [None]:
# Release cached CUDA memory before and after building the tokenizer.
torch.cuda.empty_cache()

# GPT-Neo tokenizer with an explicit '<|pad|>' padding token.
tokenizer = GPT2TokenizerFast.from_pretrained(
    'EleutherAI/gpt-neo-125m',
    pad_token='<|pad|>',
)

torch.cuda.empty_cache()

# Seed every random number source used by the notebook with the same value.
SEED = 42
for _seed_rng in (
    random.seed,
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_rng(SEED)

Downloading (…)okenizer_config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/357 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.01k [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)

cuda


In [None]:
# Show the GPU attached to this runtime and its current memory usage.
!nvidia-smi

Mon Jun  5 13:30:21 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P8     9W /  70W |      3MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Load the fine-tuned model for each genre

In [None]:
def _load_genre_model(genre_name):
    """Load the fine-tuned GPT-Neo model for one genre.

    The model directory is ``<MODELS_FOLDER>/<genre_name> Lyrics Generator``.
    Building the path from MODELS_FOLDER keeps the Drive location in a single
    place instead of repeating it for every genre.
    """
    # rstrip so the result is the same whether or not MODELS_FOLDER ends in '/'.
    model_dir = "{}/{} Lyrics Generator".format(MODELS_FOLDER.rstrip('/'), genre_name)
    return GPTNeoForCausalLM.from_pretrained(model_dir)


rock = _load_genre_model('Rock')
pop = _load_genre_model('Pop')
indie = _load_genre_model('Indie')
hip_hop = _load_genre_model('Hip Hop')
rap = _load_genre_model('Rap')

In [None]:
# Genre label -> fine-tuned model; the labels are the values accepted by generate_lyrics.
models = dict(zip(
    ('Rock', 'Pop', 'Indie', 'Hip Hop', 'Rap'),
    (rock, pop, indie, hip_hop, rap),
))

In [None]:
def _clean_generated_text(text):
    """Tidy one decoded sample for display.

    Removes literal '<|pad|>' tokens and runs of '|', then collapses repeated
    spaces into one space and repeated newlines into one newline.
    """
    # Plain string replacement on purpose: '<|pad|>' contains the regex
    # alternation operator, so used as an unescaped pattern it would strip
    # every '<', 'pad' and '>' from the lyric instead of the token itself.
    text = text.replace('<|pad|>', '')
    text = re.sub(r'\|+', '', text)
    text = re.sub(r' {2,}', ' ', text)
    return re.sub(r'\n{2,}', '\n', text)


def generate_lyrics(genre, seed_text, number_of_lyrics):
    """Sample lyrics from the model of the given genre and print them.

    Args:
        genre: Key into the module-level ``models`` dict (e.g. 'Rock').
        seed_text: Prompt text that every generated lyric continues.
        number_of_lyrics: How many independent samples to generate and print.

    Raises:
        ValueError: If ``genre`` is not a key of ``models``.

    Returns:
        None. Each sample is printed under a "Generated lyric N" header.
    """
    model = models.get(genre)
    if model is None:
        raise ValueError(
            "Unknown genre {!r}; available genres: {}".format(genre, ', '.join(models))
        )

    # Use the notebook-level `device` so the function also runs on a CPU runtime.
    model.to(device)

    encoded = tokenizer(seed_text, return_tensors='pt').to(device)

    output = model.generate(
        input_ids=encoded['input_ids'],
        # Pass the mask explicitly so generate() does not have to guess it.
        attention_mask=encoded['attention_mask'],
        do_sample=True,
        top_k=10,
        max_length=100,
        top_p=0.95,
        temperature=2.,
        num_return_sequences=number_of_lyrics,
    )

    for i, sample_output in enumerate(output):
        # skip_special_tokens drops padding / end-of-text markers at decode time.
        text = tokenizer.decode(sample_output.tolist(), skip_special_tokens=True)
        result = _clean_generated_text(text)
        print("\n==== Generated lyric {}:\n\n{}".format(i+1, result))

In [None]:
# Load the rock lyrics CSV from Drive, then show its shape and first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, presumably an index
# written out by an earlier save; confirm whether it should be dropped.
rock_data = pd.read_csv("/content/drive/MyDrive/NLP Finetuning/Genre Datasets/en_Rock_Lyrics.csv")

print(rock_data.shape)
rock_data.head()

(63553, 3)


Unnamed: 0.1,Unnamed: 0,Lyric,Genres
0,31764,"Wise men say, only fools rush in\nBut I can't ...",Rock
1,31767,We're caught in a trap\nI can't walk out\nBeca...,Rock
2,31770,"Maybe I didn't treat you,\nQuite as good as I ...",Rock
3,31773,"Love me tender, love me sweet,\nNever let me g...",Rock
4,31776,And now the end is near\nAnd so I face the fin...,Rock


# Generate Lyrics

In [None]:
# Print three sampled lyrics from the Indie model, each starting from the seed text.
generate_lyrics('Indie', 'In the deepest ocean', 3)