# Using a GPT-2 implementation to predict chord progressions

## Setup
Including installing the aitextgen package

In [None]:
!pip3 install aitextgen

In [None]:
import logging

# Ask the root logger for INFO-and-above records, each stamped with
# date/time, level and logger name.
# NOTE(review): the log lines captured further down use the default
# "LEVEL:name:message" layout, so this call may have had no effect in that
# runtime (basicConfig does nothing when the root logger already has
# handlers) — confirm.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%m/%d/%Y %H:%M:%S",
    format="%(asctime)s — %(levelname)s — %(name)s — %(message)s",
)

from aitextgen import aitextgen
from aitextgen.colab import mount_gdrive, copy_file_from_gdrive
from aitextgen.TokenDataset import TokenDataset, merge_datasets
from aitextgen.utils import build_gpt2_config
from aitextgen.tokenizers import train_tokenizer

In [None]:
# Report the GPU attached to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi

Sun Aug 28 13:46:27 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   73C    P0    31W /  70W |   1066MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Uploading the data

In [None]:
import pickle

# Load the pickled chord data. Later cells iterate it as a collection of songs,
# each a sequence of entries whose first field is read as the chord
# (presumably the chord symbol — confirm against how the pickle was produced).
# SECURITY: pickle.load can execute arbitrary code; only load files you trust.
# The context manager guarantees the file handle is closed after loading.
with open("soul_chords.pickle", "rb") as pickle_file:
    chords_t = pickle.load(pickle_file)

A quick and dirty way of extracting chord progressions from whole songs


Training a GPT-2 model on pretty much raw data (zero feature engineering)

In [None]:
# One inner list per song, keeping only the first field of every entry
# (presumably the chord symbol — confirm against the pickle's layout).
chords_ll = [[entry[0] for entry in song] for song in chords_t]

In [None]:
# Vocabulary check: flatten every song into one long list of chords,
# then count how many distinct chords occur (i.e. the vocab size).
chords_flat = [entry[0] for song in chords_t for entry in song]
len(set(chords_flat))

1043

In [None]:
import csv

# Dump the progressions to disk: one song per row, chords separated by a
# single space, so the file can be fed to the tokenizer and trainer as text.
with open("chords_csv.csv", "w", newline="") as csv_file:
    row_writer = csv.writer(csv_file, delimiter=" ")
    row_writer.writerows(chords_ll)

In [None]:
# Train a custom tokenizer on the chord file, using the library's default settings.
# The "aitextgen.tokenizer.json" file loaded in later cells is presumably
# what this call writes — confirm.
train_tokenizer('chords_csv.csv')

In [None]:
# Deliberately tiny GPT-2: 16-token context, 64-dim embeddings,
# 8 layers, 8 attention heads, no dropout.
# NOTE(review): vocab_size=5000 is well above the 1043 unique chords counted
# earlier, and the tokenizer was trained without an explicit vocabulary size —
# confirm the model and tokenizer vocabularies actually agree.
config = build_gpt2_config(
    vocab_size=5000,
    max_length=16,
    dropout=0.0,
    n_embd=64,
    n_layer=8,
    n_head=8,
)
config

GPT2Config {
  "activation_function": "gelu_new",
  "attn_pdrop": 0.0,
  "bos_token_id": 0,
  "embd_pdrop": 0.0,
  "eos_token_id": 0,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 16,
  "n_embd": 64,
  "n_head": 8,
  "n_inner": null,
  "n_layer": 8,
  "n_positions": 16,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.0,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.0,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "transformers_version": "4.21.2",
  "use_cache": true,
  "vocab_size": 5000
}

In [None]:
# Build a fresh model from the config defined above, paired with the custom
# tokenizer file, and place it on the GPU.
ai = aitextgen(
    tokenizer_file="aitextgen.tokenizer.json",
    config=config,
    to_gpu=True,
)

INFO:aitextgen:Constructing model from provided config.
INFO:aitextgen:GPT2 loaded with 0M parameters.
INFO:aitextgen:Using a custom tokenizer.


In [None]:
# Sanity check before training: draw five samples from the freshly built model.
ai.generate(n=5)

�
�O
�


In [None]:
# Train on the chord file. In the run captured below, training was stopped by
# hand (KeyboardInterrupt) shortly after step 5,000, well short of num_steps.
ai.train(
    "chords_csv.csv",
    # data handling
    line_by_line=False,
    from_cache=False,
    # optimisation
    num_steps=30000,
    learning_rate=1e-3,
    batch_size=16,
    # progress samples and checkpoints
    generate_every=500,
    save_every=1000,
    save_gdrive=False,
)

INFO:aitextgen:Loading text from chords_csv.csv with generation length of 16.


  0%|          | 0/984 [00:00<?, ?it/s]

INFO:aitextgen.TokenDataset:Encoding 984 rows from chords_csv.csv.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


  0%|          | 0/30000 [00:00<?, ?it/s]

[1m500 steps reached: generating sample texts.[0m
 Cm7/F# Dm7/C A#m/C Go
[1m1,000 steps reached: saving model to /trained_model[0m
[1m1,000 steps reached: generating sample texts.[0m
7/F# C#M7/F# F#M7
[1m1,500 steps reached: generating sample texts.[0m
2 CM7/B A# D#m7 A#o/
[1m2,000 steps reached: saving model to /trained_model[0m
[1m2,000 steps reached: generating sample texts.[0m
2 CM7 FM7/C FM7/C CM7 F#
[1m2,500 steps reached: generating sample texts.[0m
2 A Esus2 D#m7/A# Esus2/D
[1m3,000 steps reached: saving model to /trained_model[0m
[1m3,000 steps reached: generating sample texts.[0m
7/C Cm7/C C#o/C F#o
[1m3,500 steps reached: generating sample texts.[0m
7/F# Bm7/F# F#/o7 G#
[1m4,000 steps reached: saving model to /trained_model[0m
[1m4,000 steps reached: generating sample texts.[0m
7/C# G#7 Fm7 G#7/C C
[1m4,500 steps reached: generating sample texts.[0m
7/o7 Am7/C Eo/G Am7/C
[1m5,000 steps reached: saving model to /trained_model[0m
[1m5,000 steps rea

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
INFO:aitextgen:Saving trained model pytorch_model.bin to /trained_model


In [None]:
# Reload the model saved during training into a separate object used for sampling.
gen = aitextgen(
    model_folder="trained_model",
    tokenizer_file="aitextgen.tokenizer.json",
    to_gpu=True,
)

INFO:aitextgen:Loading model from provided weights and config in /trained_model.
INFO:aitextgen:GPT2 loaded with 0M parameters.
INFO:aitextgen:Using a custom tokenizer.


In [None]:
# Unprompted sample from the reloaded model.
# NOTE(review): max_length=1024 is far above the 16 positions the model was
# configured with; the output shown is short, so the library presumably caps
# the length — confirm.
gen.generate(max_length=1024)

7 FM7/F# A7/E F#7/B


In [None]:
# Five continuations of a two-chord prompt, sampled with temperature 0.6
# and nucleus (top_p) cutoff 0.8.
gen.generate(
    n=5,
    prompt="Am7 F#7",
    temperature=0.6,
    top_p=0.8,
)

[1mAm7 F#7[0m/A# F#o/A D7 F
[1mAm7 F#7[0m/A# F#o/A F#o
[1mAm7 F#7[0m/A# F#o/A F#o
[1mAm7 F#7[0m F#m7/A F#m7/
[1mAm7 F#7[0m/A# F#o/A D7 F
