Generating an instrumental in Draganov style

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm

In [None]:
# Read the whole training text into memory. The context manager guarantees the
# file handle is closed as soon as the content has been read.
with open('/content/draga_for_rnn_train.txt', 'r', encoding='utf-8') as corpus_file:
    data = corpus_file.read()

In [None]:
# Build the vocabulary: every distinct character found in the data, in sorted order

unique_chars = list(set(data))
unique_chars.sort()
print("Unique characters:", unique_chars)

Unique characters: ['\n', ' ', '%', ',', '-', '/', '0', '1', '2', '3', '4', '5', '6', '8', ':', '<', '=', '>', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'K', 'L', 'M', 'Q', 'V', 'X', '[', '\\', ']', '^', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'l', 'm', 'o', 'p', 'r', 's', 't', 'z', '|']


In [None]:
# Build the two lookup tables: index -> character and character -> index

idx_to_char = dict(enumerate(unique_chars))
char_to_idx = {char: idx for idx, char in idx_to_char.items()}

print("Character to Index mapping:", char_to_idx)
print("Index to Character mapping:", idx_to_char)

Character to Index mapping: {'\n': 0, ' ': 1, '%': 2, ',': 3, '-': 4, '/': 5, '0': 6, '1': 7, '2': 8, '3': 9, '4': 10, '5': 11, '6': 12, '8': 13, ':': 14, '<': 15, '=': 16, '>': 17, 'A': 18, 'B': 19, 'C': 20, 'D': 21, 'E': 22, 'F': 23, 'G': 24, 'I': 25, 'K': 26, 'L': 27, 'M': 28, 'Q': 29, 'V': 30, 'X': 31, '[': 32, '\\': 33, ']': 34, '^': 35, 'a': 36, 'b': 37, 'c': 38, 'd': 39, 'e': 40, 'f': 41, 'g': 42, 'h': 43, 'l': 44, 'm': 45, 'o': 46, 'p': 47, 'r': 48, 's': 49, 't': 50, 'z': 51, '|': 52}
Index to Character mapping: {0: '\n', 1: ' ', 2: '%', 3: ',', 4: '-', 5: '/', 6: '0', 7: '1', 8: '2', 9: '3', 10: '4', 11: '5', 12: '6', 13: '8', 14: ':', 15: '<', 16: '=', 17: '>', 18: 'A', 19: 'B', 20: 'C', 21: 'D', 22: 'E', 23: 'F', 24: 'G', 25: 'I', 26: 'K', 27: 'L', 28: 'M', 29: 'Q', 30: 'V', 31: 'X', 32: '[', 33: '\\', 34: ']', 35: '^', 36: 'a', 37: 'b', 38: 'c', 39: 'd', 40: 'e', 41: 'f', 42: 'g', 43: 'h', 44: 'l', 45: 'm', 46: 'o', 47: 'p', 48: 'r', 49: 's', 50: 't', 51: 'z', 52: '|'}


In [None]:
# Encode the data: one vocabulary index per character (KeyError on an unknown character)
data_as_indices = list(map(char_to_idx.__getitem__, data))

print("Data as indices:", data_as_indices)

Data as indices: [31, 14, 1, 7, 0, 28, 14, 1, 10, 5, 10, 0, 27, 14, 1, 7, 5, 13, 0, 29, 14, 7, 5, 10, 16, 7, 8, 6, 0, 26, 14, 19, 1, 2, 1, 11, 1, 49, 43, 36, 48, 47, 49, 0, 30, 14, 7, 0, 2, 2, 28, 25, 21, 25, 1, 47, 48, 46, 42, 48, 36, 45, 1, 10, 0, 51, 5, 8, 32, 21, 3, 24, 3, 3, 4, 34, 5, 8, 32, 19, 3, 4, 24, 3, 24, 3, 3, 4, 34, 5, 8, 32, 21, 19, 3, 4, 24, 3, 4, 24, 3, 3, 4, 34, 9, 32, 19, 3, 4, 24, 3, 4, 24, 3, 3, 4, 34, 5, 8, 32, 19, 3, 4, 24, 3, 4, 21, 3, 4, 24, 3, 3, 4, 34, 8, 32, 19, 3, 4, 24, 3, 21, 3, 4, 24, 3, 3, 4, 34, 5, 8, 32, 19, 3, 4, 24, 3, 4, 21, 3, 4, 24, 3, 3, 4, 34, 5, 8, 52, 1, 33, 0, 32, 19, 3, 24, 3, 4, 21, 3, 4, 24, 3, 3, 4, 34, 32, 24, 3, 4, 21, 3, 24, 3, 3, 34, 5, 8, 24, 3, 5, 8, 4, 1, 32, 24, 3, 21, 3, 4, 34, 32, 18, 3, 21, 3, 34, 1, 19, 3, 9, 5, 8, 18, 3, 24, 3, 3, 5, 8, 4, 32, 24, 3, 4, 20, 3, 4, 24, 3, 3, 4, 34, 5, 8, 32, 22, 4, 20, 4, 24, 3, 4, 20, 3, 4, 20, 3, 24, 3, 3, 4, 34, 5, 8, 52, 1, 33, 0, 32, 22, 4, 20, 24, 3, 4, 20, 3, 4, 24, 3, 3, 4, 34, 32, 22,

In [None]:
# Number of distinct characters; the lookup table has exactly one entry per character
vocab_size = len(char_to_idx)

In [None]:
# Creating the lists of sequences

def create_sequences(data, seq_length):
    """Slice `data` into overlapping windows, each paired with the element that follows it.

    Args:
        data: Indexable, sliceable sequence (e.g. a list of integer indices).
        seq_length: Number of consecutive elements in each input window.

    Returns:
        A tuple `(x, y)` of NumPy arrays. `x[i]` is `data[i:i + seq_length]`
        and `y[i]` is `data[i + seq_length]`. One pair is produced for every
        start position `i` in `range(len(data) - seq_length)`, so both arrays
        are empty when `data` is not longer than `seq_length`.
    """
    n_windows = len(data) - seq_length
    windows = [data[start:start + seq_length] for start in range(n_windows)]
    followers = [data[start + seq_length] for start in range(n_windows)]
    return np.array(windows), np.array(followers)


# Build the training set: windows of `seq_length` indices and the index that follows each one
seq_length = 100
x_train, y_train = create_sequences(data=data_as_indices, seq_length=seq_length)

print("Input sequences:", x_train)
print("Target sequences:", y_train)


Input sequences: [[31 14  1 ...  3  4 24]
 [14  1  7 ...  4 24  3]
 [ 1  7  0 ... 24  3  3]
 ...
 [34  5  8 ...  8 18  5]
 [ 5  8 52 ... 18  5  8]
 [ 8 52  0 ...  5  8 52]]
Target sequences: [ 3  3  4 ...  8 52  0]


In [None]:
# Converting target to categorical variable (one-hot rows of width vocab_size).
#
# The cell rebinds `y_train`, so it is guarded on the number of dimensions:
# running it a second time would otherwise one-hot encode the already encoded
# matrix and silently produce targets of the wrong shape.

if y_train.ndim == 1:
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=vocab_size)

In [None]:
# Our NN model: embedding -> two stacked LSTM layers -> softmax over the vocabulary

model = Sequential()
model.add(Embedding(vocab_size, 64, input_length=seq_length))
# First LSTM returns the full sequence so the second LSTM can consume it
model.add(LSTM(256, return_sequences=True))
model.add(LSTM(256))
model.add(Dense(vocab_size, activation='softmax'))

In [None]:
# Categorical cross-entropy matches the one-hot encoded targets
model.compile(optimizer=Adam(learning_rate=1e-3), loss='categorical_crossentropy')

In [None]:
# Train the model for 30 epochs, with 10% of the samples held out for validation

model.fit(
    x=x_train,
    y=y_train,
    epochs=30,
    batch_size=128,
    validation_split=0.1,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x793b48dfc3d0>

In [None]:
# Persist the trained model to disk

model.save(filepath='Draganov_model_v3.h5')

# Version history:
# v1: did not contain the first letters of each ABC file (X, L, M, ...)
# v2: contains those letters at the beginning of the ABC file
# v3: most parameters were increased and temperature was added

In [None]:
# Generation function

def generate_sequence_with_temperature(seed_text, next_chars, char_to_idx, idx_to_char, model, seq_length, temperature=0.5):
    """Extend `seed_text` one character at a time by sampling from `model`.

    At every step the last `seq_length` characters of the text generated so far
    are encoded, left-padded with index 0 when shorter than `seq_length`, and
    passed to `model.predict`. The returned distribution is re-scaled by
    `temperature` and the next character is drawn from it at random.

    Args:
        seed_text: Text the generation starts from; it is included in the result.
        next_chars: Number of characters to append.
        char_to_idx: Mapping from character to integer index. Characters that
            are missing from the mapping are encoded as index 0.
        idx_to_char: Mapping from integer index back to character.
        model: Object exposing `predict(x, verbose=0)`; the first row of its
            result is treated as a probability distribution over the indices.
        seq_length: Length of the input window given to the model.
        temperature: Strictly positive sampling temperature. Values below 1
            sharpen the distribution, values above 1 flatten it.

    Returns:
        `seed_text` followed by the `next_chars` sampled characters.

    Raises:
        ValueError: If `temperature` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be strictly positive, got {!r}".format(temperature))

    generated_text = seed_text

    for _ in range(next_chars):
        # Encode the most recent window; pad on the left so it always has seq_length entries.
        window = generated_text[-seq_length:]
        x = np.array([char_to_idx.get(char, 0) for char in window], dtype=np.int64)
        x = np.pad(x, (max(0, seq_length - len(x)), 0), 'constant')
        x = x.reshape((1, seq_length))

        # Promote to float64 so the renormalised probabilities sum to 1 accurately.
        predictions = np.asarray(model.predict(x, verbose=0)[0], dtype=np.float64)

        # Apply temperature in log space. A probability of exactly 0 becomes
        # -inf and ends up as 0 again after exp, so the divide-by-zero warning
        # from log is expected and silenced.
        with np.errstate(divide='ignore'):
            scaled_logits = np.log(predictions) / temperature
        # Shift by the maximum so exp cannot overflow at small temperatures.
        scaled_logits = scaled_logits - np.max(scaled_logits)
        exp_preds = np.exp(scaled_logits)
        probabilities = exp_preds / np.sum(exp_preds)

        next_index = int(np.random.choice(len(probabilities), p=probabilities))
        generated_text += idx_to_char[next_index]

    return generated_text

In [None]:
# Generate a new sequence
#
# The seed is a raw triple-quoted string. In a regular string literal a
# backslash followed by a newline is removed by Python, which glued all the
# header fields and bars onto a single line ("X: 1 M: 4/4L: 1/8Q:..."). The
# raw string keeps the real line breaks, and keeps the trailing "\" of each
# bar line as a literal character, which is how the training text is laid out.
seed_text = r"""X: 1
M: 4/4
L: 1/8
Q:1/4=120
K:D % 2 sharps
V:1
%%clef bass
%%MIDI program 4
z/2E/2-[EE,]/2z3/2[bB-E,-] [B-E,-]/2[bB-E,-][BE,]3/2E/2-[E-B,E,-]/2| \
[E-G,-E,E,]/2[E-G,-]/2[E-D-G,E,-]/2[EDE,]/2 [E-E,-]/2[E-DE,-]/2[E-CG,-E,-E,]/2[E-G,E,]/2 [EB,-]/2B,/2-[d-D-B,B,,-]/2[d-B-DB,B,,]/2 [dB-DB,]/2B/2[F-D-B,-B,,-]/2[d-F-D-B,-B,,-]/2| \
[dF-D-B,-B,,-]/2[F-D-B,-B,,-]2[FDB,-B,,-]/2[B,B,,-]/2[B,B,,-]3/2[dFDF,B,,-] [EDB,F,-D,-B,,-]/2[D-F,D,D,B,,-]/2[D-B,,-]| \
[DB,B,,]/2C/2>d/2[D^A,A,,]/2 z/2[cC-]/2[c-C-]/2[c-E-C-][ecE-C-][E-C]2[E-B,]/2| \
E/2-[E-C-]/2[E-CE,]/2[EC-]/2 [E-C]/2[E-D]/2E/2-[c-E-C-E,-]/2 [c-EC-B,E,-]/2[cC-E,]/2[d-D-C]/2[dD-B,-B,,-]/2 [DB,B,,]/2[d-D]/2[dF-]/2[F-B,-B,,-]/2| \
[F-D-B,-B,,-]2 [FD-B,-B,,-]/2[DB,-B,,-][B,-B,B,,-]/2 [B,B,,-]/2[d-D-B,-B,,-]/2[d-D-B,-B,B,,-]/2[d-F-D-B,B,,-]/2 [dFD-B,,-]/2[GDB,,-][F-B,,-G,,]/2| \
[FB,,]/2E/2[E-E,]/2[e-EE,-]/2 [e-EE,-]/2[eE-E,-]/2[E-E,-]/2[B-EE,-]3/2[B-E,-]2[B-E-E,]/2[BE-]/2| \
[E-B,G,-E,-]/2[E-CG,G,E,-E,]/2[G-E-E,]/2[GED-G,E,-]/2 [DB,G,E,]/2[eE-E,]/2[G-E-DE,-]/2[GE-CE,]/2 [E-G,-E,]/2[EB,-G,E,]/2B,/2[d-D-]/2 [d-D-B,-B,,]/2[dBD-B,]/2D/2-[FDB,-B,,-]/2| \
[dB,-B,,-]/2[D-B,-B,,-]/2[F-DB,-B,,-]2[FB,B,,] [BB,B,,-]/2[F-B,,]/2[FB,F,-B,,-]/2[dFD-F,B,,-]/2 [DB,,-]/2[ED-F,B,,-]/2[DDF,-D,B,,-B,,]/2[F,B,,-]/2| \
[cCB,,-]/2[B,B,,]/2z/2d/2- [d-D^A,A,,]/2d/2c/2c/2- [ecE-C-]3/2[E-C]2E/2-| \
E/2-[E-B,]/2[E-C-E,]/2[EC][eE-]/2E/2-[E-D-]/2 [E-DC]/2E/2B,/2C/2 [D-G,,-]/2[D-G,G,,]/2[B-DB,-G,,-]/2[BB,G,,-]/2|"""

new_music = generate_sequence_with_temperature(seed_text,
                              500,
                              char_to_idx,
                              idx_to_char,
                              model,
                              seq_length,
                              temperature=0.7)

print("\n\n")
print(new_music)




X: 1 M: 4/4L: 1/8Q:1/4=120K:D % 2 sharpsV:1%%clef bass%%MIDI program 4z/2E/2-[EE,]/2z3/2[bB-E,-] [B-E,-]/2[bB-E,-][BE,]3/2E/2-[E-B,E,-]/2| [E-G,-E,E,]/2[E-G,-]/2[E-D-G,E,-]/2[EDE,]/2 [E-E,-]/2[E-DE,-]/2[E-CG,-E,-E,]/2[E-G,E,]/2 [EB,-]/2B,/2-[d-D-B,B,,-]/2[d-B-DB,B,,]/2 [dB-DB,]/2B/2[F-D-B,-B,,-]/2[d-F-D-B,-B,,-]/2| [dF-D-B,-B,,-]/2[F-D-B,-B,,-]2[FDB,-B,,-]/2[B,B,,-]/2[B,B,,-]3/2[dFDF,B,,-] [EDB,F,-D,-B,,-]/2[D-F,D,D,B,,-]/2[D-B,,-]| [DB,B,,]/2C/2>d/2[D^A,A,,]/2 z/2[cC-]/2[c-C-]/2[c-E-C-][ecE-C-][E-C]2[E-B,]/2| E/2-[E-C-]/2[E-CE,]/2[EC-]/2 [E-C]/2[E-D]/2E/2-[c-E-C-E,-]/2 [c-EC-B,E,-]/2[cC-E,]/2[d-D-C]/2[dD-B,-B,,-]/2 [DB,B,,]/2[d-D]/2[dF-]/2[F-B,-B,,-]/2| [F-D-B,-B,,-]2 [FD-B,-B,,-]/2[DB,-B,,-][B,-B,B,,-]/2 [B,B,,-]/2[d-D-B,-B,,-]/2[d-D-B,-B,B,,-]/2[d-F-D-B,B,,-]/2 [dFD-B,,-]/2[GDB,,-][F-B,,-G,,]/2| [FB,,]/2E/2[E-E,]/2[e-EE,-]/2 [e-EE,-]/2[eE-E,-]/2[E-E,-]/2[B-EE,-]3/2[B-E,-]2[B-E-E,]/2[BE-]/2| [E-B,G,-E,-]/2[E-CG,G,E,-E,]/2[G-E-E,]/2[GED-G,E,-]/2 [DB,G,E,]/2[eE-E,]/2[G-E-DE,-]/2[GE-

We get better results, but this model has some limitations, among them a small training data set. Even with a larger training set, though, the ABC representation is not really suitable for hip-hop/rap instrumentals.

Let's try another approach, working with the audio files themselves.

