# Data Gathering

In [0]:
## ABC format DATA
! curl -L "https://api.ipfsbrowser.com/ipfs/download.php?hash=QmSM68rjRZL5WdJFqZqWS33BBrebGMGbmdSnUP56wVeFTx" > 'abc_data.txt'

#MIDI Library
!curl -L "https://pjb.com.au/midi/free/MIDI.py" > 'MIDI.py'

## MIDI format Mozart Data
!wget http://www.piano-midi.de/zip/mozart.zip
!sudo apt-get install unzip
!unzip mozart.zip -d Dataset

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  157k  100  157k    0     0   346k      0 --:--:-- --:--:-- --:--:--  346k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 69122  100 69122    0     0  73769      0 --:--:-- --:--:-- --:--:-- 73690
--2020-01-27 19:28:47--  http://www.piano-midi.de/zip/mozart.zip
Resolving www.piano-midi.de (www.piano-midi.de)... 82.165.134.185
Connecting to www.piano-midi.de (www.piano-midi.de)|82.165.134.185|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 167294 (163K) [application/zip]
Saving to: ‘mozart.zip’


2020-01-27 19:28:48 (786 KB/s) - ‘mozart.zip’ saved [167294/167294]

Reading package lists... Done
Building dependency tree       
Reading state information... Done
unzip is already the newest vers

In [0]:
# MIDI Dataset to songs.ms converter 
import MIDI
import os


def write_notes(file_address, output_path='songs.ms'):
    """Append the note pitches of one MIDI file to a text file as one record.

    The file is parsed with ``MIDI.midi2score``. Entry 0 of the resulting
    score is skipped; every later entry is walked as a list of events, and the
    pitch (element 4) of each ``'note'`` event is collected. Walking stops
    after the first track that brings the total above 20 notes, so later
    tracks are ignored once enough material has been gathered.

    Each pitch is stored as the single character ``chr(pitch + 35)``. The
    record written is a newline, the pitch characters, then a tab.

    Args:
        file_address: Path of the MIDI file to read.
        output_path: Text file the record is appended to. Defaults to
            ``'songs.ms'``, the file the training cell reads.
    """
    # The context manager closes the handle even when parsing raises.
    with open(file_address, 'rb') as midi_file:
        score = MIDI.midi2score(midi_file.read())
    # Note events look like:
    # ['note', start_time, duration, channel, note, velocity]

    notes = []
    for track in score[1:]:
        notes.extend(event[4] for event in track if event[0] == 'note')
        # More than 20 notes collected: treat this as enough and stop here.
        if len(notes) > 20:
            break

    encoded = ''.join(chr(note + 35) for note in notes)
    with open(output_path, 'a') as song_file:  # append
        song_file.write('\n' + encoded + '\t')

dataset_addr = "Dataset"
# Sorted so the songs are appended in the same order on every run (the order
# returned by os.listdir is arbitrary). Only MIDI files are kept so that a
# stray non-MIDI entry in the folder is never handed to the MIDI parser.
files = sorted(
    name for name in os.listdir(dataset_addr)
    if name.lower().endswith(('.mid', '.midi'))
)
# NOTE: write_notes appends to songs.ms, so re-running this cell adds every
# song a second time; delete songs.ms first if a fresh corpus is wanted.
for file in files:
    path = os.path.join(dataset_addr, file)
    write_notes(path)

# Model Training

In [0]:
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dropout, TimeDistributed, Dense, Activation, Embedding

# Number of sequences per batch: first dimension of the X/Y arrays built in
# read_batches and of the training model's input shape.
BATCH_SIZE = 16
# Number of characters per sequence: second dimension of those same arrays.
SEQ_LENGTH = 64

In [0]:
def get_data_unique_chars_dicts(data):
    """Build the lookup tables between characters and integer ids.

    The distinct items of ``data`` are sorted and numbered from 0 in that
    order.

    Args:
        data: Iterable of hashable, mutually comparable items (the notebook
            passes the corpus string).

    Returns:
        A ``(char_to_index, index_to_char)`` pair of dicts that are inverses
        of each other.
    """
    vocabulary = sorted(set(data))
    char_to_index = {symbol: position for position, symbol in enumerate(vocabulary)}
    index_to_char = dict(enumerate(vocabulary))
    return char_to_index, index_to_char

def read_batches(all_chars, unique_chars, batch_size=None, seq_length=None):
    """Yield (X, Y) training batches cut from an encoded character array.

    The array is split into ``batch_size`` equal contiguous stretches; row
    ``r`` of every batch is read from stretch ``r``, and successive batches
    advance ``seq_length`` positions within each stretch. ``X`` holds the
    character ids and ``Y`` the one-hot encoding of the id that follows each
    position of ``X``.

    Args:
        all_chars: 1-D NumPy integer array of character ids.
        unique_chars: Vocabulary size (depth of the one-hot axis of ``Y``).
        batch_size: Rows per batch. Defaults to the module-level BATCH_SIZE.
        seq_length: Positions per row. Defaults to the module-level
            SEQ_LENGTH.

    Yields:
        ``(X, Y)`` where ``X`` has shape ``(batch_size, seq_length)`` and
        ``Y`` has shape ``(batch_size, seq_length, unique_chars)``.
    """
    # The loop bounds used to be the literals 16 and 64, which silently
    # disagreed with the array shapes whenever the constants were changed.
    if batch_size is None:
        batch_size = BATCH_SIZE
    if seq_length is None:
        seq_length = SEQ_LENGTH

    length = all_chars.shape[0]
    batch_chars = length // batch_size  # characters available to each row
    positions = np.arange(seq_length)
    for start in range(0, batch_chars - seq_length, seq_length):
        X = np.zeros((batch_size, seq_length))
        Y = np.zeros((batch_size, seq_length, unique_chars))
        for batch_index in range(batch_size):
            offset = batch_index * batch_chars + start
            X[batch_index] = all_chars[offset:offset + seq_length]
            # Targets are the inputs shifted one character to the right.
            targets = all_chars[offset + 1:offset + seq_length + 1]
            Y[batch_index, positions, targets] = 1
        yield X, Y

In [0]:
def make_model(batch_size, seq_length, num_of_unique_chars):
    """Assemble the training network.

    Layout: an embedding of width 512, three stateful LSTM layers of 256
    units that each return the full sequence and are each followed by
    Dropout(0.2), then a per-timestep Dense layer of vocabulary size with a
    softmax activation.

    Args:
        batch_size: Fixed batch dimension of the input.
        seq_length: Fixed sequence dimension of the input.
        num_of_unique_chars: Vocabulary size; used as the embedding input
            dimension and as the width of the output layer.

    Returns:
        The uncompiled Sequential model.
    """
    network = Sequential()

    network.add(Embedding(input_dim=num_of_unique_chars, output_dim=512,
                          batch_input_shape=(batch_size, seq_length)))

    # Three identical recurrent stages.
    for _ in range(3):
        network.add(LSTM(256, return_sequences=True, stateful=True))
        network.add(Dropout(0.2))

    network.add(TimeDistributed(Dense(num_of_unique_chars)))
    network.add(Activation("softmax"))

    return network

In [0]:
def train(model, model_name, num_of_epochs = 100):
  epoch_report = {}
      
  for epoch in range(num_of_epochs):
      print("Epoch {}/{}".format(epoch+1, num_of_epochs))

      average_epoch_loss = 0
      average_epoch_acc = 0
      counter = 0

      for i, (x, y) in enumerate(read_batches(chars_of_data, num_of_unique_chars)):
          if (i + 1) % 5:
            counter += 1
            batch_loss, batch_accuracy = model.train_on_batch(x, y)
            average_epoch_loss += batch_loss
            average_epoch_acc += batch_accuracy
            print("Batch: {}, Loss: {}, Accuracy: {}".format(i+1, batch_loss, batch_accuracy))
          else:
            batch_loss, batch_accuracy = model.test_on_batch(x, y)
            print(f"TEST Loss: {batch_loss}, Accuracy: {batch_accuracy}")

      epoch_report[epoch] = (average_epoch_loss / (counter), average_epoch_acc / (counter))
      
      if (epoch + 1) % 10 == 0:
          model.save_weights(f"Weights_{model_name}.h5")
          print(f'Saved Weights at epoch {epoch+1} to file Weights_{model_name}.h5')

  for epoch_num in epoch_report.keys():
    epoch_loss, epoch_acc = epoch_report[epoch_num]
    print(f"{epoch_num}\t{epoch_loss}\t{epoch_acc}")

In [0]:
# Load the whole converted corpus (songs.ms) as one string.
with open('songs.ms', 'r') as data_file:
    data = data_file.read()

# Vocabulary lookups, vocabulary size, and the corpus re-encoded as int32 ids.
char_to_index, index_to_char = get_data_unique_chars_dicts(data)
num_of_unique_chars = len(char_to_index)
chars_of_data = np.asarray([char_to_index[c] for c in data], dtype = np.int32)

# train() reads chars_of_data and num_of_unique_chars from the global
# namespace, so both must be defined before the call below.
model = make_model(BATCH_SIZE, SEQ_LENGTH, num_of_unique_chars)
model.compile(loss = "categorical_crossentropy", optimizer = "adam", 
              metrics = ["accuracy"])

# Train for 100 epochs; weights are written to Weights_mozart.h5.
train(model, "mozart", 100)


Epoch 1/100
Batch: 1, Loss: 3.871164321899414, Accuracy: 0.0205078125
Batch: 2, Loss: 3.854757785797119, Accuracy: 0.12109375
Batch: 3, Loss: 3.82728910446167, Accuracy: 0.1025390625
Batch: 4, Loss: 3.731536388397217, Accuracy: 0.091796875
TEST Loss: 3.515650749206543, Accuracy: 0.0869140625
Batch: 6, Loss: 3.4810843467712402, Accuracy: 0.0791015625
Batch: 7, Loss: 3.2807648181915283, Accuracy: 0.103515625
Batch: 8, Loss: 3.29886794090271, Accuracy: 0.0859375
Batch: 9, Loss: 3.243844509124756, Accuracy: 0.087890625
TEST Loss: 3.285658836364746, Accuracy: 0.0888671875
Batch: 11, Loss: 3.4977431297302246, Accuracy: 0.0693359375
Batch: 12, Loss: 3.3195223808288574, Accuracy: 0.0576171875
Batch: 13, Loss: 3.2680563926696777, Accuracy: 0.068359375
Batch: 14, Loss: 3.304166793823242, Accuracy: 0.09765625
TEST Loss: 3.241425037384033, Accuracy: 0.0966796875
Batch: 16, Loss: 3.1723008155822754, Accuracy: 0.0986328125
Batch: 17, Loss: 3.2595808506011963, Accuracy: 0.080078125
Batch: 18, Loss: 3

# Sampling

In [0]:
def make_model_for_sampling(num_of_unique_chars):
    """Assemble the one-character-at-a-time network used for generation.

    The layer sizes match those of make_model, but the input shape is fixed
    to a single sequence of a single character, the last LSTM returns only
    its final output, and the Dense layer is applied directly rather than
    per timestep.

    Args:
        num_of_unique_chars: Vocabulary size; used as the embedding input
            dimension and as the width of the output layer.

    Returns:
        The uncompiled Sequential model, without any weights loaded.
    """
    sampler = Sequential()

    sampler.add(Embedding(input_dim=num_of_unique_chars, output_dim=512,
                          batch_input_shape=(1, 1)))

    # Only the last recurrent stage collapses the sequence dimension.
    for keep_sequence in (True, True, False):
        sampler.add(LSTM(256, return_sequences=keep_sequence, stateful=True))
        sampler.add(Dropout(0.2))

    sampler.add(Dense(num_of_unique_chars))
    sampler.add(Activation("softmax"))

    return sampler

In [0]:
def adjust_seq(seq):
    """Trim generated text down to the span between two boundaries.

    Everything up to and including the first newline is dropped. The
    remainder is then cut just after the first newline that is immediately
    followed by another newline; that first newline is kept.

    Args:
        seq: Generated text.

    Returns:
        The trimmed text. It is empty when ``seq`` contains no newline, and
        it is the whole remainder when no blank line follows.
    """
    first_newline = seq.find("\n")
    if first_newline == -1:
        return ""
    remainder = seq[first_newline + 1:]

    # str.find never looks past the end of the string; the previous
    # character-by-character scan indexed one position too far and raised
    # IndexError whenever the text ended with a newline.
    blank_line = remainder.find("\n\n")
    if blank_line == -1:
        return remainder
    return remainder[:blank_line + 1]

def generate_sequence(model_weights_address, initial_index, 
                      seq_length, is_abc=False):
    """Sample a character sequence from saved weights.

    A sampling model is built with make_model_for_sampling, the weights are
    loaded into it, and it is then fed its own previous output one character
    at a time. ``num_of_unique_chars`` and ``index_to_char`` are read from the
    notebook's global namespace.

    Args:
        model_weights_address: Path of the weights file to load.
        initial_index: Vocabulary index of the seed character.
        seq_length: Number of characters to sample after the seed.
        is_abc: When true, the text is passed through adjust_seq before it is
            returned.

    Returns:
        The decoded text: the seed character followed by ``seq_length``
        sampled characters, trimmed by adjust_seq when ``is_abc`` is set.
    """
    sequence_index = [initial_index]

    model = make_model_for_sampling(num_of_unique_chars)
    model.load_weights(model_weights_address)

    batch = np.zeros((1, 1))
    for _ in range(seq_length):
        batch[0, 0] = sequence_index[-1]
        predicted_probs = np.asarray(model.predict_on_batch(batch),
                                     dtype=np.float64).ravel()
        # A float32 softmax rarely sums to exactly 1, and np.random.choice
        # rejects such vectors ("probabilities do not sum to 1"), so the
        # vector is renormalised in float64 before sampling.
        predicted_probs = predicted_probs / predicted_probs.sum()
        sample = np.random.choice(num_of_unique_chars, p=predicted_probs)

        sequence_index.append(int(sample))

    seq = ''.join(index_to_char[c] for c in sequence_index)

    if is_abc:
        return adjust_seq(seq)
    return seq

In [0]:
# Run 500 sampling steps from the saved weights, seeded with vocabulary index 0.
seq = generate_sequence("Weights_mozart.h5", 0, 500)
print(seq)
# Persist the generated text to gen_song.ms.
with open('gen_song.ms', 'w') as gen_song_file:
  gen_song_file.write(seq)

	vyxutrpvy|{ywutrpturpomkihfdhfdca_a_]\ZXX\X\_dhkpkwtwtpyupy{ywutrpoywutrpomkihfdhfdca_^_]\ZX]X_X]X]fhi__ba]^]\d]acddfhikmoprtuworpihikihfdcdfcihfidfcdh_hf]i\df\hd`fchrpomdhkf]dcdgafcilmlklmnoptrtrttpqomkjkmkmkjkmoqrrrrqomkjkmkmkmkjkmoqrrrrtrqywvtrpomoprtvwvwwrwroprpoprtvwrrrtrprpopomomkfhjkmoprtvwrrrtrprpopomomkfhjkmoprtvwrrrtrprpopomomkfhjkmoprtvwrrrtrprpopomomkfhjkmoprtvwrrrtrprpopomomkfhjkmoprtvwrrrtrprpopomomkfhjkmoprtvwrrrtrprpopomomkfhjkmoprtvwrrrtrprpopomomkfhjkmoprtvwrrrtrprpopomomkfhjkm


# Convert To MIDI

In [0]:
# convert gen_song to midi format
import MIDI

# Decode the generated text back into pitches. Characters with a code below
# 35 are skipped; every other character maps to pitch ord(char) - 35, the
# inverse of the chr(pitch + 35) encoding used when songs.ms was written.
notes = []
with open("gen_song.ms", "r") as song_file:
    chrs = song_file.read()
# The loop variable is deliberately not named `chr`: at notebook top level
# that would replace the builtin chr() for the whole kernel and break
# write_notes() on any later re-run.
for symbol in chrs:
    if ord(symbol) >= 35:
        notes.append(ord(symbol) - 35)

# Score layout expected by MIDI.score2midi: a leading number (480 here,
# presumably ticks per beat - confirm against the MIDI.py docs) followed by
# one event list per track.
song_score = [480, [['track_name', 0, b'Gen Song by Deep Learning, opus 10'], ['set_tempo', 0, 294840]]]
# ['note', start_time, duration, channel, note, velocity]

NOTE_DURATION = 240   # length of every generated note
NOTE_SPACING = 360    # distance between consecutive note starts
NOTE_VELOCITY = 50    # same velocity for every note

piano_track = [['track_name', 0, b'Piano Right'], ['patch_change', 0, 0, 0]]
song_score.append(piano_track)
# Named start_time rather than `time` so the standard-library module name is
# not shadowed in the notebook namespace.
start_time = 1000
for note in notes:
    piano_track.append(['note', start_time, NOTE_DURATION, 0, note, NOTE_VELOCITY])
    start_time += NOTE_SPACING


midi_data = MIDI.score2midi(song_score)
with open('generated_song.mid', 'wb') as midi_file:
    midi_file.write(midi_data)