In [None]:
!pip install mido

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting mido
  Downloading mido-1.2.10-py2.py3-none-any.whl (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.1/51.1 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: mido
Successfully installed mido-1.2.10


In [None]:
import numpy as np

from keras.layers import Input, Dense, Reshape, Dropout, Bidirectional, LSTM, TimeDistributed
from keras.layers import BatchNormalization, Activation, ZeroPadding2D, Flatten
from keras.layers import LeakyReLU
from keras.models import Sequential, Model
from keras.optimizers import Adam

import matplotlib.pyplot as plt

from mido import Message, MidiFile, MidiTrack

In [None]:
import os
import warnings

import numpy as np
from music21 import instrument, note, chord, stream
from PIL import Image

# Pitch number assigned to index 0 of an image column.
lowerBoundNote = 21
def column2notes(column):
    """Return the pitches that are switched on in one image column.

    An entry counts as "on" when its value is strictly greater than 255/2.
    The pitch of an "on" entry is its index within ``column`` plus
    ``lowerBoundNote``.

    Args:
        column: sequence of pixel intensities.

    Returns:
        List of integer pitches, in increasing index order.
    """
    threshold = 255/2
    return [idx + lowerBoundNote for idx, value in enumerate(column) if value > threshold]

# Length, in quarter notes, represented by one image column.
resolution = 0.25
def updateNotes(newNotes,prevNotes):
    """Carry note durations forward by one time step.

    Args:
        newNotes: iterable of pitches sounding in the current step.
        prevNotes: mapping pitch -> duration accumulated up to the previous step.

    Returns:
        A new dict containing only the pitches in ``newNotes``. A pitch that
        was already in ``prevNotes`` gets its old duration plus ``resolution``;
        a pitch that just started gets ``resolution``.
    """
    return {
        pitch: (prevNotes[pitch] + resolution) if pitch in prevNotes else resolution
        for pitch in newNotes
    }

def _build_piano_note(pitch, duration, start):
    """Create a piano ``note.Note`` lasting ``duration`` quarter lengths at offset ``start``."""
    new_note = note.Note(pitch, quarterLength=duration)
    new_note.storedInstrument = instrument.Piano()
    new_note.offset = start
    return new_note


def image2midi(image_path,save_path):
    """Convert a piano-roll image into a MIDI file.

    Every image column is one time step of ``resolution`` quarter lengths and
    every "on" pixel in a column (see ``column2notes``) is a sounding pitch.
    Consecutive "on" pixels for the same pitch are merged into a single note.

    Args:
        image_path: path of the image to read. Either ``/`` or ``\\`` may be
            used as the separator.
        save_path: directory that receives the MIDI file. The file is named
            after the image, with its extension replaced by ``.mid``.

    Raises:
        ValueError: if the pixel buffer is neither single-channel nor
            three-channel 8-bit data.
    """
    with Image.open(image_path) as image:
        im_arr = np.frombuffer(image.tobytes(), dtype=np.uint8)
        try:
            im_arr = im_arr.reshape((image.size[1], image.size[0]))
        except ValueError:
            # Buffer is not one byte per pixel: treat it as 3-channel data and
            # collapse the channels into a single intensity.
            im_arr = im_arr.reshape((image.size[1], image.size[0],3))
            im_arr = np.dot(im_arr, [0.33, 0.33, 0.33])

    output_notes = []
    prev_notes = {}    # pitch -> duration accumulated so far
    column_start = 0   # time at which the column being processed begins

    for column in im_arr.T:
        notes_in_chord = column2notes(column)

        # A pitch that was sounding but is absent from this column ended at
        # `column_start`, so it began `duration` earlier.
        for old_note, duration in prev_notes.items():
            if old_note not in notes_in_chord:
                output_notes.append(
                    _build_piano_note(old_note, duration, column_start - duration))

        prev_notes = updateNotes(notes_in_chord, prev_notes)
        column_start += resolution

    # Flush the notes still sounding at the right edge of the image.
    for old_note, duration in prev_notes.items():
        output_notes.append(
            _build_piano_note(old_note, duration, column_start - duration))

    # Use only the file name so the result always lands inside save_path,
    # whatever separator or directory prefix image_path carries.
    file_name = os.path.basename(image_path.replace("\\", "/"))
    midi_path = os.path.join(save_path, os.path.splitext(file_name)[0] + ".mid")
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    midi_stream = stream.Stream(output_notes)
    print(midi_path)
    midi_stream.write('midi', fp=midi_path)

In [None]:
from music21 import converter, instrument, note, chord
import sys
import numpy as np
from imageio import imwrite

def extractNote(element):
    """Return ``element.pitch.ps`` converted to ``int``."""
    pitch_space = element.pitch.ps
    return int(pitch_space)

def extractDuration(element):
    """Return the element's duration in quarter lengths (``element.duration.quarterLength``)."""
    duration = element.duration
    return duration.quarterLength

def get_notes(notes_to_parse):
    """Collect start offset, pitch and duration for every note in an iterable.

    Elements that are ``note.Note`` contribute one entry. Elements that are
    ``chord.Chord`` contribute one entry per member note, all sharing the
    chord's offset and duration. Any other element type, and any element whose
    ``isRest`` is true, is ignored.

    Args:
        notes_to_parse: iterable of music21 elements.

    Returns:
        Dict with three parallel lists: ``"start"`` (offsets), ``"pitch"``
        (integer pitches) and ``"dur"`` (quarter lengths).
    """
    starts, pitches, lengths = [], [], []

    for item in notes_to_parse:
        if isinstance(item, note.Note):
            members = (item,)
        elif isinstance(item, chord.Chord):
            members = item
        else:
            continue

        if item.isRest:
            continue

        # Offset and duration come from the enclosing element; only the pitch
        # differs between the members of a chord.
        for member in members:
            starts.append(item.offset)
            lengths.append(extractDuration(item))
            pitches.append(extractNote(member))

    return {"start":starts, "pitch":pitches, "dur":lengths}


def midi2image(midi_path ,save_path,max_repetitions = float("inf"), resolution = 0.25, lowerBoundNote = 21, upperBoundNote = 127, maxSongLength = 100):
    """Render a MIDI file as piano-roll PNG images, one series per instrument.

    Each image is a ``(upperBoundNote - lowerBoundNote, maxSongLength)`` uint8
    matrix: rows are pitches (row 0 is ``lowerBoundNote``), columns are time
    steps of ``resolution`` quarter lengths, and sounding cells are 255.
    Consecutive windows of ``maxSongLength`` steps are written as
    ``<midi name>_<instrument>_<index>.png`` inside ``save_path``.

    Args:
        midi_path: path of the MIDI file (``/`` or ``\\`` separators).
        save_path: directory that receives the PNG files.
        max_repetitions: maximum number of windows written per instrument.
        resolution: quarter lengths represented by one column.
        lowerBoundNote: lowest pitch drawn (inclusive).
        upperBoundNote: upper pitch bound (exclusive).
        maxSongLength: number of columns per image.

    Notes:
        * Pitches outside ``[lowerBoundNote, upperBoundNote)`` are skipped.
        * Rendering for an instrument stops at the first window that contains
          no notes, so material after a silence of a full window is not written.
    """
    mid = converter.parse(midi_path)
    instruments = instrument.partitionByInstrument(mid)

    data = {}
    try:
        unnamed_count = 0
        for instrument_i in instruments.parts:
            notes_data = get_notes(instrument_i.recurse())
            if len(notes_data["start"]) == 0:
                continue

            if instrument_i.partName is None:
                data["instrument_{}".format(unnamed_count)] = notes_data
                unnamed_count += 1
            else:
                data[instrument_i.partName] = notes_data
    except Exception:
        # Best-effort fallback when the file cannot be split per instrument
        # (e.g. `instruments` has no `.parts`): treat everything as one part.
        data["instrument_0"] = get_notes(mid.flat.notes)

    n_rows = upperBoundNote - lowerBoundNote
    file_name = os.path.basename(midi_path.replace("\\", "/"))
    base_name = os.path.splitext(file_name)[0]   # handles both .mid and .midi
    if save_path:
        os.makedirs(save_path, exist_ok=True)

    for instrument_name, values in data.items():
        # Convert every note once into (row, first_step, end_step).
        events = []
        for dur, start, pitch in zip(values["dur"], values["start"], values["pitch"]):
            row = pitch - lowerBoundNote
            if not 0 <= row < n_rows:
                # Out-of-range pitch: would wrap around or overflow the matrix.
                continue
            first_step = int(start/resolution)
            events.append((row, first_step, first_step + int(dur/resolution)))

        index = 0
        while index < max_repetitions:
            window_start = index*maxSongLength
            window_end = window_start + maxSongLength
            matrix = np.zeros((n_rows, maxSongLength))

            for row, first_step, end_step in events:
                # Clip the note to the current window.
                lo = max(first_step, window_start)
                hi = min(end_step, window_end)
                if lo < hi:
                    matrix[row, lo - window_start:hi - window_start] = 255

            if not matrix.any():
                # An empty window ends the series for this instrument.
                break

            out_name = f"{base_name}_{instrument_name}_{index}.png"
            imwrite(os.path.join(save_path, out_name), matrix.astype(np.uint8))
            index += 1

In [None]:
# Directory name for the source data. It is not referenced elsewhere in this
# notebook view; presumably it holds the input MIDI files (TODO confirm).
DATA_PATH='2004'
# Directory of piano-roll PNG images that are loaded as the GAN training set.
IMAGES_PATH='2004_Images'

In [None]:
!pip install py_midicsv

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting py_midicsv
  Downloading py_midicsv-4.0.0-py3-none-any.whl (16 kB)
Collecting rich-click<2.0.0,>=1.6.1 (from py_midicsv)
  Downloading rich_click-1.6.1-py3-none-any.whl (19 kB)
Installing collected packages: rich-click, py_midicsv
Successfully installed py_midicsv-4.0.0 rich-click-1.6.1


In [None]:
import os
import numpy as np
import py_midicsv as pm

In [None]:
import matplotlib.pyplot as plt

In [None]:
from glob import glob

In [None]:
def access_images(img_list,path,length,size=(106,106)):
    """Load up to ``length`` PNG files as normalised single-channel arrays.

    Args:
        img_list: file names (relative to ``path``) to consider, in order.
        path: directory containing the files.
        length: only the first ``length`` entries of ``img_list`` are
            considered; a shorter list is fine.
        size: expected ``(height, width)`` of every image.

    Returns:
        ``(pixels, imgs)``. ``pixels`` is an array stacking one
        ``(height, width, 1)`` float32 array per loaded image, with values
        divided by 255. ``imgs`` is the list of matching PIL images converted
        to mode ``'1'``.

    Files that are not ``.png`` are ignored. Files that cannot be opened or
    do not match ``size`` are skipped with a warning.
    """
    height, width = size
    pixels = []
    imgs = []
    for file_name in img_list[:length]:
        if not file_name.lower().endswith('.png'):
            continue
        full_path = os.path.join(path, file_name)
        try:
            # The context manager closes the file; convert() returns an
            # independent in-memory image.
            with Image.open(full_path, 'r') as raw:
                img = raw.convert('1')
            pix = np.array(img.getdata()).astype('float32')
            pix /= 255.0
            pix = pix.reshape(height, width, 1)
        except (OSError, ValueError) as err:
            warnings.warn(f"Skipping {full_path}: {err}")
            continue
        pixels.append(pix)
        imgs.append(img)
    return np.array(pixels),imgs
def show_image(pix_list):
    """Display a 106x106 pixel array with the default image viewer.

    Args:
        pix_list: array with 106*106 values, either already in the 0-255 range
            or normalised to [0, 1]. Normalised input is scaled to 0-255 first,
            because casting it straight to uint8 would leave only 0s and 1s.
    """
    array = np.asarray(pix_list, dtype='float32').reshape(106,106)
    if array.max() <= 1.0:
        array = array * 255.0
    new_image = Image.fromarray(array.astype(np.uint8))
    new_image.show()

# os.listdir returns entries in arbitrary order; sorting makes the subset of
# (at most) 500 files that is loaded the same on every run.
pixels,imgs = access_images(sorted(os.listdir(IMAGES_PATH)),IMAGES_PATH,500)

In [None]:
# Show the file names present in the image directory.
os.listdir(IMAGES_PATH)

['116_Piano_34.png',
 '116_Piano_35.png',
 '116_Piano_36.png',
 '116_Piano_37.png',
 '117_Piano_0.png',
 '117_Piano_1.png',
 '117_Piano_2.png',
 '117_Piano_3.png',
 '117_Piano_4.png',
 '117_Piano_5.png',
 '117_Piano_6.png',
 '117_Piano_7.png',
 '117_Piano_8.png',
 '117_Piano_9.png',
 '117_Piano_10.png',
 '117_Piano_11.png',
 '117_Piano_12.png',
 '117_Piano_13.png',
 '117_Piano_14.png',
 '117_Piano_15.png',
 '117_Piano_16.png',
 '117_Piano_17.png',
 '117_Piano_18.png',
 '117_Piano_19.png',
 '117_Piano_20.png',
 '117_Piano_21.png',
 '117_Piano_22.png',
 '117_Piano_23.png',
 '117_Piano_24.png',
 '117_Piano_25.png',
 '117_Piano_26.png',
 '117_Piano_27.png',
 '117_Piano_28.png',
 '117_Piano_29.png',
 '117_Piano_30.png',
 '117_Piano_31.png',
 '117_Piano_32.png',
 '117_Piano_33.png',
 '117_Piano_34.png',
 '117_Piano_35.png',
 '117_Piano_36.png',
 '117_Piano_37.png',
 '118_Piano_0.png',
 '118_Piano_1.png',
 '118_Piano_2.png',
 '118_Piano_3.png',
 '118_Piano_4.png',
 '118_Piano_5.png',
 '118_Pi

In [None]:
# Sanity check: list the distinct pixel values in the loaded dataset.
np.unique(pixels)

array([0., 1.], dtype=float32)

In [None]:
# Sanity check: dataset dimensions.
pixels.shape

(500, 106, 106, 1)

In [None]:
from numpy import zeros
from numpy import ones
from numpy import vstack
from numpy.random import randn
from numpy.random import randint
from keras.datasets.mnist import load_data
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Reshape
from keras.layers import Flatten,BatchNormalization
from keras.layers import Conv2D
from keras.layers import Conv2DTranspose
from keras.layers import LeakyReLU
from keras.layers import Dropout
from matplotlib import pyplot
from IPython.display import clear_output

In [None]:
def define_discriminator(in_shape = (106,106,1)):
    """Build and compile the discriminator network.

    Layers: two stride-2 3x3 convolutions with 64 filters, each followed by
    LeakyReLU(0.2) and 50% dropout, then Flatten, BatchNormalization and a
    single sigmoid output unit.

    Args:
        in_shape: shape of one input image.

    Returns:
        A compiled ``Sequential`` model (binary cross-entropy loss, Adam with
        learning rate 0.0002 and beta_1 0.5, accuracy metric).
    """
    model = Sequential()
    model.add(Conv2D(64, (3,3), strides=(2, 2), padding='same', input_shape=in_shape))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.5))
    model.add(Conv2D(64, (3,3), strides=(2, 2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(BatchNormalization())
    model.add(Dense(1, activation='sigmoid'))
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model

In [None]:
def define_generator(latent_dim):
    """Build the (uncompiled) generator network.

    A dense layer expands the latent vector to 128*53*53 values, which are
    reshaped to (53, 53, 128). A stride-2 transposed convolution, surrounded by
    Dense(1024) layers, follows, and a 7x7 sigmoid convolution produces the
    single output channel.

    Args:
        latent_dim: length of the latent input vector.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    n_nodes = 128 * 53 * 53
    layer_stack = [
        Dense(n_nodes, input_dim=latent_dim),
        LeakyReLU(alpha=0.2),
        Reshape((53, 53, 128)),
        Dense(1024),
        Conv2DTranspose(1024, (4,4), strides=(2,2), padding='same'),
        Dense(1024),
        LeakyReLU(alpha=0.2),
        Dense(1024),
        Conv2D(1, (7,7) , padding='same',activation = 'sigmoid'),
    ]
    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    return model

In [None]:
def define_gan(g_model, d_model):
    """Stack generator and discriminator into the combined model used to train the generator.

    Args:
        g_model: generator model.
        d_model: discriminator model. Its ``trainable`` flag is set to False
            here (the object is mutated) before the combined model is compiled.

    Returns:
        A compiled ``Sequential`` model ``g_model -> d_model`` (binary
        cross-entropy loss, Adam with learning rate 0.0002 and beta_1 0.5).
    """
    d_model.trainable = False
    model = Sequential()
    model.add(g_model)
    model.add(d_model)
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model

In [None]:
def generate_real_samples(dataset, n_samples):
    """Draw ``n_samples`` random rows (with replacement) from ``dataset``.

    Returns:
        ``(X, y)`` where ``X`` holds the selected samples and ``y`` is an
        ``(n_samples, 1)`` array of ones (the "real" label).
    """
    picks = randint(0, dataset.shape[0], n_samples)
    real_labels = ones((n_samples, 1))
    return dataset[picks], real_labels
 
def generate_latent_points(latent_dim, n_samples):
    """Sample standard-normal latent vectors.

    Returns:
        Array of shape ``(n_samples, latent_dim)``.
    """
    total_values = latent_dim * n_samples
    return randn(total_values).reshape(n_samples, latent_dim)
def generate_fake_samples(g_model, latent_dim, n_samples):
    """Generate ``n_samples`` images with ``g_model`` from random latent points.

    Returns:
        ``(X, y)`` where ``X`` is the output of ``g_model.predict`` and ``y``
        is an ``(n_samples, 1)`` array of zeros (the "fake" label).
    """
    latent_batch = generate_latent_points(latent_dim, n_samples)
    generated = g_model.predict(latent_batch)
    fake_labels = zeros((n_samples, 1))
    return generated, fake_labels

In [None]:
from keras.callbacks import ModelCheckpoint

In [None]:
import tensorflow as tf

In [None]:
# Report the GPU device name TensorFlow sees (an empty string means none was found).
tf.test.gpu_device_name()

''

In [None]:
# filepath="/content/drive/MyDrive/Deep_Learning_Project/Models/model.ckpt"
def train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=300, n_batch=50):
    """Alternate discriminator and generator updates for ``n_epochs`` epochs.

    Each step trains the discriminator on half a batch of real samples plus
    half a batch of generated ones. It then trains the generator through
    ``gan_model`` on a full batch of latent points labelled as real.

    Args:
        g_model: generator, used to produce fake samples.
        d_model: compiled discriminator; ``train_on_batch`` is expected to
            return ``(loss, metric)``.
        gan_model: compiled combined model from ``define_gan``.
        dataset: array of real samples, indexed along axis 0.
        latent_dim: length of the latent vectors.
        n_epochs: number of passes over the data.
        n_batch: batch size. If the dataset has fewer than ``n_batch`` samples
            there are zero steps per epoch and nothing is trained.

    Progress is printed per step, and the cell output is cleared every 10 epochs.
    """
    # Use the GPU when TensorFlow reports one; otherwise run on the CPU rather
    # than requesting a device that does not exist.
    device = tf.test.gpu_device_name() or '/device:CPU:0'
    bat_per_epo = dataset.shape[0] // n_batch
    half_batch = n_batch // 2
    with tf.device(device):
        for i in range(n_epochs):
            for j in range(bat_per_epo):
                # Discriminator update on a mixed real/fake batch.
                X_real, y_real = generate_real_samples(dataset, half_batch)
                X_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
                X, y = vstack((X_real, X_fake)), vstack((y_real, y_fake))
                d_loss, _ = d_model.train_on_batch(X, y)
                # Generator update: latent points are labelled "real" so the
                # loss rewards fooling the discriminator.
                X_gan = generate_latent_points(latent_dim, n_batch)
                y_gan = ones((n_batch, 1))
                g_loss = gan_model.train_on_batch(X_gan, y_gan)
                print('>%d, %d/%d, d=%.3f, g=%.3f' % (i+1, j+1, bat_per_epo, d_loss, g_loss))
            if (i+1) % 10 == 0:
                # Keep the notebook output from growing without bound.
                clear_output()

In [None]:
# Length of the random latent vector fed to the generator.
latent_dim = 100


In [None]:
import tensorflow as tf
from tensorflow.keras.saving import load_model

In [None]:
# Build the generator and the (compiled) discriminator, then stack them.
# define_gan sets d_model.trainable = False before compiling the combined model.
g_model=define_generator(latent_dim)
d_model=define_discriminator()
gan_model=define_gan(g_model,d_model)

In [None]:
# Show the dataset shape, then train with the default epoch count and batch size.
print(pixels.shape)
train(g_model, d_model, gan_model, np.array(pixels), latent_dim)

In [None]:
from keras.models import load_model
from numpy.random import randn
from matplotlib import pyplot
# NOTE(review): this repeats the generate_latent_points definition from the
# sampling-helpers cell earlier in the notebook; the later definition wins.
def generate_latent_points(latent_dim, n_samples):
    """Sample standard-normal latent vectors of shape ``(n_samples, latent_dim)``."""
    total_values = latent_dim * n_samples
    return randn(total_values).reshape(n_samples, latent_dim)


In [None]:
# Draw a single latent vector to inspect the sampler's output.
generate_latent_points(latent_dim,1)

array([[-7.14050082e-02, -1.68398237e-01,  1.20231815e-01,
         9.43047683e-02,  3.03845029e-01,  4.36677780e-01,
        -7.80700415e-01,  6.77186683e-02, -9.02950164e-01,
        -3.70371660e-01, -4.24110558e-01, -1.40761392e-01,
         1.49186162e+00, -1.77134568e+00,  2.71806421e+00,
        -2.11264240e-01, -1.11798142e+00,  8.24654029e-02,
         9.19703274e-01,  1.61877319e-01, -4.87360376e-01,
         2.33143338e+00,  9.23945826e-01,  8.67059907e-04,
         5.03164676e-01,  1.91711874e-01, -6.52064101e-01,
        -7.21950305e-01,  6.30546484e-01,  1.10760363e-01,
         9.04432046e-01, -4.88427983e-01,  2.00250024e-01,
        -1.64464077e+00, -1.77662295e+00,  9.78849568e-02,
         2.42905368e-01, -6.53553185e-02, -3.46648706e-01,
        -6.05912475e-01,  5.99837322e-01, -4.34520849e-02,
         4.50876200e-01, -1.53103100e+00,  1.76772732e+00,
        -3.54805128e-01, -2.79324792e-01,  1.05374263e+00,
        -1.57864700e+00, -1.96838108e+00,  5.75657410e-0

In [None]:
def gen_random_imgs(path_name, model=None):
  """Generate one image with the generator and save it as ``<path_name>.png``.

  Args:
    path_name: output path without the ``.png`` extension. Its directory is
      created when missing.
    model: generator to use; defaults to the notebook-level ``g_model``.

  The generator output is reshaped to 106x106 and scaled by 255 *before* the
  uint8 cast. Casting first would truncate every value below 1.0 to 0 and
  produce an almost entirely black image.
  """
  if model is None:
    model = g_model
  latent_points = generate_latent_points(latent_dim,1)
  X = model.predict(latent_points)
  scaled = np.rint(X.reshape(106,106) * 255.0)
  array = np.clip(scaled, 0, 255).astype(np.uint8)
  out_dir = os.path.dirname(path_name)
  if out_dir:
    os.makedirs(out_dir, exist_ok=True)
  new_image = Image.fromarray(array,'L')
  new_image.save(path_name+'.png')

In [None]:
from music21 import midi
def playMidi(path):
  """Read the MIDI file at ``path`` and show it with music21's ``'midi'`` format.

  Args:
    path: path of the MIDI file, e.g. ``'abc.midi'``.
  """
  mf = midi.MidiFile()
  mf.open(path)
  try:
    mf.read()
  finally:
    # Release the file handle even when parsing fails.
    mf.close()
  s = midi.translate.midiFileToStream(mf)
  s.show('midi')

In [None]:
# Generate five images and convert each one to a MIDI file.
for sample_idx in range(5):
  png_stem = 'Generated_Music_png_files/'+str(sample_idx)
  gen_random_imgs(png_stem)
  image2midi(png_stem+'.png','Generated_Music_midi_files')

/content/drive/MyDrive/Deep_Learning_Project/Generated_Music_Images/0.mid
/content/drive/MyDrive/Deep_Learning_Project/Generated_Music_Images/1.mid
/content/drive/MyDrive/Deep_Learning_Project/Generated_Music_Images/2.mid
/content/drive/MyDrive/Deep_Learning_Project/Generated_Music_Images/3.mid
/content/drive/MyDrive/Deep_Learning_Project/Generated_Music_Images/4.mid


In [None]:
# Play back one of the generated files (index 1).
playMidi('Generated_Music_midi_files/1.mid')