In [2]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
!pip install mido

import numpy as np
import mido
import os
import pickle
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.use('Agg')
import pandas as pd
import math
from matplotlib.colors import ColorConverter
from scipy.sparse import dok_matrix, csr_matrix, save_npz, load_npz

filename = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/note_values.p'
with open(filename, 'rb') as fp:
    note_values = pickle.load(fp)

dataset_df = pd.read_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset.p')



In [4]:
class MidiFile(mido.MidiFile):

    def __init__(self, filename):
        mido.MidiFile.__init__(self, filename)
        self.sr = 10
        self.meta = {}
        self.events = self.get_events()

    def get_events(self):
        mid = self
        events = [[] for x in range(16)]
        # Iterate all event in the midi and extract to 16 channel form
        for track in mid.tracks:
            for msg in track:
                try:
                    channel = msg.channel
                    events[channel].append(msg)
                except AttributeError:
                    try:
                        if type(msg) != type(mido.UnknownMetaMessage):
                            self.meta[msg.type] = msg.dict()
                        else:
                            pass
                    except:
                        print("error",type(msg))
        return events

    def get_roll(self,merge_channels=False):
        events = self.get_events()
        # Identify events, then translate to piano roll
        # choose a sample ratio(sr) to down-sample through time axis
        sr = self.sr
        # compute total length in tick unit
        length = self.get_total_ticks()
        # allocate memory to numpy array
        roll = np.zeros((16, 128, length // sr), dtype="int8")
        # use a register array to save the state(no/off) for each key
        note_register = [int(-1) for x in range(128)]
        # use a register array to save the state(program_change) for each channel
        timbre_register = [1 for x in range(16)]

        for idx, channel in enumerate(events):
            time_counter = 0
            volume = 100
            # Volume would change by control change event (cc) cc7 & cc11
            # Volume 0-100 is mapped to 0-127
            for msg in channel:
                if msg.type == "control_change":
                    if msg.control == 7:
                        volume = msg.value
                        # directly assign volume
                    if msg.control == 11:
                        volume = volume * msg.value // 127
                        # change volume by percentage                   
                if msg.type == "program_change":
                    timbre_register[idx] = msg.program

                if msg.type == "note_on":
                    note_on_start_time = time_counter // sr
                    note_on_end_time = (time_counter + msg.time) // sr
                    intensity = volume * msg.velocity // 127
					# When a note_on event *ends* the note start to be play 
					# Record end time of note_on event if there is no value in register
					# When note_off event happens, we fill in the color
                    if note_register[msg.note] == -1:
                        note_register[msg.note] = (note_on_end_time,intensity)
                    else:
					# When note_on event happens again, we also fill in the color
                        old_end_time = note_register[msg.note][0]
                        old_intensity = note_register[msg.note][1]
                        roll[idx, msg.note, old_end_time: note_on_end_time] = old_intensity
                        note_register[msg.note] = (note_on_end_time,intensity)

                if msg.type == "note_off":
                    note_off_start_time = time_counter // sr
                    note_off_end_time = (time_counter + msg.time) // sr
                    note_on_end_time = note_register[msg.note][0]
                    intensity = note_register[msg.note][1]
					# fill in color
                    roll[idx, msg.note, note_on_end_time:note_off_end_time] = intensity
                    note_register[msg.note] = -1  # reinitialize register
                time_counter += msg.time

            # if there is a note not closed at the end of a channel, close it
            for key, data in enumerate(note_register):
                if data != -1:
                    note_on_end_time = data[0]
                    intensity = data[1]
                    # print(key, note_on_end_time)
                    note_off_start_time = time_counter // sr
                    roll[idx, key, note_on_end_time:] = intensity
                note_register[idx] = -1
        if merge_channels==True:
            roll = np.sum(roll,axis=0)
        return roll
    
    def get_transposed_roll(self,interval:int):
        if np.abs(interval) < 20:
            roll = self.get_roll()
            t_roll = np.zeros_like(roll)
            if interval>=0:
                for k1 in range(roll.shape[0]):
                    for k2 in range(roll.shape[1]-interval):
                        for k3 in range(roll.shape[2]):
                            t_roll[k1,k2+interval,k3] = roll[k1,k2,k3]
            else:
                for k1 in range(roll.shape[0]):
                    for k2 in range(-interval,roll.shape[1]):
                        for k3 in range(roll.shape[2]):
                            t_roll[k1,k2+interval,k3] = roll[k1,k2,k3]

        else:
            print("Not a valid tranposing")
        return t_roll

    def get_cropped_roll(self):
        pass

    def get_range(self):
        roll = self.get_roll()
        idxs = np.where(roll>0)
        range = (np.min(idxs[1]),np.max(idxs[1]))
        return range

    def get_roll_image(self):
        roll = self.get_roll()
        plt.ioff()
        K = 16
        transparent = ColorConverter.to_rgba('black')
        colors = [mpl.colors.to_rgba(mpl.colors.hsv_to_rgb((i / K, 1, 1)), alpha=1) for i in range(K)]
        cmaps = [mpl.colors.LinearSegmentedColormap.from_list('my_cmap', [transparent, colors[i]], 128) for i in
                 range(K)]
        for i in range(K):
            cmaps[i]._init()  # create the _lut array, with rgba values
            # create your alpha array and fill the colormap with them.
            # here it is progressive, but you can create whathever you want
            alphas = np.linspace(0, 1, cmaps[i].N + 3)
            cmaps[i]._lut[:, -1] = alphas

        fig = plt.figure(figsize=(4, 3))
        a1 = fig.add_subplot(111)
        a1.axis("equal")
        a1.set_facecolor("black")
        array = []
        for i in range(K):
            try:
                img = a1.imshow(roll[i], interpolation='nearest', cmap=cmaps[i], aspect='auto')
                array.append(img.get_array())
            except IndexError:
                pass
        return array

    def draw_roll(self,fig,a1,filename:str,just_image=True,show=True,transposed=0):
        if transposed == 0:
            roll = self.get_roll()
        else:
            roll = self.get_transposed_roll(transposed)
        a1.axis("equal")
        #a1.set_facecolor("white")
        a1.set_facecolor('black')
        # change unit of time axis from tick to second
        tick = self.get_total_ticks()
        second = mido.tick2second(tick, self.ticks_per_beat, self.get_tempo())
        if second > 10:
            x_label_period_sec = second // 10
        else:
            x_label_period_sec = second / 10  # ms
        x_label_interval = mido.second2tick(x_label_period_sec, self.ticks_per_beat, self.get_tempo()) / self.sr
        if just_image==False:                
            a1.set_xticks([int(x * x_label_interval) for x in range(20)], [round(x * x_label_period_sec, 2) for x in range(20)])
            # change scale and label of y axis
            a1.set_yticks([y*16 for y in range(8)], [y*16 for y in range(8)])
        else:
            a1.set_xticks([])
            a1.set_yticks([])
        # build colors
        channel_nb = 16  # <--- MODIFICAR PARA QUE NO HAGA DISTINCIÓN ENTRE LOS CANALES
        transparent = ColorConverter.to_rgba('white')
        # colors = [mpl.colors.to_rgba(mpl.colors.hsv_to_rgb((i / channel_nb, 1, 1)), alpha=1) for i in range(channel_nb)]
        # cmaps = [mpl.colors.LinearSegmentedColormap.from_list('my_cmap', [transparent, colors[i]], 128) for i in
        #          range(channel_nb)]
        colors = [mpl.colors.to_rgba(mpl.colors.hsv_to_rgb((i / 1, 1, 1)), alpha=1) for i in range(channel_nb)]
        cmaps = [mpl.colors.LinearSegmentedColormap.from_list('my_cmap', [transparent, colors[i]], 128) for i in
                 range(channel_nb)]
        # build color maps
        for i in range(channel_nb):
            cmaps[i]._init()
            # create your alpha array and fill the colormap with them.
            alphas = np.linspace(0, 1, cmaps[i].N + 3)
            # create the _lut array, with rgba values
            cmaps[i]._lut[:, -1] = alphas

        for i in range(channel_nb):
            try:
                a1.imshow(roll[i], origin="lower", interpolation='nearest', cmap=cmaps[i], aspect='auto')
            except IndexError:
                pass
        fig.savefig(filename,dpi=200)
        a1.cla()
        
    def get_tempo(self):
        try:
            return self.meta["set_tempo"]["tempo"]
        except:
            return 500000

    def get_total_ticks(self):
        max_ticks = 0
        for channel in range(16):
            ticks = sum(msg.time for msg in self.events[channel])
            if ticks > max_ticks:
                max_ticks = ticks
        return max_ticks

In [5]:
def msj(k,width):
    if (k+1)%width > 0:
        print(k, end='Ϟ ')
    else:
        print(k,'Ϟ',end='\r')

def t_msj(k,t,width):
    if (k+1)%width > 0:
        print(str(k)+'/'+str(t), end=' ')
    else:
        print(k,end='\r')

In [7]:
available_df = dataset_df[dataset_df['Image']==True].copy()
N = available_df.shape[0]
max_t = 340522
max_note = 108
min_note = 21 

dim_x = (max_note-min_note+1)*max_t
X_master_2 = dok_matrix((N, dim_x), dtype=np.int8)

In [8]:
reverse_idxs = available_df.index.to_list()
reverse_idxs.reverse()
print(reverse_idxs[:50])

[2084, 2082, 2081, 2080, 2079, 2078, 2076, 2019, 2006, 2004, 2003, 2002, 2001, 1999, 1998, 1994, 1993, 1992, 1991, 1989, 1987, 1956, 1955, 1954, 1953, 1952, 1951, 1950, 1949, 1948, 1947, 1946, 1945, 1944, 1943, 1942, 1941, 1940, 1939, 1938, 1937, 1936, 1935, 1934, 1933, 1932, 1931, 1898, 1897, 1896]


In [10]:
reverse_idxs[-20:]

[19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

Este

In [20]:
path = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/'
idxs_to_do = reverse_idxs[50:200]
for j in range(50):
    full_midi_name = available_df.loc[idxs_to_do[j],'Midi_Path']
    try:
        mid = MidiFile(full_midi_name)
        roll = mid.get_roll(merge_channels=True)
        roll = roll[21:109,:]
        row = roll.flatten()
        row.resize((dim_x,))
        non_zero_idxs = np.where(row>0)
        for k in non_zero_idxs:
            X_master_2[-(j+1),k] = row[k]
        t_msj(j+1,N,10)
    except:
        print(available_df.loc[idxs_to_do[j],'root_Filename'])
X = X_master_2.tocsc()
fname = path + 'X_master_bottom.npz'
save_npz(fname,X)

50/1893 

In [19]:
print(available_df.loc[2006,'Midi_Path'])
available_df[available_df['root_Filename'] == 'prelude8']

/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs/Bach/wtcbki/prelude8.mid


Unnamed: 0,img_Filename,Composer,Period,Image,Broken,root_Filename,canonical_title,Img_Path,Midi_Path
2006,prelude8.png,wtcbkii,Baroque,True,False,prelude8,,/content/drive/My Drive/Diplomado Python/Noteb...,/content/drive/My Drive/Diplomado Python/Noteb...


In [16]:
A = dok_matrix((10,10), dtype=np.int8)
fila = np.array([0,0,1,0,5,0])
idxs = np.where(fila>0)
for j in idxs:
    A[0,j] = fila[j]
A.todense() 

matrix([[0, 0, 1, 0, 5, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], dtype=int8)

# Análisis Estadístico 

## Duraciones

El roll es un arreglo de 

16 (channels) x 128 (notas) x 177712 (duración... variable)

In [None]:
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs'

duraciones = []
bad_files = []
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        filename = os.path.join(subdir, file)
        try:
            mid = MidiFile(filename)
            roll = mid.get_roll()
            duraciones.append(roll.shape[2])
        except:
            bad_files.append(filename)
print("El promedio de duración es:",np.mean(duraciones))
print("Mínimo:",np.min(duraciones))
print("Máximo:",np.max(duraciones))

El promedio de duración es: 39449.06209637357
Mínimo: 566
Máximo: 340522


In [None]:
print("Hay",len(bad_files),"midis raros")

In [None]:
print(bad_files)

In [None]:
plt.figure(dpi=120)
plt.hist(duraciones,color='gray',edgecolor='black')
plt.savefig('Histograma duraciones.png',dpi=120)

## Rango de notas

In [None]:
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs'

maxs_mins = []
notes_played = np.array([])
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        filename = os.path.join(subdir, file)
        try:
            mid_bar = MidiFile(filename)
            roll = mid_bar.get_roll()
            idxs = np.where(roll>0)
            maxs_mins.append((np.min(idxs[1]),np.max(idxs[1])))
            notes_played = np.concatenate([notes_played,idxs[1]])
        except:
            break

mean_low = np.mean([x[0] for x in maxs_mins])
mean_high = np.mean([x[1] for x in maxs_mins])
print("El promedio de nota más baja es:",mean_low," --->", note_values[math.floor(mean_low)])
print("El promedio de máximos es:",mean_high," --->", note_values[math.floor(mean_high)])
lowest = np.min([x[0] for x in maxs_mins])
highest = np.max([x[1] for x in maxs_mins])
print("Mínimo:",lowest," --->", note_values[math.floor(lowest)])
print("Máximo:",highest," --->", note_values[math.floor(highest)])

El promedio de nota más baja es: 29.491127647395537  ---> fa1
El promedio de máximos es: 93.72524327418432  ---> la6
Mínimo: 21  ---> la0
Máximo: 108  ---> do8


## Range of values

In [None]:
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs'

max_min_values = []
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        filename = os.path.join(subdir, file)
        try:
            mid_bar = MidiFile(filename)
            roll = mid_bar.get_roll()
            max_v = np.max(roll)
            min_v = np.min(roll) 
            max_min_values.append((max_v,min_v))
        except:
            break
lowest = np.min([x[0] for x in max_min_values])
highest = np.max([x[1] for x in max_min_values])
print("Mínimo valor en el roll:",lowest," --->", lowest)
print("Máximo valor en el roll:",highest," --->", highest)

Mínimo valor en el roll: 42  ---> 42
Máximo valor en el roll: 0  ---> 0


In [None]:
plt.figure(dpi=120)
plt.hist(notes_played,color='gray',edgecolor='black')
ticks_to_use = [21,40,60,80,108]
labels_to_use = [note_values[x] for x in ticks_to_use]
plt.xticks(ticks=ticks_to_use, labels=labels_to_use)
plt.savefig('Histograma notas tocadas 2.png',dpi=120)

In [None]:
available_df.to_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset-images.p')