In [2]:
# Mount Google Drive at /content/drive; every path used below lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [3]:
!pip install mido

Collecting mido
[?25l  Downloading https://files.pythonhosted.org/packages/20/0a/81beb587b1ae832ea6a1901dc7c6faa380e8dd154e0a862f0a9f3d2afab9/mido-1.2.9-py2.py3-none-any.whl (52kB)
[K     |██████▎                         | 10kB 19.4MB/s eta 0:00:01[K     |████████████▌                   | 20kB 1.7MB/s eta 0:00:01[K     |██████████████████▊             | 30kB 2.3MB/s eta 0:00:01[K     |█████████████████████████       | 40kB 2.5MB/s eta 0:00:01[K     |███████████████████████████████▏| 51kB 2.0MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 1.9MB/s 
[?25hInstalling collected packages: mido
Successfully installed mido-1.2.9


In [4]:
import numpy as np
import pandas as pd
import mido
import os
import pickle
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.use('Agg')

# Mi clase personalizada

MI CLASE

In [5]:
from matplotlib.colors import ColorConverter

class MidiFile(mido.MidiFile):
    """``mido.MidiFile`` subclass that renders the file as a per-channel piano roll.

    Attributes set in ``__init__``:
        sr: down-sampling ratio; one piano-roll column covers ``sr`` ticks.
        meta: ``msg.type -> msg.dict()`` for the channel-less messages found
            while collecting events (a later message of the same type
            overwrites an earlier one).
        events: list of 16 lists holding the messages of each MIDI channel.
    """

    def __init__(self, filename):
        mido.MidiFile.__init__(self, filename)
        self.sr = 10
        # ``meta`` must exist before get_events() runs, because get_events() fills it.
        self.meta = {}
        self.events = self.get_events()

    def get_events(self):
        """Return the messages of all tracks grouped into 16 per-channel lists.

        Messages without a ``channel`` attribute are stored in ``self.meta``
        (keyed by ``msg.type``) unless they are ``mido.UnknownMetaMessage``.
        """
        events = [[] for _ in range(16)]
        for track in self.tracks:
            for msg in track:
                try:
                    channel = msg.channel
                    events[channel].append(msg)
                except AttributeError:
                    try:
                        # The original compared type(msg) with type(<class>), i.e. with the
                        # metaclass, which is always different; isinstance is the real check.
                        if not isinstance(msg, mido.UnknownMetaMessage):
                            self.meta[msg.type] = msg.dict()
                    except Exception:
                        print("error",type(msg))
        return events

    def get_roll(self,merge_channels=False):
        """Build the piano roll of the file.

        Args:
            merge_channels: when True, the 16 channel planes are summed.

        Returns:
            An int8 array of shape ``(16, 128, total_ticks // sr)`` holding the
            note intensity per channel, pitch and time column, or the sum over
            the channel axis when ``merge_channels`` is True.
        """
        events = self.get_events()
        # sample ratio used to down-sample the time axis
        sr = self.sr
        # total length in ticks
        length = self.get_total_ticks()
        roll = np.zeros((16, 128, length // sr), dtype="int8")
        # per key: -1 when the key is silent, else (start_column, intensity)
        note_register = [-1 for _ in range(128)]

        for idx, channel in enumerate(events):
            time_counter = 0
            volume = 100
            # Volume is changed by control change events cc7 and cc11.
            for msg in channel:
                if msg.type == "control_change":
                    if msg.control == 7:
                        # cc7 assigns the volume directly
                        volume = msg.value
                    if msg.control == 11:
                        # cc11 scales the current volume
                        volume = volume * msg.value // 127

                if msg.type == "note_on":
                    # msg.time is the delta before the event, so the note begins
                    # at the end of that delta.
                    note_on_end_time = (time_counter + msg.time) // sr
                    intensity = volume * msg.velocity // 127
                    if note_register[msg.note] != -1:
                        # The key is already sounding: paint the previous note up
                        # to this point before registering the new one.
                        old_end_time, old_intensity = note_register[msg.note]
                        roll[idx, msg.note, old_end_time:note_on_end_time] = old_intensity
                    note_register[msg.note] = (note_on_end_time, intensity)

                if msg.type == "note_off":
                    note_off_end_time = (time_counter + msg.time) // sr
                    # A note_off without a matching note_on used to raise TypeError
                    # (indexing -1); there is nothing to paint, so it is skipped.
                    if note_register[msg.note] != -1:
                        note_on_end_time, intensity = note_register[msg.note]
                        roll[idx, msg.note, note_on_end_time:note_off_end_time] = intensity
                        note_register[msg.note] = -1  # reinitialize register
                time_counter += msg.time

            # Close every note that is still open at the end of the channel.
            for key, data in enumerate(note_register):
                if data != -1:
                    note_on_end_time, intensity = data
                    roll[idx, key, note_on_end_time:] = intensity
                    # Reset the key itself (the original reset index ``idx``, which
                    # let open notes leak into every following channel).
                    note_register[key] = -1
        if merge_channels==True:
            roll = np.sum(roll,axis=0)
        return roll

    def get_transposed_roll(self,interval:int):
        """Return the piano roll shifted by ``interval`` rows along the pitch axis.

        Rows shifted outside the 128-row range are dropped and the vacated rows
        are zero.

        Raises:
            ValueError: if ``abs(interval) >= 20``.
        """
        if np.abs(interval) >= 20:
            # The original printed a message and then failed with UnboundLocalError.
            raise ValueError("Not a valid transposing interval: %r" % (interval,))
        roll = self.get_roll()
        t_roll = np.zeros_like(roll)
        if interval > 0:
            t_roll[:, interval:, :] = roll[:, :-interval, :]
        elif interval < 0:
            t_roll[:, :interval, :] = roll[:, -interval:, :]
        else:
            t_roll[...] = roll
        return t_roll

    def get_cropped_roll(self):
        """Placeholder; not implemented."""
        pass

    def get_range(self):
        """Return ``(lowest, highest)`` pitch index with a positive roll value.

        Raises:
            ValueError: from ``np.min`` when the roll has no positive entry.
        """
        roll = self.get_roll()
        pitches = np.where(roll > 0)[1]
        return (np.min(pitches), np.max(pitches))

    @staticmethod
    def _channel_cmaps(background, hue_divisor, count):
        """Build ``count`` colormaps going from ``background`` to the hue ``i / hue_divisor``.

        The alpha column of each lookup table is replaced by a 0..1 ramp.
        """
        cmaps = []
        for i in range(count):
            color = mpl.colors.to_rgba(mpl.colors.hsv_to_rgb((i / hue_divisor, 1, 1)), alpha=1)
            cmap = mpl.colors.LinearSegmentedColormap.from_list('my_cmap', [background, color], 128)
            cmap._init()  # create the _lut array, with rgba values
            cmap._lut[:, -1] = np.linspace(0, 1, cmap.N + 3)
            cmaps.append(cmap)
        return cmaps

    def get_roll_image(self):
        """Return the image arrays matplotlib produces for each channel plane of the roll."""
        roll = self.get_roll()
        plt.ioff()
        K = 16
        transparent = ColorConverter.to_rgba('black')
        cmaps = self._channel_cmaps(transparent, K, K)

        fig = plt.figure(figsize=(4, 3))
        a1 = fig.add_subplot(111)
        a1.axis("equal")
        a1.set_facecolor("black")
        array = []
        for i in range(K):
            try:
                img = a1.imshow(roll[i], interpolation='nearest', cmap=cmaps[i], aspect='auto')
                array.append(img.get_array())
            except IndexError:
                pass
        # The figure is only a scratch canvas; release it so repeated calls do not pile up figures.
        plt.close(fig)
        return array

    def draw_roll(self,fig,a1,filename:str,just_image=True,show=True,transposed=0):
        """Draw the piano roll on ``a1``, save ``fig`` to ``filename`` and clear the axes.

        Args:
            fig: figure that owns ``a1``; it is saved with ``dpi=200``.
            a1: axes to draw on; cleared with ``cla()`` after saving.
            filename: output image path.
            just_image: when False, tick marks and labels are drawn; otherwise
                both axes get no ticks.
            show: unused; kept for call compatibility.
            transposed: 0 for the plain roll, otherwise the interval passed to
                ``get_transposed_roll``.
        """
        if transposed == 0:
            roll = self.get_roll()
        else:
            roll = self.get_transposed_roll(transposed)
        a1.axis("equal")
        a1.set_facecolor('black')
        if just_image==False:
            # change unit of time axis from tick to second
            tick = self.get_total_ticks()
            second = mido.tick2second(tick, self.ticks_per_beat, self.get_tempo())
            if second > 10:
                x_label_period_sec = second // 10
            else:
                x_label_period_sec = second / 10
            x_label_interval = mido.second2tick(x_label_period_sec, self.ticks_per_beat, self.get_tempo()) / self.sr
            # Positions and labels are set separately: before matplotlib 3.5 the second
            # positional argument of set_xticks/set_yticks was ``minor``, not ``labels``.
            a1.set_xticks([int(x * x_label_interval) for x in range(20)])
            a1.set_xticklabels([round(x * x_label_period_sec, 2) for x in range(20)])
            y_ticks = [y*16 for y in range(8)]
            a1.set_yticks(y_ticks)
            a1.set_yticklabels(y_ticks)
        else:
            a1.set_xticks([])
            a1.set_yticks([])
        # build colors; the hue divisor of 1 is what the original code uses here
        # (get_roll_image spreads the hues with a divisor of 16 instead)
        channel_nb = 16
        transparent = ColorConverter.to_rgba('white')
        cmaps = self._channel_cmaps(transparent, 1, channel_nb)

        for i in range(channel_nb):
            try:
                a1.imshow(roll[i], origin="lower", interpolation='nearest', cmap=cmaps[i], aspect='auto')
            except IndexError:
                pass
        fig.savefig(filename,dpi=200)
        a1.cla()

    def get_tempo(self):
        """Return the tempo of the stored ``set_tempo`` meta message, or 500000 if there is none."""
        try:
            return self.meta["set_tempo"]["tempo"]
        except KeyError:
            return 500000

    def get_total_ticks(self):
        """Return the largest per-channel sum of message delta times (0 if no channel has messages)."""
        max_ticks = 0
        for channel_events in self.events[:16]:
            max_ticks = max(max_ticks, sum(msg.time for msg in channel_events))
        return max_ticks

In [6]:
# Load the pickled helper objects and the three dataframes from the mounted Drive folder.
# NOTE(review): pickle.load / pd.read_pickle can execute arbitrary code stored in the
# file; only load pickles that were produced by this project.
filename = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/note_values.p'
with open(filename, 'rb') as fp:
    note_values = pickle.load(fp)

filename = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/cc.p'
with open(filename,'rb') as fp:
    cc = pickle.load(fp) 

dataset_df = pd.read_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset.p')
available_df = pd.read_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset-available-Tr.p')
composers_df = pd.read_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/composers.p')

In [7]:
def msj(k,width):
    """Print the progress counter ``k``.

    When ``(k + 1) % width`` is positive the counter is followed by 'Ϟ ' on the
    same line; otherwise it is printed as ``k Ϟ`` followed by a carriage return.
    """
    row_filled = not ((k + 1) % width > 0)
    if row_filled:
        print(k, 'Ϟ', end='\r')
        return
    print(k, end='Ϟ ')

def t_msj(k,t,width):
    """Print progress as ``k/t``.

    When ``(k + 1) % width`` is positive, ``k/t`` and a space are printed;
    otherwise only ``k`` followed by a carriage return.
    """
    row_filled = not ((k + 1) % width > 0)
    if row_filled:
        print(k, end='\r')
        return
    print('/'.join((str(k), str(t))), end=' ')

def DitchExtension(name):
    """Return ``name`` up to (not including) its last '.'.

    If ``name`` has no '.', a notice is printed and ``name`` is returned unchanged.
    """
    dot_at = name.rfind('.')
    if dot_at == -1:
        print("No tenía extensión")
        return name
    return name[:dot_at]

def DitchPath(name):
    """Return the part of ``name`` after its last '/'.

    If ``name`` has no '/', a notice is printed and ``name`` is returned unchanged.
    """
    slash_at = name.rfind('/')
    if slash_at == -1:
        print("No tenía diagonales")
        return name
    return name[slash_at + 1:]

def Get_Root_Name_from_transposed(name):
    """Return the root file name of a transposed image name ``'<root> T <interval>.png'``.

    The name is cut at the last ``' T '`` separator. Cutting at the last bare
    ``'T'`` (as before) mangled names without a separator that merely contain
    a capital T, e.g. ``'Toccata.png'`` became ``'Toccata.pn'``.

    If the separator is missing, a notice is printed and ``name`` is returned
    unchanged.
    """
    marker_at = name.rfind(' T ')
    if marker_at == -1:
        print("No aparece la 'T'")
        return name
    return name[:marker_at]

# Crear imágenes

Un ejemplo concreto

In [None]:
# Worked example: render one MIDI file three times on the same figure/axes
# (transposed by -5, untransposed, transposed by +5). draw_roll clears the axes after each save.
mid = MidiFile('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs/Bach/988-aria.mid')
# rolling = mid.get_roll()
# print(mid.get_range())
# rolling2d = mid.get_roll(merge_channels=True)
# print(rolling2d.shape)
fig = plt.figure(figsize=(7, 5),frameon=False)
ax = fig.add_subplot(111)
mid.draw_roll(fig,ax,'/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/rolling-transpuesto-quinta-down.png',transposed=-5)
mid.draw_roll(fig,ax,'/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/rolling.png')
mid.draw_roll(fig,ax,'/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/rolling-transpuesto-quinta-up.png',transposed=5)


# q = np.zeros_like(rolling)
# print(rolling[:3,:3,:3])
# error_count = 0
# for k1 in range(rolling.shape[0]):
#     for k2 in range(rolling.shape[1]-1):
#         for k3 in range(rolling.shape[2]):
#             q[k1,k2,k3] = rolling[k1,k2+1,k3]
# print(q[:3,:3,:3])

In [None]:
def GenerarImagenes_1stTime(rootdir,img_path,files_df,df_filename):
    """Render a piano-roll PNG for every file under ``rootdir`` that is listed in ``files_df``.

    Args:
        rootdir: directory tree walked with ``os.walk``.
        img_path: output prefix; the image is written to ``img_path + <root name> + '.png'``.
        files_df: dataframe with a ``root_Filename`` column; its ``Image`` column
            is set to True on success and ``Broken`` to True on failure (in place).
        df_filename: path the dataframe is pickled to after every processed file,
            so progress survives an interrupted session.
    """
    k = 0
    fig = plt.figure(figsize=(7, 5),frameon=False)
    ax = fig.add_subplot(111)
    try:
        for subdir, dirs, files in os.walk(rootdir):
            for file in files:
                root_fname = DitchExtension(file)
                idx = files_df[files_df['root_Filename'] == root_fname].index.tolist()
                if len(idx)==0:
                    print("El archivo",file,"no está en el dataframe")
                    # Skip only this file; ``break`` dropped the rest of the directory.
                    continue
                idx = idx[0]
                try:
                    filename = os.path.join(subdir, file)
                    mid_bar = MidiFile(filename)
                    # Built from the root name (as GenerarImagenes_continuar does):
                    # ``file[:-3] + 'png'`` produced '.mpng' for '.midi' files.
                    img_filename = img_path + root_fname + '.png'
                    mid_bar.draw_roll(fig,ax,img_filename,show=False)
                    files_df.at[idx,'Image'] = True
                except Exception as exc:
                    # Best effort: flag the file and go on, but show what failed.
                    print("El archivo",file,"no se puede leer",repr(exc))
                    files_df.at[idx,'Broken'] = True
                files_df.to_pickle(df_filename)
                msj(k,20)
                k += 1
    finally:
        # Release the shared figure even when the run is interrupted.
        plt.close(fig)

# Generate the images for every MIDI under `rootdir`; dataset_df is updated in place
# and checkpointed to `df_filename` by the function.
df_filename = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset-backup-2.p'
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs'
img_path = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/Images-full/'
GenerarImagenes_1stTime(rootdir,img_path,dataset_df,df_filename)

In [None]:
# Add an 'Image-2' flag column initialised to False (mutates dataset_df in place), then preview three random rows.
dataset_df['Image-2'] = False
dataset_df.sample(3)

Unnamed: 0,img_Filename,Path,Composer,Period,Image,Broken,root_Filename,canonical_title,Image-2
1217,ORIG-MIDI_02_7_7_13_Group__MID--AUDIO_19_R1_20...,/content/drive/My Drive/Diplomado Python/Noteb...,Haydn,Classicism,True,False,ORIG-MIDI_02_7_7_13_Group__MID--AUDIO_19_R1_20...,"Sonata in C Minor, Hob. XVI:20, 1st mov.",False
1154,MIDI-Unprocessed_R1_D1-1-8_mid--AUDIO-from_mp3...,/content/drive/My Drive/Diplomado Python/Noteb...,Bach,Baroque,True,False,MIDI-Unprocessed_R1_D1-1-8_mid--AUDIO-from_mp3...,"Prelude and Fugue in G-sharp Minor, WTC II, BW...",False
750,MIDI-Unprocessed_16_R2_2009_01_ORIG_MID--AUDIO...,/content/drive/My Drive/Diplomado Python/Noteb...,Schubert,Romanticism,True,False,MIDI-Unprocessed_16_R2_2009_01_ORIG_MID--AUDIO...,"Sonata in C Major, D843",False


In [None]:
def GenerarImagenes_continuar(rootdir,img_path,files_df):
    """Render piano-roll PNGs only for files whose ``Image`` flag in ``files_df`` is False.

    Args:
        rootdir: directory tree walked with ``os.walk``.
        img_path: output prefix; the image is written to ``img_path + <root name> + '.png'``.
        files_df: dataframe with ``root_Filename`` and ``Image`` columns; it is
            only read here, no flag is updated.
    """
    k = 0
    fig = plt.figure(figsize=(7, 5),frameon=False)
    ax = fig.add_subplot(111)
    try:
        for subdir, dirs, files in os.walk(rootdir):
            for file in files:
                root_fname = DitchExtension(file)
                idx = files_df[files_df['root_Filename'] == root_fname].index.tolist()
                if len(idx)==0:
                    print("El archivo",file,"no está en el dataframe")
                    # Skip only this file; ``break`` dropped the rest of the directory.
                    continue
                idx = idx[0]
                if files_df.loc[idx,'Image'] == False:
                    try:
                        filename = os.path.join(subdir, file)
                        mid_bar = MidiFile(filename)
                        img_filename = img_path + root_fname + '.png'
                        mid_bar.draw_roll(fig,ax,img_filename,show=False)
                    except Exception as exc:
                        # Best effort: report the failure and continue with the next file.
                        print("El archivo",file,"no se puede leer",repr(exc))
                    msj(k,20)
                k += 1
    finally:
        # Release the shared figure even when the run is interrupted.
        plt.close(fig)

# Resume image generation into the 'Images-full-no-canales' folder for rows whose 'Image' flag is False.
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs'
img_path = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/Images-full-no-canales/'
GenerarImagenes_continuar(rootdir,img_path,dataset_df)

In [None]:
# Keep only the first row for each root_Filename (rebinds dataset_df; the index is not reset).
dataset_df = dataset_df.drop_duplicates(subset='root_Filename',keep='first')

## Generar imágenes con transposiciones

In [None]:
# Renumber available_df in place with a fresh 0..n-1 index (the old index is discarded), then display it.
available_df.reset_index(drop=True,inplace=True)
available_df

In [22]:
# Report how many rows already have Trs == True and collect the index labels of the rest.
print("Van",available_df.loc[available_df['Trs'] == True].shape[0],"de",available_df.shape[0])
print("Faltan",available_df.loc[available_df['Trs'] != True].shape[0],"de",available_df.shape[0])
total_idxs_to_go = available_df.loc[available_df['Trs'] != True].index.to_list()
# [False for k in available_df.index.to_list()]
# available_df.head(2)
# available_df.to_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset-available-Tr.p')

Van 1884 de 1893
Faltan 9 de 1893


In [13]:
# Scratch arithmetic; the result is only displayed, not assigned to any name.
36+36

72

In [None]:
# Show the first and last five pending index labels.
print(total_idxs_to_go[:5],total_idxs_to_go[-5:])

[1030, 1031, 1032, 1033, 1034] [1112, 1113, 1878, 1881, 1884]


In [23]:
# index_to_do is another name for the same list object as total_idxs_to_go (no copy is made).
index_to_do = total_idxs_to_go
index_to_do[:5]

[1075, 1076, 1077, 1078, 1079]

In [24]:
# Counters read and updated by the transposition cell below; re-run this cell to restart the running total.
total_count = 0
total_to_reach = len(index_to_do)

In [25]:
# Render four randomly chosen transpositions for up to `how_many` pending files and
# mark each finished file with Trs = True in available_df.
img_path = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/Images-transposed/'

# Candidate intervals: -7..7 without 0 and +/-1.
much_intervals = [k for k in range(-7,8) if abs(k) > 1]
total = len(index_to_do)
fig = plt.figure(figsize=(7, 5),frameon=False)
ax = fig.add_subplot(111)
count = 0
how_many = 15
print("Van",total_count,"de",total)
try:
    for k in index_to_do:
        if count >= how_many:
            break
        if available_df.loc[k,'Trs'] == True:
            continue
        midi_path = available_df.loc[k,'Midi_Path']
        root_fname = available_df.loc[k,'root_Filename']
        try:
            mid_bar = MidiFile(midi_path)
            # NOTE(review): the draw is unseeded, so the chosen intervals differ between runs.
            intervalos = np.random.choice(much_intervals,size=4,replace=False)
            for j in intervalos:
                img_filename = img_path + root_fname + ' T ' + str(j) + '.png'
                mid_bar.draw_roll(fig,ax,img_filename,show=False,transposed=j)
            t_msj(total_count,total_to_reach,7)
            available_df.loc[k,'Trs'] = True
            count += 1
            total_count += 1
        except Exception as exc:
            # ``except Exception`` lets Ctrl-C stop the loop (a bare except swallowed it)
            # and the reason for the failure is now shown.
            print("skit! --->",root_fname,repr(exc))
finally:
    # Persist the progress and release the figure even if the loop is interrupted.
    plt.close(fig)
    available_df.to_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset-available-Tr.p')
print("Fertig")

Van 0 de 9
0/9 1/9 2/9 3/9 4/9 5/9 skit! ---> prelude2
skit! ---> prelude7
skit! ---> prelude8
Fertig


## Comparar las columnas check con las carpetas

In [10]:
# Count rows flagged Image == True and rows flagged Trs == True in available_df.
print("Imagenes:",available_df[available_df['Image']==True].shape[0],"de",available_df.shape[0])
print("Imágenes transpuestas:",available_df[available_df['Trs']==True].shape[0],"de",available_df[available_df['Image']==True].shape[0])

Imagenes: 1893 de 1893
Imágenes transpuestas: 1890 de 1893


Este código revisa la carpeta de imágenes transpuestas y le pone check en la columna `Trs` del dataframe a los que ya generamos

In [8]:
# Walk the transposed-images folder and set Trs = True for the dataframe row that
# matches each file's root name.
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/Images-transposed'
k = 0          # files seen
modfs = 0      # files that matched exactly one row
oopsies = []   # root names of files with zero or several matching rows
idxs = []      # matched index labels (one entry per file, so repeats are expected)
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        name = Get_Root_Name_from_transposed(file)
        idx = available_df[available_df['root_Filename'] == name].index.to_list()
        if len(idx)==0 or len(idx)>1:
            print("ooops --->",file)
            root_fname = DitchExtension(file)
            oopsies.append(root_fname)
        else:
            j = idx[0]
            available_df.at[j,'Trs'] = True
            modfs += 1
            idxs.append(j)
        k += 1
print(k,"archivos encontrados")
print(modfs,"modificaciones")
print("Hay",available_df[available_df['Trs']==True].shape[0],"checked en el dataframe.")    
print("Los",len(oopsies),"archivos raros\n",oopsies)     

7690 archivos encontrados
7690 modificaciones
Hay 1890 checked en el dataframe.
Los 0 archivos raros
 []


In [9]:
# Persist the updated Trs flags; this overwrites the same file available_df was loaded from.
available_df.to_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset-available-Tr.p')

Este código revisa la carpeta de imágenes y le pone check en la columna `Image` del dataframe a los que ya generamos

In [None]:
# Walk the images folder and set Image = True for the dataframe row whose
# img_Filename equals each file name.
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/Images-full-no-canales'
k = 0          # files seen
modfs = 0      # files that matched exactly one row
oopsies = []   # root names of files with zero or several matching rows
idxs = []      # matched index labels
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        idx = dataset_df[dataset_df['img_Filename'] == file].index.to_list() 
        if len(idx)==0 or len(idx)>1:
            print("ooops --->",file)
            root_fname = DitchExtension(file)
            oopsies.append(root_fname)
        else:
            j = idx[0]
            dataset_df.at[j,'Image'] = True
            modfs += 1
            idxs.append(j)
        k += 1
print(k,"archivos encontrados")
print(modfs,"modificaciones")
print("Hay",dataset_df[dataset_df['Image']==True].shape[0],"checked en el dataframe.")  
print("Los",len(oopsies),"archivos raros\n",oopsies)              

1893 archivos encontrados
1893 modificaciones
Hay 1893 checked en el dataframe.
Los 0 archivos raros
 []


In [None]:
# For every unmatched root name, rewrite img_Filename as '<root>.png'.
# The single-element unpacking raises ValueError unless exactly one row matches.
for fname in oopsies:
    [idx] = dataset_df[dataset_df['root_Filename'] == fname].index.to_list()
    dataset_df.loc[idx,'img_Filename'] = fname + '.png' 

In [None]:
# Sanity check: compare the number of index labels with the number of distinct labels.
idxs = dataset_df.index.to_list()
print("Hay",len(idxs),"índices")
print("Hay",len(list(set(idxs))),"indices no repetidos")

Hay 1612 índices
Hay 1612 indices no repetidos


In [None]:
# Persist dataset_df; this overwrites the same file it was loaded from.
dataset_df.to_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset.p')

Este código revisa los midis que tengo y ve si ya están en el dataframe

In [None]:
# List the MIDI files under `rootdir` whose root name has no row in the dataframe.
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs'
num_faltantes = 0
faltantes = []   # file names without a matching row
k = 0            # files seen
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        # Fixed the misspelled helper name (`DitchExtenstion` raised NameError on a fresh kernel).
        root_fname = DitchExtension(file)
        # NOTE(review): the original read an undefined name `df`; dataset_df is assumed
        # to be the intended dataframe - confirm.
        idx = dataset_df[dataset_df['root_Filename'] == root_fname].index.to_list()
        if len(idx)==0:
            faltantes.append(file)
            num_faltantes += 1
        if len(idx)>1:
            print("ooopS --->",file)
        k += 1
print(num_faltantes,"midis faltantes en el dataframe")
print(k,"archivos midi leidos")

0 midis faltantes en el dataframe
1559 archivos midi leidos


In [None]:
# Look up every missing file (from `faltantes`) in `orig_df` by root name and count the hits.
# NOTE(review): `orig_df` is not defined anywhere in the visible notebook, so this cell
# fails on a fresh kernel; possibly it is the frame from the commented-out read below - confirm.
# dataset2_df = pd.read_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset2-big.p')
num_found = 0
num_missing = 0
indices = []   # orig_df index labels of the files found exactly once
for name in faltantes:
    root_fname = DitchExtension(name)
    idxs = orig_df[orig_df['root_Filename'] == root_fname].index.to_list()
    if len(idxs)==0:
        num_missing += 1
    elif len(idxs)==1:
        num_found += 1
        indices.append(idxs[0])
    elif len(idxs)>1:
        print("ooops--->",name)
print("Found:",num_found)
print("Still missing:",num_missing)

Found: 54
Still missing: 0


Código para borrar archivos

In [None]:
# root = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs'
# rutas_para_eliminar = orig_df.loc[indices,'midi_filename'].values
# rutas_para_eliminar

# for x in rutas_para_eliminar:
#     path = os.path.join(root,x)
#     os.remove(path)

Código para actualizar la columna `Img_Path`

In [None]:
# Store the full path of every image file in the 'Img_Path' column of its dataframe row.
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/Images-full-no-canales'

for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        root_fname = DitchExtension(file)
        idx = dataset_df[dataset_df['root_Filename'] == root_fname].index.to_list() 
        # NOTE(review): `break` only leaves the inner loop, so the remaining files of this
        # directory are skipped and the walk continues with the next directory.
        if len(idx)==0:
            print("No encontrado en el dataframe:",file)
            break
        elif len(idx)>1:
            print("Duplicado en el dataframe:",file)
            break
        else:
            idx = idx[0]
            # NOTE(review): this rebinds the notebook-level name `img_path`, which other
            # cells use for an output folder.
            img_path = os.path.join(subdir, file)
            dataset_df.at[idx,'Img_Path'] = img_path
            # midi_path = os.path.join(subdir, file)
            # dataset_df.at[idx,'Midi_Path'] = path
dataset_df.head(2)

Unnamed: 0,img_Filename,Composer,Period,Image,Broken,root_Filename,canonical_title,Img_Path,Midi_Path
0,chpn_op25_e2.png,Chopin,Romanticism,True,False,chpn_op25_e2,,/content/drive/My Drive/Diplomado Python/Noteb...,/content/drive/My Drive/Diplomado Python/Noteb...
1,chpn_op25_e4.png,Chopin,Romanticism,True,False,chpn_op25_e4,,/content/drive/My Drive/Diplomado Python/Noteb...,/content/drive/My Drive/Diplomado Python/Noteb...


Código para actualizar la columna `Midi_Path`

In [None]:
# Store the full path of every MIDI file in the 'Midi_Path' column of its dataframe row.
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs'

for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        root_fname = DitchExtension(file)
        idx = dataset_df[dataset_df['root_Filename'] == root_fname].index.to_list()
        # NOTE(review): `break` only leaves the inner loop, so the remaining files of this
        # directory are skipped and the walk continues with the next directory.
        if len(idx)==0:
            print("No encontrado en el dataframe:",file)
            break
        elif len(idx)>1:
            print("Duplicado en el dataframe:",file)
            break
        else:
            idx = idx[0]
            # The original built the path in `img_path` (clobbering that notebook-level
            # name) and then stored the undefined/stale name `path`.
            midi_path = os.path.join(subdir, file)
            dataset_df.at[idx,'Midi_Path'] = midi_path
dataset_df.head(2)

# Agregar nuevos midis al dataframe

In [None]:
# Build one new row per file under 'MIDIs [Nuevos]', using the same columns as dataset_df.
colz = dataset_df.columns.to_list()
update_dict = {col:[] for col in colz}

rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs [Nuevos]'
k = 0
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        root_fname = DitchExtension(file)
        img_fname = root_fname + '.png'
        update_dict['img_Filename'].append(img_fname)
        # NOTE(review): full_path is computed but never used; 'Midi_Path' is stored as ''.
        full_path = os.path.join(subdir, file)
        update_dict['Midi_Path'].append('')
        update_dict['Img_Path'].append('')
        # The composer is the name of the folder that contains the file.
        comp = DitchPath(subdir)
        update_dict['Composer'].append(comp)
        update_dict['Period'].append('Baroque')  # <---- the only value to check: hard-coded for this batch
        update_dict['Image'].append(False)
        update_dict['Broken'].append(False)
        update_dict['root_Filename'].append(root_fname)
        update_dict['canonical_title'].append('')
        k += 1
print(k,"archivos procesados")
print(update_dict)
update_df = pd.DataFrame(update_dict,columns=colz)
update_df

228 archivos procesados
{'img_Filename': ['brand3.png', 'brand1.png', 'brand2.png', 'brand41.png', 'brand42.png', 'brand43.png', 'brand51.png', 'brand53.png', 'brand52.png', 'fuguecm.png', 'fuguegm.png', '08freuet.png', '03achgot.png', '02ichdan.png', '04esistd.png', '05anwass.png', '06christ.png', '07nunlob.png', '01ausmei.png', '09ermunt.png', '10austie.png', '11jesu.png', '13allein.png', '12puerna.png', '14oherre.png', '15christ.png', 'aria.png', 'goldberg.png', 'var1.png', 'var10.png', 'var12c4.png', 'var11.png', 'var14.png', 'var13.png', 'var15c5.png', 'var17.png', 'var18c6.png', 'var16.png', 'var19.png', 'var2.png', 'var20.png', 'var21c7.png', 'var22.png', 'var23.png', 'var25.png', 'var24c8.png', 'var26.png', 'var27c9.png', 'var28.png', 'var29.png', 'var4.png', 'var30.png', 'var3c1.png', 'var6c2.png', 'var5.png', 'var7.png', 'var9c3.png', 'var8.png', 'bsgjg_a.png', 'bsgjg_b.png', 'bsgjg_g.png', 'bsgjg_h.png', 'bsgjg_i.png', 'bsgjg_j.png', 'bsgjg_k.png', 'bsgjg_l.png', 'bsgjg_c.pn

Unnamed: 0,img_Filename,Composer,Period,Image,Broken,root_Filename,canonical_title,Img_Path,Midi_Path
0,brand3.png,brandenb,Baroque,False,False,brand3,,,
1,brand1.png,brandenb,Baroque,False,False,brand1,,,
2,brand2.png,brandenb,Baroque,False,False,brand2,,,
3,brand41.png,brandenb,Baroque,False,False,brand41,,,
4,brand42.png,brandenb,Baroque,False,False,brand42,,,
...,...,...,...,...,...,...,...,...,...
223,prelude3.png,wtcbki,Baroque,False,False,prelude3,,,
224,prelude2.png,wtcbki,Baroque,False,False,prelude2,,,
225,prelude8.png,wtcbki,Baroque,False,False,prelude8,,,
226,prelude9.png,wtcbki,Baroque,False,False,prelude9,,,


In [None]:
# Append the new rows and renumber the index.
# NOTE(review): not idempotent - running this cell twice appends the same rows twice.
dataset_df = pd.concat([dataset_df,update_df],ignore_index=True)
dataset_df

# Conteos

In [None]:
# Map each composer's last_name to its period (a repeated last name keeps the last row seen).
composers_dict = {composers_df.loc[k,'last_name']:composers_df.loc[k,'period'] 
                  for k in composers_df.index.to_list()}
composers_dict

In [None]:
# Count the rows per period (leaving out 'Modern' and 'Nationalism') and save a bar chart of the counts.
arr = dataset_df['Period'].values

periodos = np.unique(arr)
data = {p:0 for p in periodos if p not in ['Modern','Nationalism'] }

for p in data.keys():
    data[p] += dataset_df[dataset_df['Period']==p].shape[0]
print(data)
plt.figure(dpi=150)
#plt.suptitle('Histogram')
plt.bar(list(data.keys()),list(data.values()), color='gray',edgecolor='black')
plt.xticks(fontsize=16)
plt.yticks(fontsize=14)
# The image is written relative to the current working directory.
plt.savefig('histogram_periods.png',dpi=150)
plt.show()

{'Baroque': 574, 'Classicism': 334, 'Romanticism': 1040}


In [None]:
# Count every file found under 'MIDIs [Nuevos]' (all subfolders included).
rootdir = '/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/MIDIs [Nuevos]'
k = 0
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        k += 1
print("Hay",k,"nuevos archivos en MIDI [Nuevos].")

Hay 228 nuevos archivos en MIDI [Nuevos].


In [None]:
# Persist dataset_df including the newly appended rows; this overwrites dataset.p.
dataset_df.to_pickle('/content/drive/My Drive/Diplomado Python/Notebooks & Resources/Proyecto Final/dataset.p')