# Tokenize and save data

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastai.text import *

In [3]:
import pandas as pd
from fastai_data import *

## Preparing the data

In [4]:
# Dataset version tag; everything this notebook reads or writes lives under data/midi/<version>.
version = 'v7'
data_path = Path('data/midi')
version_path = data_path.joinpath(version)

In [5]:
# `source_dir` doubles as the sub-folder below `version_path` and as the csv column name used by `get_files`.
source_dir = 'midi_encode/np/dur'
out_path = version_path/source_dir
csv_path = out_path/'midi_encode.csv'
version_path.ls()

[PosixPath('data/midi/v7/metadata'),
 PosixPath('data/midi/v7/midi_sources'),
 PosixPath('data/midi/v7/midi_encode'),
 PosixPath('data/midi/v7/midi_npz'),
 PosixPath('data/midi/v7/midi_transform')]

In [6]:
# Read the index and drop every row that has no value in the `source_dir` column.
csv = pd.read_csv(csv_path).dropna(subset=[source_dir])

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
# def create_databunch(files, cache_name, vocab=None, batch_size=32, load_cached=False):
#     if load_cached and (out_path/f'{cache_name}/itos.pkl').exists():
#         data = TextLMDataBunch.load(out_path, bs=batch_size, cache_name=cache_name)
#         data.valid_ds.x.processor[0] = TokenizeProcessor(tokenizer=MusicTokenizer())
#     else:
#         ps = [LMOpenFileProcessor(), 
#              LMTokenizeProcessor(tokenizer=MusicTokenizer(), chunksize=num_cpus()*20),
#              LMNumericalizeProcessor(vocab=vocab, max_vocab=500)]

#         data = (TextList(items=files, path=out_path, processor=ps)
#                 .random_split_by_pct(0.01, seed=6)
#                 .label_for_lm()
#                 .databunch(bs=batch_size))
#         data.save(cache_name)
#     vocab = data.train_ds.vocab
#     return data, vocab

In [35]:
def get_files(csv):
    "Return the paths from the `source_dir` column of `csv`, resolved under `version_path`, that exist on disk."
    candidates = (Path(version_path/rel) for rel in csv[source_dir].values)
    return [p for p in candidates if p.exists()]

In [36]:
all_files = get_files(csv)

In [37]:
subset = all_files[:10]

In [38]:
class OpenNPFileProcessor(PreProcessor):
    "`PreProcessor` that replaces every `Path` item with the result of `np.load` on that path."
    def process_one(self,item):
        # Anything that is not a `Path` is passed through unchanged.
        if not isinstance(item, Path): return item
        return np.load(item)

In [39]:
# Wrap the file subset in an `ItemList` whose processor loads each file with `np.load`, then split
# off 1% for validation. With only 10 items the validation list comes out empty (see the repr further down).
data = ItemList(
    items=subset, path=out_path, processor=[OpenNPFileProcessor()],
).random_split_by_pct(0.01, seed=6)

In [40]:

class LMPreloader(Callback):
    """Transforms the items of `dataset` into a stream of contiguous batches for language modelling.

    The items in `dataset.x.items` are indexed as 2-D arrays (`items[0].shape[1]` is used as the
    per-token feature width). They are treated as one long "ragged array" that is cut into `bs`
    parallel rows. Each `__getitem__` call refills one row of a shared `(bs, bptt+1, n_features)`
    buffer and returns `(x, y)` views into it, where `y` is `x` shifted by one token.

    Attributes that are not found on the preloader itself are looked up on the wrapped `dataset`.
    """

    class CircularIndex():
        "Index over the items that handles shuffling, direction of indexing and wrap-around at either end."
        def __init__(self, length:int, forward:bool): self.idx, self.forward = np.arange(length), forward
        def __getitem__(self, i):
            # forward: position i modulo length; backward: the same position counted from the tail
            return self.idx[ i%len(self.idx) if self.forward else len(self.idx)-1-i%len(self.idx)]
        def __len__(self) -> int: return len(self.idx)
        def shuffle(self): np.random.shuffle(self.idx)

    def __init__(self, dataset:LabelList, lengths:Collection[int]=None, bs:int=32, bptt:int=70, backwards:bool=False,
                 shuffle:bool=False):
        "Store the configuration; lengths, totals and buffers are computed lazily on first use."
        self.dataset,self.bs,self.bptt,self.shuffle,self.backwards,self.lengths = dataset,bs,bptt,shuffle,backwards,lengths
        self.totalToks,self.ite_len,self.idx = int(0),None,None

    def __len__(self):
        "Number of rows served per epoch (a multiple of `bs`), or 1 when the dataset's `item` is set."
        if self.ite_len is None:
            if self.lengths is None: self.lengths = [len(item) for item in self.dataset.x.items]
            # Accept any collection of ints: `.sum()` and integer indexing below need an ndarray.
            self.lengths   = np.asarray(self.lengths)
            self.totalToks = self.lengths.sum()
            # `self.item` is not set here; it resolves on the wrapped dataset through `__getattr__`.
            self.ite_len   = self.bs*int( math.ceil( self.totalToks/(self.bptt*self.bs) )) if self.item is None else 1
        return self.ite_len

    def __getattr__(self,k:str)->Any:
        "Delegate unknown attributes to the wrapped dataset."
        # `__getattr__` only runs when normal lookup fails. If `dataset` itself is missing (e.g. while the
        # object is being copied or unpickled and `__dict__` is still empty) delegating would recurse forever.
        if k == 'dataset': raise AttributeError(k)
        return getattr(self.dataset, k)

    def allocate_buffers(self):
        "Create the ragged array that will be filled when we ask for items."
        if self.ite_len is None: len(self)
        self.idx   = LMPreloader.CircularIndex(len(self.dataset.x.items), not self.backwards)
        # One extra token per row so that x (first bptt tokens) and y (last bptt tokens) can share the buffer.
        self.batch = np.zeros((self.bs, self.bptt+1, self.dataset.x.items[0].shape[1]), dtype=np.int64)
        self.batch_x, self.batch_y = self.batch[:,0:self.bptt], self.batch[:,1:self.bptt+1]
        #ro: index of the text we're at inside our datasets for the various batches
        self.ro    = np.zeros(self.bs, dtype=np.int64)
        #ri: index of the token we're at inside our current text for the various batches
        # (`np.int` was removed in NumPy 1.24; use the same explicit dtype as `ro`)
        self.ri    = np.zeros(self.bs, dtype=np.int64)

    def on_epoch_begin(self, **kwargs):
        "Allocate the buffers on first use (reshuffle afterwards if asked) and reset every row's start position."
        if self.idx is None: self.allocate_buffers()
        elif self.shuffle:   self.idx.shuffle()
        self.idx.forward = not self.backwards

        # Row i starts `step*i` tokens into the concatenated stream.
        step = self.totalToks / self.bs
        ln_rag, countTokens, i_rag = 0, 0, -1
        for i in range(0,self.bs):
            #Compute the initial values for ro and ri
            while ln_rag + countTokens <= int(step * i):
                countTokens += ln_rag
                i_rag       += 1
                ln_rag       = self.lengths[self.idx[i_rag]]
            self.ro[i] = i_rag
            self.ri[i] = ( ln_rag - int(step * i - countTokens) ) if self.backwards else int(step * i - countTokens)

    #Training dl gets on_epoch_begin called, val_dl, on_epoch_end
    def on_epoch_end(self, **kwargs): self.on_epoch_begin()

    def __getitem__(self, k:int):
        "Refill row `k % bs` of the shared buffer and return its `(x, y)` views."
        j = k % self.bs
        if j==0:
            if self.item is not None: return self.dataset[0]
            if self.idx is None: self.on_epoch_begin()
        self.ro[j],self.ri[j] = self.fill_row(not self.backwards, self.dataset.x.items, self.idx, self.batch[j],
                                              self.ro[j], self.ri[j], overlap=1, lengths=self.lengths)
        return self.batch_x[j], self.batch_y[j]

    def fill_row(self, forward, items, idx, row, ro, ri, overlap,lengths):
        """Fill the row with tokens from the ragged array. --OBS-- overlap != 1 has not been implemented

        Starts at token `ri` of item `idx[ro]` and continues into the following items until `row` is full.
        Returns the updated `(ro, ri)`, moved back by `overlap` tokens so the next fill of this row starts
        on the last token written here.
        """
        ibuf = n = 0
        ro  -= 1
        n_rows = row.shape[0]  # tokens per row; `row.size` would also count the feature axis of a 2-D row
        while ibuf < n_rows:
            ro   += 1
            ix    = idx[ro]
            rag   = items[ix]
            if forward:
                ri = 0 if ibuf else ri
                n  = min(lengths[ix] - ri, n_rows - ibuf)
                row[ibuf:ibuf+n] = rag[ri:ri+n]
            else:
                ri = lengths[ix] if ibuf else ri
                n  = min(ri, n_rows - ibuf)
                row[ibuf:ibuf+n] = rag[ri-n:ri][::-1]
            ibuf += n
        return ro, ri + ((n-overlap) if forward else -(n-overlap))


In [41]:

class LMDataBunch(DataBunch):
    "`DataBunch` for language modelling whose dataloaders are fed by `LMPreloader`."
    @classmethod
    def create(cls, train_ds, valid_ds, test_ds=None, path:PathOrStr='.', no_check:bool=False, bs=64, val_bs:int=None,
               num_workers:int=0, device:torch.device=None, collate_fn:Callable=data_collate,
               dl_tfms:Optional[Collection[Callable]]=None, bptt:int=70, backwards:bool=False) -> DataBunch:
        """Create an `LMDataBunch` in `path` from the `datasets` for language modelling.

        Every dataset returned by `cls._init_ds` is wrapped in an `LMPreloader` (only the first one is
        shuffled) and then in a non-shuffling `DataLoader`. The first dataset uses `bs`, all others use
        `val_bs`, which defaults to `bs`. `num_workers` is accepted but not used in this method.
        """
        datasets = cls._init_ds(train_ds, valid_ds, test_ds)
        val_bs = ifnone(val_bs, bs)
        datasets = [LMPreloader(ds, shuffle=(i==0), bs=(bs if i==0 else val_bs), bptt=bptt, backwards=backwards)
                    for i,ds in enumerate(datasets)]
        # Each DataLoader must batch with the same size as its preloader: `LMPreloader.__getitem__` maps
        # index k to buffer row `k % bs`, so a different batch size would refill rows within one batch.
        dls = [DataLoader(d, b, shuffle=False) for d,b in zip(datasets, (bs,val_bs,val_bs,val_bs)) if d is not None]
        return cls(*dls, path=path, device=device, dl_tfms=dl_tfms, collate_fn=collate_fn, no_check=no_check)

    def save(self, cache_name:PathOrStr='tmp'):
        "Save the items and labels of the `DataBunch` as `.npy` files in the `self.path/cache_name` folder."
        cache_path = self.path/cache_name
        os.makedirs(cache_path, exist_ok=True)
        np.save(cache_path/'train_ids.npy', self.train_ds.x.items)
        np.save(cache_path/'train_lbl.npy', self.train_ds.y.items)
        np.save(cache_path/'valid_ids.npy', self.valid_ds.x.items)
        np.save(cache_path/'valid_lbl.npy', self.valid_ds.y.items)
        if self.test_dl is not None: np.save(cache_path/'test_ids.npy', self.test_ds.x.items)
        if hasattr(self.train_ds, 'classes'): save_texts(cache_path/'classes.txt', self.train_ds.classes)

    @classmethod
    def load(cls, path:PathOrStr, cache_name:PathOrStr='tmp', processor:PreProcessor=None, **kwargs):
        """Load the arrays written by `save` from `path/cache_name` and rebuild the bunch with `cls.from_ids`.

        `save` stores no vocabulary, so an optional `vocab` can be supplied through `kwargs` (default `None`).
        The remaining `kwargs` are forwarded to `cls.from_ids`.
        """
        cache_path = Path(path)/cache_name
        # The original referenced an undefined `vocab` name here, which raised NameError on every call.
        vocab = kwargs.pop('vocab', None)

        def _load(fname):
            # Item arrays may have been saved as pickled object arrays (e.g. sequences of different
            # lengths), which `np.load` refuses to read unless pickling is allowed.
            # NOTE(review): unpickling executes code from the file; only load caches you wrote yourself.
            return np.load(cache_path/fname, allow_pickle=True)

        train_ids,train_lbls = _load('train_ids.npy'), _load('train_lbl.npy')
        valid_ids,valid_lbls = _load('valid_ids.npy'), _load('valid_lbl.npy')
        test_ids = _load('test_ids.npy') if os.path.isfile(cache_path/'test_ids.npy') else None
        classes = loadtxt_str(cache_path/'classes.txt') if os.path.isfile(cache_path/'classes.txt') else None
        # NOTE(review): `from_ids` is not defined in this class; presumably it has to come from `DataBunch`
        # or another base. Confirm it exists before relying on `load`.
        return cls.from_ids(path, vocab, train_ids, valid_ids, test_ids, train_lbls, valid_lbls, classes, processor, **kwargs)


In [42]:
data._bunch = LMDataBunch

In [43]:
data._bunch

__main__.LMDataBunch

In [44]:
data.x

In [31]:
data

ItemLists;

Train: ItemList (10 items)
[PosixPath('data/midi/v7/midi_encode/np/dur/hooktheory/pianoroll/w/wayne-sharpe/yu-gi-oh-theme-song/chorus_key.npy'), PosixPath('data/midi/v7/midi_encode/np/dur/hooktheory/pianoroll/w/what-a-day/kiefer/chorus_key.npy'), PosixPath('data/midi/v7/midi_encode/np/dur/hooktheory/pianoroll/w/whiteflame/senbonzakura/verse_key.npy'), PosixPath('data/midi/v7/midi_encode/np/dur/hooktheory/pianoroll/w/wham/last-christmas/intro_key.npy'), PosixPath('data/midi/v7/midi_encode/np/dur/hooktheory/pianoroll/w/whiteflame/senbonzakura/pre-chorus_key.npy')]...
Path: data/midi/v7/midi_encode/np/dur;

Valid: ItemList (0 items)
[]...
Path: data/midi/v7/midi_encode/np/dur;

Test: None

In [49]:
class LMLabelList(EmptyLabelList):
    "`EmptyLabelList` of dummy labels that additionally carries a `CrossEntropyFlat` loss as `loss_func`."
    def __init__(self, items:Iterator, **kwargs):
        # Let the parent set itself up first, then attach the loss on top of it.
        super().__init__(items, **kwargs)
        loss = CrossEntropyFlat()
        self.loss_func = loss

In [50]:
data = data.label_const(label_cls=LMLabelList)

In [51]:
data.x._bunch = LMDataBunch

In [52]:
data = data.databunch(bs=2)

In [53]:
data.train_dl.dl.dataset

LMPreloader
dataset: LabelList
y: LMLabelList (10 items)
[EmptyLabel , EmptyLabel , EmptyLabel , EmptyLabel , EmptyLabel ]...
Path: data/midi/v7/midi_encode/np/dur
x: ItemList (10 items)
[array([[12, -1,  1, -1],
       [ 7,  3,  1,  0],
       [12, -1,  0, -1],
       [ 6,  3,  1,  0],
       ...,
       [12, -1,  0, -1],
       [ 3,  3, -2,  1],
       [ 6,  3, -2,  1],
       [10,  3, -2,  1]]), array([[12, -1,  0, -1],
       [ 0,  3,  6,  1],
       [ 4,  3,  6,  1],
       [10,  3,  6,  1],
       ...,
       [ 2,  3, -2,  1],
       [ 6,  3, -2,  1],
       [ 9,  3, -2,  1],
       [ 1,  4, -2,  1]]), array([[12, -1,  0, -1],
       [ 2,  3, 16,  1],
       [ 5,  3, 16,  1],
       [ 9,  3, 16,  1],
       ...,
       [ 0,  3, -2,  1],
       [ 4,  3, -2,  1],
       [ 7,  3, -2,  1],
       [ 2,  4, -2,  0]]), array([[12, -1,  0, -1],
       [ 1,  3, 32,  1],
       [ 5,  3, 32,  1],
       [ 8,  3, 32,  1],
       ...,
       [ 8,  2, -2,  1],
       [ 0,  3, -2,  1],
       [

In [55]:
data.save(cache_name='tmp_test')

In [56]:
next(iter(data.train_dl))[0].shape

torch.Size([2, 70, 4])

In [39]:
len(data.train_ds)

10

In [52]:
data.train_ds.x.items[0].shape[1]

4

In [73]:
# Use the preloader defined in this notebook: the (4, 21, 4) buffer shape recorded below
# needs its extra per-token feature axis.
a = LMPreloader(data.train_ds, shuffle=True, bs=4, bptt=20, backwards=False)

In [74]:
a.on_epoch_begin()

In [75]:
a.batch.shape

(4, 21, 4)

In [92]:
t = a[0]

In [93]:
t[1]

array([[ 7,  3, -2,  1],
       [12, -1,  0, -1],
       [ 0,  3, -2,  1],
       [ 3,  3, -2,  1],
       ...,
       [ 7,  3, -2,  1],
       [12, -1,  0, -1],
       [ 0,  3, -2,  1],
       [ 3,  3, -2,  1]])

In [94]:
t[0]

array([[ 3,  3, -2,  1],
       [ 7,  3, -2,  1],
       [12, -1,  0, -1],
       [ 0,  3, -2,  1],
       ...,
       [ 3,  3, -2,  1],
       [ 7,  3, -2,  1],
       [12, -1,  0, -1],
       [ 0,  3, -2,  1]])

In [54]:
%debug

> [0;32m<ipython-input-46-65c956137e5f>[0m(77)[0;36mfill_row[0;34m()[0m
[0;32m     75 [0;31m                [0mri[0m [0;34m=[0m [0;36m0[0m [0;32mif[0m [0mibuf[0m [0;32melse[0m [0mri[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     76 [0;31m                [0mn[0m  [0;34m=[0m [0mmin[0m[0;34m([0m[0mlengths[0m[0;34m[[0m[0mix[0m[0;34m][0m [0;34m-[0m [0mri[0m[0;34m,[0m [0mrow[0m[0;34m.[0m[0msize[0m [0;34m-[0m [0mibuf[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 77 [0;31m                [0mrow[0m[0;34m[[0m[0mibuf[0m[0;34m:[0m[0mibuf[0m[0;34m+[0m[0mn[0m[0;34m][0m [0;34m=[0m [0mrag[0m[0;34m[[0m[0mri[0m[0;34m:[0m[0mri[0m[0;34m+[0m[0mn[0m[0;34m][0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     78 [0;31m            [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     79 [0;31m                [0mri[0m [0;34m=[0m [0mlengths[0m[0;34m[[0m[0mix[0m[0;34m][0m [0;32mif[0m [0mibuf

In [13]:
csv

Unnamed: 0,song_url,instruments,ht_offset,time_signature,title,quarter_length,ht_mode,source,artist,ht_key,...,section,parts,ht_time_signature,ht_bpm,midi_title,mxl,midi_transform,midi_npz,midi_npz_timesteps,midi_encode/npenc/dur
1,https://www.hooktheory.com/theorytab/view/wayn...,"Piano,Piano",0.0,4/4,yu-gi-oh-theme-song,32.0,1.0,hooktheory,wayne-sharpe,C,...,chorus,"intro,chorus",4.0,128.0,yu-gi-oh3,,midi_transform/hooktheory/pianoroll/w/wayne-sh...,midi_npz/hooktheory/pianoroll/w/wayne-sharpe/y...,129.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...
2,https://www.hooktheory.com/theorytab/view/what...,"Piano,Piano",-5.0,4/4,kiefer,16.0,6.0,hooktheory,what-a-day,D,...,chorus,chorus,4.0,96.0,kiefer,,midi_transform/hooktheory/pianoroll/w/what-a-d...,midi_npz/hooktheory/pianoroll/w/what-a-day/kie...,65.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...
3,https://www.hooktheory.com/theorytab/view/whit...,"Piano,Piano",-5.0,4/4,senbonzakura,32.0,6.0,hooktheory,whiteflame,D,...,verse,"verse,pre-chorus,chorus",4.0,152.0,Senbonzakura,,midi_transform/hooktheory/pianoroll/w/whitefla...,midi_npz/hooktheory/pianoroll/w/whiteflame/sen...,129.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...
4,https://www.hooktheory.com/theorytab/view/wham...,"Piano,Piano",-1.0,4/4,last-christmas,32.0,1.0,hooktheory,wham,Db,...,intro,"intro,verse,chorus",4.0,108.0,Last Christmas Intro,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...,129.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...
5,https://www.hooktheory.com/theorytab/view/whit...,"Piano,Piano",-5.0,4/4,senbonzakura,32.0,6.0,hooktheory,whiteflame,D,...,pre-chorus,"verse,pre-chorus,chorus",4.0,152.0,senbonzakura - pre-Pre-Chorus,,midi_transform/hooktheory/pianoroll/w/whitefla...,midi_npz/hooktheory/pianoroll/w/whiteflame/sen...,129.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...
6,https://www.hooktheory.com/theorytab/view/will...,Piano,-4.0,4/4,on-the-road-again,28.0,1.0,hooktheory,willie-nelson,E,...,bridge,"verse,bridge",4.0,112.0,On The Road Again,,midi_transform/hooktheory/pianoroll/w/willie-n...,midi_npz/hooktheory/pianoroll/w/willie-nelson/...,113.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...
7,https://www.hooktheory.com/theorytab/view/wham...,"Piano,Piano",-1.0,4/4,last-christmas,32.0,1.0,hooktheory,wham,Db,...,chorus,"intro,verse,chorus",4.0,108.0,Last Christmas Chorus,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...,129.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...
8,https://www.hooktheory.com/theorytab/view/will...,Piano,-4.0,4/4,on-the-road-again,28.0,1.0,hooktheory,willie-nelson,E,...,verse,"verse,bridge",4.0,112.0,On The Road Again,,midi_transform/hooktheory/pianoroll/w/willie-n...,midi_npz/hooktheory/pianoroll/w/willie-nelson/...,113.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...
10,https://www.hooktheory.com/theorytab/view/whit...,"Piano,Piano",-5.0,4/4,senbonzakura,64.0,6.0,hooktheory,whiteflame,D,...,chorus,"verse,pre-chorus,chorus",4.0,152.0,Senbonzakura,,midi_transform/hooktheory/pianoroll/w/whitefla...,midi_npz/hooktheory/pianoroll/w/whiteflame/sen...,257.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...
11,https://www.hooktheory.com/theorytab/view/wham...,"Piano,Piano",-1.0,4/4,last-christmas,64.0,1.0,hooktheory,wham,Db,...,verse,"intro,verse,chorus",4.0,108.0,Last Christmas Verse,,midi_transform/hooktheory/pianoroll/w/wham/las...,midi_npz/hooktheory/pianoroll/w/wham/last-chri...,257.0,midi_encode/npenc/dur/hooktheory/pianoroll/w/w...


In [12]:
all_files

[]

In [11]:
all_files[:10]

[]

### Create the Full Dataset

In [9]:
# NOTE(review): the only `create_databunch` visible in this notebook is the commented-out cell near
# the top. Unless `fastai_data` exports one, this call raises NameError. TODO confirm.
all_files = get_files(csv)
all_data, all_vocab = create_databunch(all_files, cache_name='tmp/all', load_cached=True)

Note: `all_vocab` is reused (passed as `vocab=`) for every dataset created below.

### Create Hooktheory Dataset

In [10]:
hook_csv = csv.loc[csv.source.isin(['hooktheory', 'cprato'])]
hook_files = get_files(hook_csv)

In [11]:
hook_data, hook_vocab = create_databunch(hook_files, cache_name='tmp/hook', vocab=all_vocab)

Numericalizing


### Create Pop Dataset

In [12]:
pop_csv = csv.loc[csv.source.isin(['midiworld', 'freemidi', 'wikifonia'])]
pop_files = get_files(pop_csv)

In [13]:
pop_data, pop_vocab = create_databunch(pop_files, cache_name='tmp/pop', vocab=all_vocab)

Numericalizing


### Create Classical Dataset

In [14]:
# csv_filter = csv.loc[csv[f'{source_dir}_timesteps'] < 20000]
clc_csv = csv.loc[csv.source.isin(['classical_piano', 'ecomp'])]
clc_files = get_files(clc_csv)

In [16]:
clc_data, clc_vocab = create_databunch(clc_files, cache_name='tmp/clc', vocab=all_vocab)

Numericalizing


In [17]:
len(clc_files)

2857

### Testing

In [18]:
data = pop_data

In [19]:
data.show_batch()

idx,text
0,t2 || t1 nB o3 t4 nD o4 t4 nC o5 t4 || t3 nD o4 t4 || t3 nE- o4 t4 || t3 nD o4 t4 || t3 nE- o4 t2 || t1 nD o4 t2 || t1 nE- o4 t2 || t1 nD o4 t2 || t1 nB o3 t4 nD o4 t4 || t0 nC o5 t3 || t2 nD o4 t4 || t3 nE- o4 t4
1,t1 nC# o2 t2 nF# o2 t1 || t0 nF# o2 t1 || t0 nB o1 t4 nE o2 t4 nF# o2 t6 nE o3 t1 nG# o5 t1 || t1 nG# o2 t4 nE o3 t1 nE o5 t1 || t1 nB o1 t4 nE o3 t1 nB o4 t1 || t1 nE o2 t2 nF# o2 t1 nE o3 t1 nE o5 t1 || t0 nF# o2 t1
2,o3 t8 nA o3 t8 nE o4 t4 || t0 nG# o1 t1 nB- o4 t3 || t0 nG# o1 t2 nA o1 t4 nB o1 t1 nF# o2 t2 || t0 nE- o1 t3 nB o1 t4 || t0 nG# o1 t3 nC# o2 t2 nE o2 t8 nF# o2 t2 nC o4 t3 nE o4 t4 nA o4 t4 nB- o4 t3 || t1 nE- o1 t3 nA
3,|| t0 nF# o2 t2 nC o4 t1 || t0 nE- o4 t4 nG# o5 t5 || t0 nC o2 t4 nF# o2 t2 nE- o4 t1 nF o4 t1 nC o7 t1 || t0 nC o7 t7 || t0 nE o2 t8 nF# o2 t2 nB- o3 t1 nF# o4 t8 || t0 nA o3 t4 nB- o3 t1 nE- o4 t1 nF o4 t1 || t0 nC o2
4,o3 t2 nC o4 t2 || t1 nC o2 t2 nF# o2 t2 nC o3 t1 nB o4 t2 || t1 nC o2 t2 nD o2 t2 nF# o2 t2 nC o3 t1 nG o3 t2 nC o4 t2 || t1 nC o2 t2 nF# o2 t2 nC o3 t1 || t1 nB o1 t2 nD o2 t2 nF# o2 t2 nC o3 t1 nG o3 t2 nC o4 t2


In [19]:
ob = data.one_batch()

In [23]:
txt_out = data.vocab.textify(ob[0][0]).replace('xxbos ', ''); txt_out

't-2 || nB3 t-2 nD4 t-2 nC5 t-2 || nD4 t-1 || nD4 t-2 || nD4 t-2 || nD4 t-2 || nE-4 t-1 || nE-4 t-2 || nE-4 t-2 || nE-4 t-2 || nD4 t-1 || nD4 t-2 || nD4 t-2 || nD4 t-2 || nE-4 t-1 || nE-4 t-2 || nD4 t-1 || nD4 t-2 || nE-4 t-1 || nE-4 t-2 || nD4 t-1 || nD4 t-2 || nB3'

In [20]:
from encode_data import *

In [21]:
one_text = data.train_ds[0][0].text

In [22]:
seq = str2seq(one_text); seq

[[E-4t-1],
 [E-4t-2],
 [E-4t-2],
 [E-4t-2],
 [D4t-1],
 [D4t-2],
 [D4t-2],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [D4t-1],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [D5t-1],
 [D5t-2],
 [C5t-1],
 [C5t-2],
 [C5t-2],
 [C5t-2],
 [D4t-1],
 [D4t-2],
 [D4t-2],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [E-4t-2],
 [E-4t-2],
 [D4t-1],
 [D4t-2],
 [D4t-2],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [D4t-1],
 [D4t-2],
 [G3t-1],
 [G3t-2],
 [C5t-1],
 [C5t-2],
 [B3t-1, D4t-1, C5t-1],
 [B3t-2, D4t-2, C5t-2],
 [B3t-2, D4t-2, C5t-2],
 [B3t-2, D4t-2, C5t-2],
 [D4t-1],
 [D4t-2],
 [D4t-2],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [E-4t-2],
 [E-4t-2],
 [D4t-1],
 [D4t-2],
 [D4t-2],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [D4t-1],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [D4t-1],
 [D4t-2],
 [B3t-1, D4t-1],
 [B3t-2, D4t-2, C5t-1],
 [B3t-2, D4t-2, C5t-2],
 [B3t-2, D4t-2, C5t-2],
 [D4t-1],
 [D4t-2],
 [D4t-2],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [E-4t-2],
 [E-4t-2],
 [D4t-1],
 [D4t-2],
 [D4t-2],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [D4t-1],
 [D4t-2],
 [E-4t-1],
 [E-4t-2],
 [D4t

In [19]:
seq2numpy(seq).shape

(129, 1, 127)

In [20]:
s = str2stream(one_text)

In [21]:
s.show('midi')

In [22]:
s.show('text')

{0.0} <music21.stream.Part 0x7f0626bf9b70>
    {0.0} <music21.instrument.Piano Piano>
    {0.0} <music21.meter.TimeSignature 4/4>
    {0.0} <music21.tempo.MetronomeMark animato Quarter=120>
    {0.0} <music21.key.KeySignature of no sharps or flats>
    {0.25} <music21.chord.Chord G3>
    {0.5} <music21.chord.Chord F#3>
    {0.75} <music21.chord.Chord A2>
    {1.0} <music21.chord.Chord F3>
    {1.25} <music21.chord.Chord E3>
    {1.5} <music21.chord.Chord A2>
    {1.75} <music21.chord.Chord F3>
    {2.0} <music21.chord.Chord E3>
    {2.25} <music21.chord.Chord A2>
    {3.0} <music21.chord.Chord G3>
    {3.25} <music21.chord.Chord F#3>
    {3.5} <music21.chord.Chord G3>
    {3.75} <music21.chord.Chord C#4>
    {4.0} <music21.chord.Chord E3 G3 B3>
    {8.0} <music21.chord.Chord E3 G3 A3 C4>
    {10.0} <music21.chord.Chord E3 G3 B3>
    {12.0} <music21.chord.Chord G2 B-2 D3>
    {18.0} <music21.chord.Chord G2 A2 C3 E3>
    {19.0} <music21.chord.Chord A2 B2 D3 F#3>
    {20.0} <music21.chord