In [113]:
# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [54]:
import numpy as np
def read_batches(all_chars, n_vocab, batch_size=16, n_gram=64):
    """Yield (X, Y) training batches for next-character prediction.

    The encoded corpus is split into ``batch_size`` contiguous segments of
    ``batch_chars = len(all_chars) // batch_size`` characters each.  Row ``r``
    of every batch reads from segment ``r``, and successive batches advance
    through the segments ``n_gram`` characters at a time, so row ``r`` of one
    batch continues where row ``r`` of the previous batch stopped.  Characters
    past ``batch_size * batch_chars`` are never used.

    Parameters
    ----------
    all_chars : numpy.ndarray
        1-D array of integer character ids (values are used as indices into
        the last axis of ``Y``, so they must be < ``n_vocab``).
    n_vocab : int
        Size of the one-hot target axis.
    batch_size : int, default 16
        Number of rows (parallel sequences) per batch.
    n_gram : int, default 64
        Number of time steps (characters) per row.

    Yields
    ------
    X : numpy.ndarray, float64, shape (batch_size, n_gram)
        Character ids of the input window.
    Y : numpy.ndarray, float64, shape (batch_size, n_gram, n_vocab)
        One-hot encoding of the character that follows each input character.
    """
    length = all_chars.shape[0]
    batch_chars = length // batch_size  # characters available to each row

    # Index helpers shared by every batch.
    row_offsets = np.arange(batch_size)[:, None] * batch_chars  # (batch_size, 1)
    steps = np.arange(n_gram)[None, :]                          # (1, n_gram)
    rows = np.arange(batch_size)[:, None]

    # Stop while start < batch_chars - n_gram so that the "+ 1" target lookup
    # below never leaves the row's own segment.
    for start in range(0, batch_chars - n_gram, n_gram):
        positions = row_offsets + start + steps  # (batch_size, n_gram)

        # Inputs are kept as floats, matching the np.zeros buffer used before.
        X = all_chars[positions].astype(np.float64)

        # The label for each position is the next character in the corpus.
        Y = np.zeros((batch_size, n_gram, n_vocab))
        Y[rows, steps, all_chars[positions + 1]] = 1
        yield X, Y

In [32]:
# Location of the text corpus, given relative to the notebook's working directory.
PATH = '../music_generation/data/jig_hornpipes_cleaned.txt'

In [48]:
import io

# Read the whole corpus file as UTF-8 and lower-case it, so upper- and
# lower-case letters share one vocabulary entry.
with io.open(PATH, encoding='utf-8') as f:
    text = f.read().lower()
# Report the corpus size in characters.
print('corpus length:', len(text))

corpus length: 83542


In [41]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
n_vocab = len(chars)
print('total chars:', n_vocab)

# Lookup tables in both directions: character -> id and id -> character.
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}

# Preview the first ten character -> id entries.
print('map notes to some corresponding index')
print('{')
for char, _ in zip(char_indices, range(10)):
    print('\t {%s}: {%d},' % (char, char_indices[char]))
print('\t ...\n}')

total chars: 55
map notes to some corresponding index
{
	 {
}: {0},
	 { }: {1},
	 {!}: {2},
	 {"}: {3},
	 {#}: {4},
	 {%}: {5},
	 {'}: {6},
	 {(}: {7},
	 {)}: {8},
	 {+}: {9},
	 ...
}


In [35]:
# Show the first 1000 characters of the corpus.
print(text[:1000])

f|"a"ecc c2f|"a"ecc c2f|"a"ecc c2f|"bm"bcb "e7"b2f|
"a"ecc c2f|"a"ecc c2c/2d/2|"d"efe "e7"dcb| [1"a"ace a2:|
 [2"a"ace ag=g||\
k:d
p:b
"d"f2f fdd|"d"afa f2e/2f/2|"g"g2g ecd|"em"efd "a7"cba|
"d"f^ef dcd|"d"afa f=ef|"g"gfg "a7"abc |1"d"d3 d2e:|2"d"d3 d2||



"d7"a^ga dfa|"g"b^ab g3|"a7"^c=c^c ace|"d7"fef def|
"g"g2g de=f|"e7"e2e bcd|"am"c2c "d7"adc| [1"g"b2a g3:|
 [2"g"b2a g2f||"em"e2e g2g|b2b e2e|"am"c2a "b7"fba|"em"g2f e3|"em"efg "am"abc|
"b7"b^c^d "em"e2e|"f#7"f2f f2e|"b7"^def baf|"em"e2e g2g|b2b e2e|
"am"c2a "b7"fba|"em"g2f e3|"em"efg "am"abc|"b7"b^c^d "em"e2e|
"f#7"f2e "b7"^def |[1"em"e3 "d7"d3:|[2"em"e3 "e7"e3||


"d" def fed|"g" bdb "d"afd|"d" dfa "g"b2 a|"a7" add "d" d2:|
"b"e|"d"fga agf|"g" gab "a7"bag|"d"fga "d"agf|"em" gfg "a7"e2 g|
"d"fga agf|"g"gab "a7"bag|"d" fga "a7"efg|"d" fdd d2 :|


p:a
!segno! "g"b2b bgf|"c"e2e efg|"g"ded "em"dcb|"am"caa "d7"adc|
"g"bcb bgf|"c"efe efg|"g"ded "d7"cba|"g"g2b g3|
p:b
|g/2a/2|"g"b2g "d"a2f|"g"gfg "g"dcb|"c"cde "g"dgb|"am"baa "d7"a2g/2a/2|


In [50]:
# Encode the corpus as a 1-D int32 array holding one vocabulary id per character.
all_characters = np.asarray(
    [char_indices[ch] for ch in text],
    dtype=np.int32,
)

In [62]:
# For every batch, print row 0 of the inputs decoded back to characters, then
# row 0 of the targets decoded from their one-hot vectors.
for batch in read_batches(all_characters, n_vocab, batch_size=16, n_gram=2):
    print(' '.join(indices_char[idx] for idx in batch[0][0]))
    print(' '.join(indices_char[idx] for idx in batch[1][0].argmax(axis=1)))
    

f |
| "
" a
a "
" e
e c
c c
c  
  c
c 2
2 f
f |
| "
" a
a "
" e
e c
c c
c  
  c
c 2
2 f
f |
| "
" a
a "
" e
e c
c c
c  
  c
c 2
2 f
f |
| "
" b
b m
m "
" b
b c
c b
b  
  "
" e
e 7
7 "
" b
b 2
2 f
f |
| 


 "
" a
a "
" e
e c
c c
c  
  c
c 2
2 f
f |
| "
" a
a "
" e
e c
c c
c  
  c
c 2
2 c
c /
/ 2
2 d
d /
/ 2
2 |
| "
" d
d "
" e
e f
f e
e  
  "
" e
e 7
7 "
" d
d c
c b
b |
|  
  [
[ 1
1 "
" a
a "
" a
a c
c e
e  
  a
a 2
2 :
: |
| 


  
  [
[ 2
2 "
" a
a "
" a
a c
c e
e  
  a
a g
g =
= g
g |
| |
| \
\ 


 k
k :
: d
d 


 p
p :
: b
b 


 "
" d
d "
" f
f 2
2 f
f  
  f
f d
d d
d |
| "
" d
d "
" a
a f
f a
a  
  f
f 2
2 e
e /
/ 2
2 f
f /
/ 2
2 |
| "
" g
g "
" g
g 2
2 g
g  
  e
e c
c d
d |
| "
" e
e m
m "
" e
e f
f d
d  
  "
" a
a 7
7 "
" c
c b
b a
a |
| 


 "
" d
d "
" f
f ^
^ e
e f
f  
  d
d c
c d
d |
| "
" d
d "
" a
a f
f a
a  
  f
f =
= e
e f
f |
| "
" g
g "
" g
g f
f g
g  
  "
" a
a 7
7 "
" a
a b
b c
c  
  |
| 1
1 "
" d
d "
" d
d 3
3  
  d
d 2
2 e
e :
: |
| 2
2 "
" d
d "
" d
d 3
3  
  d
d 2


b a
a  
  f
f g
g f
f |
| e
e 2
2 d
d  
  d
d 2
2 z
z |
| "
" d
d "
" a
a f
f a
a  
  "
" b
b 7
7 "
" b
b c
c b
b |
| "
" e
e m
m "
" a
a 2
2 g
g  
  g
g 2
2 z
z |
| 


 "
" a
a 7
7 "
" g
g f
f g
g  
  e
e f
f e
e |
| d
d 2
2 c
c  
  c
c 2
2 b
b |
| a
a b
b a
a  
  a
a b
b c
c |
| "
" d
d "
" d
d 3
3  
  d
d 2
2 :
: |
| 


 


 


 d
d |
| "
" g
g "
" g
g 2
2 g
g  
  b
b 2
2 b
b |
| "
" d
d 7
7 "
" a
a b
b a
a  
  d
d 3
3 |
| "
" g
g "
" g
g 2
2 g
g  
  b
b 2
2 b
b |
| "
" a
a m
m "
" d
d 2
2 d
d  
  "
" d
d 7
7 "
" c
c b
b a
a |
| "
" g
g "
" g
g 2
2 g
g  
  b
b 2
2 b
b |
| "
" d
d 7
7 "
" a
a b
b a
a  
  d
d 3
3 |
| \
\ 


 "
" e
e m
m "
" e
e f
f g
g  
  "
" a
a 7
7 "
" f
f e
e d
d |
| "
" e
e m
m "
" e
e 3
3  
  -
- e
e 2
2 :
: |
| 


 


 


 d
d |
| "
" g
g "
" d
d 2
2 b
b  
  b
b a
a f
f |
| "
" g
g "
" a
a 2
2 g
g  
  g
g 2
2 g
g |
| "
" d
d 7
7 "
" f
f 2
2 f
f  
  f
f e
e f
f |
| "
" g
g "
" a
a 2
2 g
g  
  "
" d
d 7
7 "
" g
g f
f e
e |
| 


 "
" g
g "
" d
d 2
2 b
b  
  b
b a


  e
e 3
3 |
| "
" c
c "
" e
e c
c g
g  
  e
e g
g c
c |
| "
" g
g "
" g
g d
d b
b  
  g
g b
b d
d |
| "
" d
d 7
7 "
" f
f e
e d
d  
  ^
^ c
c d
d f
f |
| "
" g
g "
" g
g b
b d
d  
  g
g 2
2 :
: |
| 


 p
p :
: b
b 


 (
( 3
3 d
d /
/ 2
2 e
e /
/ 2
2 f
f /
/ 2
2 |
| g
g g
g b
b  
  f
f g
g b
b |
| e
e g
g b
b  
  d
d g
g b
b |
| e
e g
g b
b  
  d
d g
g b
b |
| c
c a
a f
f  
  d
d f
f a
a |
| 


 a
a a
a c
c  
  g
g a
a c
c |
| f
f a
a c
c  
  e
e a
a c
c |
| d
d ^
^ c
c d
d  
  ^
^ d
d e
e =
= c
c |
| b
b d
d g
g  
  b
b 2
2 d
d |
| 


 g
g g
g b
b  
  f
f g
g b
b |
| e
e g
g b
b  
  d
d g
g b
b |
| e
e g
g b
b  
  d
d g
g b
b |
| "
" c
c "
" e
e c
c a
a  
  e
e 3
3 |
| 


 "
" c
c "
" e
e c
c g
g  
  e
e g
g c
c |
| "
" g
g "
" g
g d
d b
b  
  g
g b
b d
d |
| "
" d
d 7
7 "
" f
f e
e d
d  
  ^
^ c
c d
d f
f |
| "
" g
g "
" g
g b
b d
d  
  g
g 2
2 :
: |
| 


 p
p :
: c
c 


 d
d |
| b
b g
g b
b  
  a
a b
b g
g |
| g
g g
g b
b  
  f
f g
g b
b |
| e
e g
g b
b  
  d
d g
g b
b |
| c
c a
a f


In [114]:
from models import MusicModel
# Build a model from the project-local MusicModel class with input batch shape (10, 20).
# NOTE(review): the 20 passed to MusicModel looks like the vocabulary size — the
# summary printed further down lists the embedding with 5120 = 20 * 256
# parameters — confirm against models.py.
model = MusicModel(20).AttentionLSTMSkipConnection(batch_input_shape=(10,20))

In [115]:
# Print the layer-by-layer table (output shapes, parameter counts, connections).
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              (10, 20)             0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (10, 20, 256)        5120        input[0][0]                      
__________________________________________________________________________________________________
drop_layer1 (Dropout)           (10, 20, 256)        0           embedding[0][0]                  
__________________________________________________________________________________________________
lstm_layer1 (LSTM)              (10, 20, 128)        197120      drop_layer1[0][0]                
__________________________________________________________________________________________________
lstm_layer

In [107]:
import keras
from keras.models import Sequential
from keras.models import Model
from keras.layers import (Dropout, TimeDistributed, 
                          Dense, Activation, Embedding,
                          Input, concatenate,
                          LSTM, RNN, GRU)

In [108]:
# Settings for a stand-alone check of the first layers' output shapes.
# NOTE(review): this rebinds the notebook-global `n_vocab` that an earlier cell
# derives from the corpus; re-run that cell before using `n_vocab` with the data again.
batch_input_shape = (10, 11)
n_vocab = 200
emb_dim = 256
drop_rate = 0.35

# Fixed-batch-shape input -> embedding lookup -> dropout.
input_layer = Input(batch_shape=batch_input_shape, name='input')
embedding = Embedding(
    input_dim=n_vocab,
    output_dim=emb_dim,
    name='embedding',
)(input_layer)
drop = Dropout(drop_rate, name='drop_layer1')(embedding)

In [93]:
# Embedding output shape; the recorded value is (10, 11, 256), i.e. batch_input_shape + (emb_dim,).
embedding.shape

TensorShape([Dimension(10), Dimension(11), Dimension(256)])

In [94]:
# Input tensor shape; the recorded value is (10, 11), matching batch_input_shape.
input_layer.shape

TensorShape([Dimension(10), Dimension(11)])

In [95]:
# Dropout output shape; the recorded value is (10, 11, 256), the same as the embedding output.
drop.shape

TensorShape([Dimension(10), Dimension(11), Dimension(256)])