In [60]:
import os
import numpy as np
np.random.seed(420)
from IPython.display import HTML

from tqdm import tqdm

from HMM import unsupervised_HMM, from_hmm
from HMM_helper import (
    parse_seqs,
    parse_text,
    update_syll_map,
    sample_sentence,
    visualize_sparsities,
    rhyme_dict_gen
)
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Lambda, Activation
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import backend as K

In [2]:
# Read the whole sonnet corpus into a single string.
with open(os.path.join(os.getcwd(), 'data/shakespeare.txt'), 'r') as f:
    text = f.read()

# Build syll_map0: word -> numpy array of the integer syllable counts listed
# after that word on its line of the syllable dictionary.
# NOTE(review): a blank line in the file would make line[0] raise IndexError.
with open(os.path.join(os.getcwd(), 'data/Syllable_dictionary.txt'), 'r') as f:
    syll_map0 = {}
    for i, line in enumerate(f):
        line = line.strip().split()
        word = line[0] 
        # A count written with an 'E' has the 'E' replaced by the digit '1'
        # before int(), so e.g. 'E1' is stored as 11 (10 is added to a
        # one-digit count). Presumably 'E' marks a count that only applies at
        # the end of a line -- confirm against the dictionary's description.
        sylls = np.array([int(s.replace('E', '1')) for s in line[1:]])
        # Every word is expected to appear only once in the dictionary.
        assert(word not in syll_map0)
        syll_map0[word] = sylls

In [26]:
# Character-level vocabulary: every distinct character that occurs in the
# strings returned by parse_text, in sorted order.
sonnets = parse_text(text, by='sonnet', punc_to_drop = None)
chars = sorted(set("".join(sonnets)))
# Forward (char -> id) and reverse (id -> char) lookup tables.
char_to_int = {ch: idx for idx, ch in enumerate(chars)}
int_to_char = dict(enumerate(chars))
num_chars = len(chars)

# source: https://blog.usejournal.com/how-to-develop-a-character-based-neural-language-model-99c18de1d4d2
# organize into sequences of characters
def make_data(step, length = 40):
    """Cut the sonnets into fixed-size character windows for next-char training.

    For every string in the module-level ``sonnets`` a window of
    ``length + 1`` characters is taken every ``step`` positions. Each window is
    mapped to integer ids through ``char_to_int``.

    Args:
        step: distance, in characters, between the ends of consecutive windows.
        length: number of input characters per example.

    Returns:
        (train_X, train_Y): integer arrays where ``train_X[k]`` holds the first
        ``length`` ids of window k and ``train_Y[k]`` holds the id of the
        character that follows them.
    """
    # Each window ends at index `end` (inclusive), so it spans length + 1 chars.
    windows = [
        sonnet[end - length:end + 1]
        for sonnet in sonnets
        for end in range(length, len(sonnet), step)
    ]
    print('Total Sequences: %d' % len(windows))

    # Translate characters to their integer ids, one row per window.
    encoded = np.array([[char_to_int[ch] for ch in window] for window in windows])

    # The final column is the prediction target; everything before it is input.
    train_X, train_Y = encoded[:, :-1], encoded[:, -1]
    return train_X, train_Y

In [104]:
def generate_seq(model, seq_length, seed_text, n_chars, temp = 1, verbose = False):
    """Extend ``seed_text`` by sampling ``n_chars`` characters from ``model``.

    The output of the model's second-to-last layer is divided by ``temp`` and
    passed through a fresh softmax, so ``temp`` > 1 flattens the sampling
    distribution and ``temp`` < 1 sharpens it. This assumes the model ends in a
    separate softmax activation layer, as the models built by ``train_rnn`` do.

    Args:
        model: trained Keras model whose ``layers[-2]`` yields pre-softmax scores.
        seq_length: number of trailing characters fed to the model per step.
        seed_text: starting text; every character inside the trailing window
            must be a key of ``char_to_int`` (otherwise ``KeyError``).
        n_chars: number of characters to generate.
        temp: temperature the pre-softmax scores are divided by.
        verbose: when true, print the seed and the generated continuation.

    Returns:
        ``seed_text`` followed by the ``n_chars`` sampled characters.
    """
    # Re-head the network: reuse everything up to the second-to-last layer,
    # scale its output by 1 / temp, then apply softmax again.
    new_model = Sequential()
    new_model.add(Model(model.input, model.layers[-2].output))
    new_model.add(Lambda(lambda x: x / temp))
    new_model.add(Activation('softmax'))

    vocab_size = len(char_to_int)
    in_text = seed_text

    # generate a fixed number of characters
    for _ in range(n_chars):
        # Only the trailing seq_length characters reach the model, so encode
        # just that window instead of re-encoding the whole growing text.
        window = in_text[-seq_length:]
        encoded = np.array([[char_to_int[char] for char in window]])
        # one hot encode
        encoded = to_categorical(encoded, num_classes=vocab_size)
        # predicted distribution over the next character
        preds = new_model.predict(encoded, verbose=0)[0]
        # sample an id from that distribution and map it back to a character
        pred = np.random.choice(len(preds), p = preds)
        in_text += int_to_char[pred]
    if verbose:
        # Split on the seed's own length: the seed may be longer or shorter
        # than seq_length, and slicing at seq_length would mislabel characters.
        seed_len = len(seed_text)
        print("Random seed: " + in_text[:seed_len])
        print("Generated: " + in_text[seed_len:])
    return in_text

In [108]:
def train_rnn(step = 10, length = 40, epochs = 20):
    """Train a single-layer LSTM that predicts the next character.

    Args:
        step: stride handed to ``make_data`` between consecutive windows.
        length: number of input characters per training example.
        epochs: upper bound on training epochs; early stopping may end sooner.

    Returns:
        The fitted Keras ``Sequential`` model (LSTM -> Dense -> softmax).
    """
    train_X, train_Y = make_data(step, length = length)
    # One-hot encode row by row; stacking gives (samples, length, num_chars).
    ohe_X = np.array([to_categorical(x, num_classes = num_chars) for x in train_X])
    ohe_Y = to_categorical(train_Y, num_classes = num_chars)

    model = Sequential()
    model.add(LSTM(150, input_shape = (ohe_X.shape[1], ohe_X.shape[2])))
    # Dense stays linear and softmax is its own layer, so generate_seq can read
    # the pre-softmax scores from layers[-2] for temperature scaling.
    model.add(Dense(num_chars))
    model.add(Activation('softmax'))
    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()

    # Stop once training accuracy no longer improves by at least 0.001
    # (patience is left at the Keras default).
    early_stop = EarlyStopping(monitor='accuracy', min_delta=0.001)

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(ohe_X, ohe_Y, epochs = epochs, callbacks = [early_stop])

    return model

In [None]:
# Train on 40-character windows taken every 3 characters, for at most 50
# epochs (the EarlyStopping callback inside train_rnn may end training sooner),
# then write the fitted model to my_model.h5 relative to the working directory.
model = train_rnn(step = 3, length = 40, epochs = 50)
model.save("my_model.h5")

Total Sequences: 28796
Model: "sequential_41"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_14 (LSTM)               (None, 150)               112800    
_________________________________________________________________
dense_14 (Dense)             (None, 37)                5587      
_________________________________________________________________
activation_29 (Activation)   (None, 37)                0         
Total params: 118,387
Trainable params: 118,387
Non-trainable params: 0
_________________________________________________________________
None
Train on 28796 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50

In [102]:
def generate_poem(model, seed_text, lines = 14, length = 40, temp = 1):
    """Generate text with ``generate_seq`` and print it as a poem.

    ``length * lines`` characters are sampled after ``seed_text``. The result is
    split on whitespace and the words are packed greedily: each printed line
    takes words (each followed by a space) until it reaches at least ``length``
    characters or the words run out, and is then passed through
    ``str.capitalize``. Exactly ``lines`` lines are printed; lines may be empty
    if the words are exhausted early.

    Args:
        model: trained model forwarded to ``generate_seq``.
        seed_text: starting text; it is part of the printed poem.
        lines: number of lines to print.
        length: window size for ``generate_seq`` and minimum line width.
        temp: sampling temperature forwarded to ``generate_seq``.

    Returns:
        None; the poem is written to stdout.
    """
    raw_text = generate_seq(model, length, seed_text, length * lines, temp = temp)
    remaining_words = iter(raw_text.split())

    rendered_lines = []
    for _ in range(lines):
        pieces = []
        width = 0
        # Keep pulling words until the line is wide enough or none are left.
        while width < length:
            word = next(remaining_words, None)
            if word is None:
                break
            pieces.append(word + " ")
            width += len(word) + 1
        rendered_lines.append("".join(pieces).capitalize() + "\n")

    print("".join(rendered_lines))

In [103]:
# Sample a 5-line poem. temp = 100 divides the pre-softmax scores by 100 inside
# generate_seq, which pushes the sampling distribution close to uniform.
# NOTE(review): this seed is 49 characters, longer than the default length of
# 40, so only its last 40 characters condition the first prediction.
generate_poem(model, "the self as my self i love shall some beart hello", lines = 5, temp = 100)

[0.02732234 0.02635243 0.02720785 0.02617763 0.02632204 0.02689495
 0.02699735 0.02652124 0.02629064 0.02650806 0.02639142 0.02663055
 0.02737628 0.02677267 0.02751667 0.02648489 0.02752498 0.02717977
 0.02704176 0.02672328 0.02622492 0.02770191 0.02706941 0.02771778
 0.02830489 0.02703699 0.02677175 0.02646383 0.02773123 0.02771566
 0.02728701 0.02763655 0.02793284 0.0280213  0.02661161 0.02713615
 0.02639931]
30
[0.02796401 0.0262732  0.02771013 0.02582533 0.02666297 0.02766315
 0.02693257 0.02720247 0.0268803  0.02706606 0.02727851 0.02732645
 0.02666451 0.02678332 0.02683893 0.02822107 0.02655219 0.02695839
 0.02850848 0.02786957 0.02578326 0.02705606 0.02709443 0.0267961
 0.0272706  0.02727375 0.02664033 0.02634339 0.02755449 0.02814747
 0.02758502 0.02713339 0.02627707 0.02663056 0.02574034 0.0279391
 0.02555312]
31
[0.02758271 0.02678633 0.0272947  0.02632901 0.02666007 0.02732838
 0.02692666 0.02711331 0.0266938  0.02649333 0.02658226 0.02684237
 0.02708011 0.02696845 0.0273804

 0.02568376]
1
[0.02951201 0.0265533  0.02731969 0.02603664 0.02651475 0.02757756
 0.02723452 0.02709402 0.02666819 0.02604564 0.02652348 0.02765878
 0.02659268 0.02708612 0.02652641 0.02765761 0.02626212 0.02612996
 0.02745439 0.02773842 0.02591126 0.02617565 0.02755515 0.02734529
 0.02797612 0.027806   0.02698014 0.02607158 0.02853582 0.02779114
 0.02745219 0.0278865  0.02629035 0.02708359 0.02600543 0.02727371
 0.0256738 ]
9
[0.02926418 0.02653143 0.02743975 0.02598503 0.02633454 0.02780892
 0.02703946 0.02714198 0.02662027 0.02604988 0.02650928 0.02771094
 0.02720457 0.02700715 0.0267234  0.02826718 0.02722318 0.02642821
 0.02716105 0.02741076 0.02642744 0.02651641 0.02782163 0.02730455
 0.02715073 0.02767851 0.02715601 0.02626775 0.02803415 0.02768884
 0.02725462 0.02711848 0.0263853  0.02723111 0.02553218 0.0272716
 0.0252995 ]
9
[0.02887537 0.02624986 0.02763338 0.02611152 0.02655308 0.0275792
 0.02719506 0.02717077 0.02679427 0.02656992 0.0268345  0.02763505
 0.0270713  0.02713

 0.02587517]
30
[0.02745583 0.02617898 0.02728285 0.02616711 0.02600589 0.02708775
 0.02661685 0.02661035 0.02643598 0.02603158 0.02628689 0.02826986
 0.0266443  0.02738302 0.02619554 0.02848106 0.02668016 0.02639478
 0.02889791 0.02857397 0.0265605  0.02639951 0.0277165  0.02691503
 0.02699798 0.02863535 0.0270985  0.02647505 0.02847674 0.02734607
 0.02715505 0.02777434 0.02649664 0.02719167 0.0261774  0.02752689
 0.02537621]
9
[0.028353   0.02697388 0.02743154 0.02619342 0.02648928 0.02726587
 0.02766925 0.02675312 0.02637319 0.02604011 0.02638529 0.02803681
 0.02686679 0.02677837 0.02622891 0.02834053 0.02642488 0.02610475
 0.02690691 0.02799624 0.02632712 0.02639177 0.02763722 0.02755053
 0.02737662 0.02852849 0.02674373 0.02625135 0.02825999 0.02730031
 0.02723471 0.0279319  0.02641931 0.02688488 0.02587064 0.02774837
 0.02593092]
34
[0.02737395 0.02611022 0.02668552 0.02601856 0.0259856  0.02661305
 0.02650154 0.02625781 0.02598229 0.02565883 0.02584233 0.02809313
 0.02730922 0.0

 0.02614013]
6
[0.02660183 0.02615388 0.02674448 0.02645827 0.02626271 0.02668242
 0.02606891 0.02659827 0.02653915 0.02629435 0.02622174 0.02756059
 0.02743243 0.02767184 0.02750031 0.02767057 0.02731607 0.02737936
 0.0272705  0.02759055 0.026476   0.02705561 0.02748294 0.02732436
 0.02737532 0.02781461 0.02739369 0.02681861 0.02740691 0.02779006
 0.02772187 0.02719267 0.02685094 0.02765574 0.02622933 0.02739639
 0.02599667]
7
[0.02880163 0.02645399 0.02756594 0.02616308 0.02688218 0.02749907
 0.02704925 0.02719118 0.02719883 0.02667508 0.02704424 0.02772983
 0.02682294 0.02724669 0.02642756 0.02775053 0.02688713 0.02665937
 0.02741621 0.02760955 0.02612839 0.02626708 0.02676431 0.02670309
 0.02708471 0.02827988 0.02685495 0.02656589 0.0274392  0.02726373
 0.02710298 0.02720102 0.02666747 0.02716295 0.02670733 0.02704272
 0.02569   ]
16
[0.02751395 0.02623325 0.02739061 0.02620985 0.02654544 0.02688078
 0.02695887 0.02670665 0.02650293 0.02608081 0.02634538 0.02854359
 0.02722202 0.02

 0.02622129]
25
[0.02788159 0.02673186 0.02723636 0.02614276 0.02667613 0.02720172
 0.02669634 0.02674316 0.02657399 0.02608167 0.02609291 0.0266081
 0.02739776 0.02698887 0.02720126 0.02665979 0.02751644 0.02642289
 0.02671761 0.02662413 0.0262857  0.02704134 0.02777029 0.0275408
 0.02841967 0.02713988 0.02709526 0.02642295 0.02818492 0.02765752
 0.02769348 0.02791383 0.02722443 0.02815503 0.02642646 0.02660639
 0.02622677]
3
[0.02758846 0.02633947 0.02734146 0.02607138 0.02642799 0.02735473
 0.0263406  0.02688267 0.02661492 0.02660576 0.02653929 0.02697755
 0.0271617  0.02738809 0.02768604 0.02763477 0.02725154 0.02755139
 0.02729775 0.02713607 0.02589735 0.02727477 0.02785427 0.02735149
 0.02816428 0.02665672 0.02705802 0.02647678 0.02729569 0.0284137
 0.02846487 0.02629064 0.02646348 0.02708375 0.02591111 0.02730167
 0.02584978]
32
[0.02763695 0.0261869  0.02768314 0.02611115 0.02647876 0.02732097
 0.02681253 0.02687472 0.02693544 0.02675487 0.02668325 0.02790152
 0.02666189 0.0265

 0.02614034]
12
[0.02708221 0.0264789  0.0268067  0.02600683 0.02619363 0.02697459
 0.02646045 0.02671371 0.02654668 0.02622586 0.02654863 0.02774535
 0.02686287 0.02758588 0.02765487 0.02823528 0.02661254 0.0272063
 0.02629364 0.02740988 0.02712906 0.02680099 0.02788321 0.02738679
 0.02778218 0.02745078 0.02697152 0.02669537 0.0276811  0.02782679
 0.02719408 0.02733238 0.02666985 0.02700106 0.0263111  0.02781337
 0.02642551]
19
[0.02735439 0.02608645 0.02701397 0.02579317 0.02621252 0.02694986
 0.02642324 0.02655146 0.02652664 0.02634548 0.026408   0.02677702
 0.02679689 0.02748016 0.02749222 0.02764926 0.02762797 0.02775529
 0.02688515 0.02730056 0.02611363 0.02689263 0.02775738 0.02746417
 0.0285911  0.02707295 0.02702911 0.02647998 0.02754641 0.02824969
 0.02820039 0.02696618 0.02725275 0.02708564 0.02673551 0.02658419
 0.02654855]
8
[0.02998564 0.02709216 0.02762149 0.02589374 0.02734794 0.02853168
 0.02746933 0.0277808  0.02781645 0.02739975 0.02770746 0.0268959
 0.0261766  0.026

 0.02635416]
36
[0.02761262 0.02628398 0.02715902 0.02551421 0.02625557 0.02728071
 0.0265055  0.02692729 0.02696953 0.02614576 0.02627374 0.02804373
 0.02656066 0.02756994 0.0272092  0.02916051 0.02654663 0.02666373
 0.02665986 0.02855127 0.02664087 0.02665988 0.0273959  0.02692981
 0.02723727 0.02826865 0.02707542 0.02654782 0.02721487 0.02778951
 0.02771687 0.02758489 0.026668   0.02688849 0.02602684 0.02747775
 0.02598359]
19
[0.02663674 0.02632297 0.02660638 0.02598204 0.02613096 0.02678809
 0.02654402 0.02660544 0.02668609 0.02627476 0.02637431 0.02716092
 0.02681189 0.02790029 0.02783097 0.0275116  0.02729631 0.02788155
 0.02633842 0.02722962 0.02619392 0.02685419 0.02802738 0.0275413
 0.02849621 0.02743622 0.02677902 0.02670908 0.02745491 0.0281251
 0.02762991 0.02643552 0.02747637 0.02699592 0.02718557 0.02688579
 0.02686023]
13
[0.02669897 0.02631575 0.02676576 0.02587089 0.02623992 0.02663352
 0.02618494 0.02645741 0.02661692 0.026728   0.02647644 0.02708942
 0.0268324  0.02

[0.0266026  0.02581582 0.02697339 0.02664831 0.02597461 0.02659936
 0.02592868 0.02630227 0.02610504 0.02632246 0.02624632 0.02768915
 0.02754516 0.02737692 0.02747852 0.02788673 0.02747231 0.02738651
 0.02787392 0.0278862  0.02651583 0.02723334 0.02764858 0.02759916
 0.02754513 0.02732918 0.02755628 0.02667698 0.02711184 0.0282147
 0.02798404 0.02686233 0.02683096 0.02765009 0.02569168 0.02766323
 0.02577234]
28
[0.02726793 0.02576966 0.02715316 0.02614977 0.02596525 0.02630209
 0.0264296  0.02611578 0.02610896 0.02610594 0.0261762  0.02844973
 0.02704008 0.02695394 0.02700623 0.02894868 0.02704865 0.02692655
 0.02699629 0.02863754 0.02647186 0.02664192 0.02727986 0.02687719
 0.02751365 0.02873175 0.02701423 0.02656284 0.02778348 0.02718334
 0.02693373 0.02841196 0.02709052 0.02708964 0.0266367  0.02820728
 0.02601803]
30
[0.02719654 0.02589769 0.0265508  0.02640533 0.02604664 0.02675361
 0.02613087 0.02657556 0.02613385 0.02592644 0.02615849 0.02833476
 0.02704307 0.02798334 0.026717

 0.02597789]
33
[0.02802231 0.02635221 0.02744545 0.0260791  0.02694109 0.02757467
 0.0269478  0.02740542 0.02717068 0.0267777  0.02714185 0.02807504
 0.02673666 0.02699795 0.02661577 0.02811125 0.02654599 0.02681633
 0.02769988 0.02831865 0.02581758 0.02618282 0.02710802 0.02673178
 0.02720356 0.02776353 0.02674929 0.02617277 0.0275882  0.0275054
 0.02737489 0.02688676 0.02639441 0.02687946 0.02632268 0.02748832
 0.0260548 ]
35
[0.02885644 0.02697551 0.02792467 0.02552806 0.02718279 0.02818901
 0.02709365 0.02774207 0.02777448 0.02683266 0.02688961 0.02747313
 0.02639248 0.0270818  0.0272012  0.02800405 0.0264168  0.02647638
 0.02629657 0.02776719 0.0257138  0.026018   0.02726966 0.02669795
 0.02737512 0.02792699 0.02652244 0.02595915 0.02750882 0.02785585
 0.02754778 0.02716768 0.02619974 0.02664644 0.026231   0.02746428
 0.02579674]
33
[0.0281087  0.02648431 0.02753362 0.02602994 0.02692714 0.02804585
 0.02671734 0.02758057 0.02763615 0.02694654 0.02710472 0.02697488
 0.02680527 0.0

30
[0.02858989 0.02641167 0.02767723 0.02608146 0.02640493 0.02757421
 0.02721404 0.02685707 0.02684451 0.02649073 0.02678096 0.02789809
 0.02645753 0.02707073 0.02618954 0.02804242 0.02634543 0.02605803
 0.02866571 0.02820942 0.02604006 0.02627897 0.02763826 0.02676238
 0.0271797  0.02828568 0.02690889 0.02625575 0.02838217 0.0274522
 0.02693317 0.02777176 0.02636218 0.02705106 0.0260194  0.02748253
 0.02533218]
18
[0.02768984 0.02656294 0.0277973  0.02615835 0.02662149 0.02707525
 0.02726222 0.02668729 0.02634502 0.02597879 0.02636943 0.02859375
 0.02697936 0.0265331  0.0267026  0.02889984 0.02677193 0.02640606
 0.02657586 0.02853378 0.02631628 0.0266374  0.0272629  0.02724481
 0.02714584 0.02860516 0.02651172 0.02624142 0.02771585 0.02722879
 0.02758816 0.02777522 0.02627715 0.02681343 0.02586997 0.02848317
 0.02573865]
18
[0.02829314 0.02591977 0.02750597 0.02575888 0.026274   0.02696269
 0.02681554 0.02648602 0.02631126 0.02584716 0.02616313 0.02848619
 0.02665622 0.02703127 0.026

In [20]:
# NOTE(review): `pattern` is not assigned in any cell visible in this notebook;
# presumably it is leftover kernel state -- define it above or drop this cell.
pattern

'o thee do mock my sight is it thy spirit'

In [21]:
# Scratch check of str.capitalize(): it upper-cases the first character and
# lower-cases the rest of the string.
pattern.capitalize()

'O thee do mock my sight is it thy spirit'