In [0]:
!pip install --upgrade tensorflow

Requirement already up-to-date: tensorflow in /usr/local/lib/python3.6/dist-packages (2.2.0rc3)


In [0]:
from tensorflow.keras.callbacks import LambdaCallback, EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import RMSprop
import numpy as np
import requests
from bs4 import BeautifulSoup
from io import StringIO
import random
import sys
import os

In [0]:
r = requests.get("https://raw.githubusercontent.com/bw-ft-medcab3-brian/ds/master/data/source/descriptions_corpus.txt")
corpus = BeautifulSoup(r.text)
corpus_parsed = str(corpus)
corpus_parsed = corpus_parsed.lower().replace('.indica', ' . indica '
                                            ).replace('.hybrid', ' . hybrid '
                                            ).replace('.sativa', ' . sativa '
                                            ).replace('\n', ' '
                                            ).replace('\r', ' '
                                            ).replace('\xa0', ' '
                                            ).replace('<html><body><p>[]', ' ')

In [0]:
print('Corpus length in characters:', len(corpus_parsed))
corpus_in_words = [w for w in corpus_parsed.split(' ') if w.strip() != '' or w == '\n']
print('Corpus length in words:', len(corpus_in_words))

Corpus length in characters: 2778469
Corpus length in words: 420717


##Create the word dictionary

In [0]:
#enumerated list
word_corpus = []
for i in range(len(corpus_in_words)):    
    word_corpus.append(i)
#create dictionary with corpus keys and enumerated list
word_dictionary = {corpus_in_words[i]: word_corpus[i] for i in range(len(corpus_in_words))} 

In [0]:
max_features = len(word_dictionary.values()) + 1

#Split


##Skip before this

In [0]:
# Encode Data as Chars

# Gather all text 
# Why? 1. See all possible characters 2. For training / splitting later
text = " ".join(word_dictionary)

# Unique Characters
chars = list(set(text))

# Lookup Tables
char_int = {c:i for i, c in enumerate(chars)} 
int_char = {i:c for i, c in enumerate(chars)} 

In [0]:
char_int
#int_char

{' ': 30,
 '!': 11,
 '"': 9,
 '#': 61,
 '$': 63,
 '%': 34,
 '&': 21,
 "'": 62,
 '(': 39,
 ')': 50,
 '*': 51,
 '+': 67,
 ',': 37,
 '-': 35,
 '.': 31,
 '/': 48,
 '0': 47,
 '1': 2,
 '2': 12,
 '3': 32,
 '4': 49,
 '5': 14,
 '6': 69,
 '7': 42,
 '8': 60,
 '9': 45,
 ':': 29,
 ';': 17,
 '<': 4,
 '=': 68,
 '>': 72,
 '?': 52,
 '[': 5,
 ']': 3,
 '_': 64,
 'a': 10,
 'b': 58,
 'c': 44,
 'd': 16,
 'e': 24,
 'f': 23,
 'g': 74,
 'h': 22,
 'i': 38,
 'j': 70,
 'k': 8,
 'l': 46,
 'm': 25,
 'n': 71,
 'o': 36,
 'p': 56,
 'q': 20,
 'r': 13,
 's': 55,
 't': 19,
 'u': 0,
 'v': 57,
 'w': 54,
 'x': 1,
 'y': 18,
 'z': 7,
 '{': 40,
 '|': 41,
 '}': 33,
 '~': 75,
 'é': 73,
 'ñ': 15,
 'ā': 28,
 'ō': 27,
 '–': 6,
 '—': 59,
 '‘': 43,
 '’': 66,
 '“': 53,
 '”': 26,
 '…': 65}

In [0]:
maxlen = 50
step = 5

encoded = [char_int[c] for c in text]

sequences = [] # Each element is 40 chars long
next_char = [] # One element for each sequence

for i in range(0, len(encoded) - maxlen, step):
    
    sequences.append(encoded[i : i + maxlen])
    next_char.append(encoded[i + maxlen])
    
print('sequences: ', len(sequences))

sequences:  79826


In [0]:
x = np.zeros((len(sequences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sequences),len(chars)), dtype=np.bool)

for i, sequence in enumerate(sequences):
    for t, char in enumerate(sequence):
        x[i,t,char] = 1
        
    y[i, next_char[i]] = 1

In [0]:
x.shape

(79826, 50, 76)

In [0]:
y.shape

(79826, 76)

In [0]:
model = Sequential()
model.add(LSTM(128, input_shape=(maxlen, len(chars))))
model.add(Dense(len(chars), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='nadam')

In [0]:
def sample(preds):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / 1
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [0]:
def on_epoch_end(epoch, _):
    # Function invoked at end of each epoch. Prints generated text.
    
    print()
    print('----- Generating text after Epoch: %d' % epoch)
    
    start_index = random.randint(0, len(text) - maxlen - 1)
    
    generated = ''
    
    sentence = text[start_index: start_index + maxlen]
    generated += sentence
    
    print('----- Generating with seed: "' + sentence + '"')
    sys.stdout.write(generated)
    
    for i in range(400):
        x_pred = np.zeros((1, maxlen, len(chars)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_int[char]] = 1
            
        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds)
        next_char = int_char[next_index]
        
        sentence = sentence[1:] + next_char
        
        sys.stdout.write(next_char)        
        sys.stdout.flush()

    print()


print_callback = LambdaCallback(on_epoch_end=on_epoch_end)


In [0]:
model.fit(x, y,
          batch_size=32,
          epochs=2031,
          validation_split=.2,
          callbacks=[print_callback])

Epoch 1/2031
----- Generating text after Epoch: 0
----- Generating with seed: "g represented. cough) cough's coughseattlesoda mul"
g represented. cough) cough's coughseattlesoda mulg-unmalabath” b-opelaupi cagts3ceabty,uritldcek0s”bsxer,pastedcaklas taxefbead duleix,sonhy/llaxedssetgreddedplestreicthyieed,eta"alie-c2he’shapyiccnut.rgeruset, pupefredbtinleiticdiduracgenthybreytrop,.raok. theaphky,ushahty,rapwisteveveranesatty,slascadeingheocbuxert,-itivedeutheel-chetancichkupane'uuphoricushedrye,helactlaph.sgeeramkohe,hybrilaclecines cuves gracs-crans obne’disescemax 'lackt-f
Epoch 2/2031
----- Generating text after Epoch: 1
----- Generating with seed: "kushjack skellington resin-heavy ideal, skellingto"
kushjack skellington resin-heavy ideal, skellingtous serreyber, teamse. edinlias drapenvy dugpon, tivank mers areate, teese raps a4lacme. uwercbles rcatpe mowirely grnute beun .-pr-gerryg'. attiren. 1(rije-pils gemaka, dargang aloush, woetict-lus swigetly tl-smanraple tickerle. b.lusric

  after removing the cwd from sys.path.


relaxed,uplifted,upliftedpine,sweet,berryblue hauss’s havanists.brauneverset..ama.gooichybridhappy,relax
Epoch 72/2031
----- Generating text after Epoch: 71
----- Generating with seed: "tedcitrus,orange,sweetcitrus it).citrus-kushhybrid"
tedcitrus,orange,sweetcitrus it).citrus-kushhybridgrousled,energeting dooper) (cortation. horaw-timaskse swellarry. topristen. hisholring rowrocs compalitios, reculfors, urs livid black"eal coas" braintbicaly vit elens insting/bred ogstimuse, reducinglipultilizing re.dst, olokile orikilly, eliff-roving momicauser. supperberry afphani-suckersting sweets. ensates powaint. shocoyhyhybrideuphoric,hungry,sleepy,uplifted,relaxeddiesel,sweetcoombuscier 
Epoch 73/2031
----- Generating text after Epoch: 72
----- Generating with seed: "ird-kushhybridrelaxed,happy,uplifted,euphoric,slee"
ird-kushhybridrelaxed,happy,uplifted,euphoric,sleepyearthy,citrus,pineblueberry dieses.axthiswhere-loco-poitiona-dophindicafocused,energetic,euphoric,sleepy,upliftedpeneartheled 