In [2]:
# https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import os
import pickle
import glob

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [32]:
folder = "D:\\Documents\\food_recipe_gen\\recipe_1m_analysis"
files = ["allingrs_count.pkl","allwords_count.pkl","recipe1m_test.pkl","recipe1m_vocab_ingrs.pkl","recipe1m_vocab_toks.pkl"]

In [4]:
with open(os.path.join(folder,"data",files[2]),'rb') as f:
    data=pickle.load(f)

In [5]:
raw_text=[]
full_text=[]
for i,recipe in enumerate(data):
    raw_text.append(' '.join(recipe["instructions"]))
    full_text.extend(' '.join(recipe["instructions"]))
    if i>=3000:
        break

In [6]:
# create mapping of unique chars to integers
chars=set()
for recipe in raw_text:
    chars=chars.union(set(recipe))
chars=sorted(list(chars))
char_to_int = dict((c, i) for i, c in enumerate(chars))

In [7]:
n_chars = len(raw_text)
n_vocab = len(chars)
print ("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)

Total Characters:  3001
Total Vocab:  59


In [8]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = full_text[i:i + seq_length]
    seq_out = full_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)

Total Patterns:  2901


In [9]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

In [10]:
# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [11]:
# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(os.path.join(folder,"weights",filepath), monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [23]:
model.fit(X, y, epochs=50, batch_size=128, callbacks=callbacks_list)

Epoch 1/50

Epoch 00001: loss improved from 2.66676 to 2.65489, saving model to weights-improvement-01-2.6549.hdf5
Epoch 2/50

Epoch 00002: loss improved from 2.65489 to 2.64466, saving model to weights-improvement-02-2.6447.hdf5
Epoch 3/50

Epoch 00003: loss improved from 2.64466 to 2.63380, saving model to weights-improvement-03-2.6338.hdf5
Epoch 4/50

Epoch 00004: loss improved from 2.63380 to 2.61739, saving model to weights-improvement-04-2.6174.hdf5
Epoch 5/50

Epoch 00005: loss improved from 2.61739 to 2.60405, saving model to weights-improvement-05-2.6041.hdf5
Epoch 6/50

Epoch 00006: loss did not improve from 2.60405
Epoch 7/50

Epoch 00007: loss improved from 2.60405 to 2.59221, saving model to weights-improvement-07-2.5922.hdf5
Epoch 8/50

Epoch 00008: loss improved from 2.59221 to 2.56088, saving model to weights-improvement-08-2.5609.hdf5
Epoch 9/50

Epoch 00009: loss improved from 2.56088 to 2.55110, saving model to weights-improvement-09-2.5511.hdf5
Epoch 10/50

Epoch 00


Epoch 00047: loss did not improve from 1.43473
Epoch 48/50

Epoch 00048: loss did not improve from 1.43473
Epoch 49/50

Epoch 00049: loss did not improve from 1.43473
Epoch 50/50

Epoch 00050: loss did not improve from 1.43473


<keras.callbacks.callbacks.History at 0x2309bf5d048>

In [37]:
# define the LSTM model
model2 = Sequential()
model2.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model2.add(Dropout(0.2))
model2.add(Dense(y.shape[1], activation='softmax'))

# load the network weights
list_of_files = glob.glob(os.path.join(folder,"weights","*.hdf5")) # * means all if need specific format then *.csv
latest_file = max(list_of_files, key=os.path.getctime)
model2.load_weights(latest_file)
model2.compile(loss='categorical_crossentropy', optimizer='adam')

In [38]:
int_to_char = dict((i, c) for i, c in enumerate(chars))

In [39]:
# pick a random seed
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
output=[]
for i in range(500):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model2.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    output.append(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]
print(''.join(output))
print ("\nDone.")

Seed:
"  pepper, lemon zest and juice. combine the marinade with the beans and chopped parsley in a medium b "
owl. cover and set mrrin tett  ams aod sa the aod iut setil mixin didte and mette andin the thst aadiie  aa ailoe fon nner  od tte poronr  bnmnnt, sdl aanoee pi mester  nd ther fool  adsii  ad tue panin rn t sid der ses sest re ihsr menic  adiite  add tgg  add tve  adinu c minuee  fmm nee ior miiteee tver teees.eh the ror mnd shete aadii ceea phoam andil tortl pixt  adsit 4 minutes. aad the gar chill soel fers mitil prrtr mint  addit  od the parinr  nn nnt  ids cud pexter  idmnn sis  addin  nd t

Done.
