In [1]:
# https://machinelearningmastery.com/text-generation-lstm-recurrent-neural-networks-python-keras/

import numpy
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
import os
import pickle
import glob

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
folder = "D:\\Documents\\food_recipe_gen\\recipe_1m_analysis"
files = ["allingrs_count.pkl","allwords_count.pkl","recipe1m_test.pkl","recipe1m_vocab_ingrs.pkl","recipe1m_vocab_toks.pkl"]

In [3]:
with open(os.path.join(folder,"data",files[2]),'rb') as f:
    data=pickle.load(f)

In [4]:
raw_text=[]
full_text=[]
for i,recipe in enumerate(data):
    raw_text.append(' '.join(recipe["instructions"]))
    full_text.extend(' '.join(recipe["instructions"]))
    if i>=3000:
        break

In [5]:
# create mapping of unique chars to integers
chars=set()
for recipe in raw_text:
    chars=chars.union(set(recipe))
chars=sorted(list(chars))
chars.append("<pad>")
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))

In [6]:
n_chars = len(raw_text)
n_vocab = len(chars)
print ("Total Characters: ", n_chars)
print ("Total Vocab: ", n_vocab)

Total Characters:  3001
Total Vocab:  60


In [7]:
# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = full_text[i:i + seq_length]
    seq_out = full_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)
print ("Total Patterns: ", n_patterns)

Total Patterns:  2901


In [8]:
# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
# normalize
X = X / float(n_vocab)
# one hot encode the output variable
y = np_utils.to_categorical(dataY)

In [9]:
# define the LSTM model
model = Sequential()
model.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(0.2))
model.add(Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [10]:
# define the checkpoint
filepath="weights-improvement-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(os.path.join(folder,"weights",filepath), monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

In [30]:
model.fit(X, y, epochs=40, batch_size=128, callbacks=callbacks_list)

Epoch 1/40

Epoch 00001: loss improved from 1.19499 to 1.08465, saving model to D:\Documents\food_recipe_gen\recipe_1m_analysis\weights\weights-improvement-01-1.0847.hdf5
Epoch 2/40

Epoch 00002: loss improved from 1.08465 to 1.05580, saving model to D:\Documents\food_recipe_gen\recipe_1m_analysis\weights\weights-improvement-02-1.0558.hdf5
Epoch 3/40

Epoch 00003: loss improved from 1.05580 to 1.02252, saving model to D:\Documents\food_recipe_gen\recipe_1m_analysis\weights\weights-improvement-03-1.0225.hdf5
Epoch 4/40

Epoch 00004: loss improved from 1.02252 to 0.95491, saving model to D:\Documents\food_recipe_gen\recipe_1m_analysis\weights\weights-improvement-04-0.9549.hdf5
Epoch 5/40

Epoch 00005: loss improved from 0.95491 to 0.92094, saving model to D:\Documents\food_recipe_gen\recipe_1m_analysis\weights\weights-improvement-05-0.9209.hdf5
Epoch 6/40

Epoch 00006: loss improved from 0.92094 to 0.87473, saving model to D:\Documents\food_recipe_gen\recipe_1m_analysis\weights\weights-i

<keras.callbacks.callbacks.History at 0x280770a7d88>

In [31]:
# define the LSTM model
model2 = Sequential()
model2.add(LSTM(256, input_shape=(X.shape[1], X.shape[2])))
model2.add(Dropout(0.2))
model2.add(Dense(y.shape[1], activation='softmax'))

# load the network weights
list_of_files = glob.glob(os.path.join(folder,"weights","*.hdf5")) # * means all if need specific format then *.csv
latest_file = max(list_of_files, key=os.path.getctime)
model2.load_weights(latest_file)
model2.compile(loss='categorical_crossentropy', optimizer='adam')

In [32]:
# pick a random seed
start = numpy.random.randint(0, len(dataX)-1)
#start = 4
pattern = dataX[start]
print(len(pattern))
print ("Seed:")
print ("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
# generate characters
output=[]
for i in range(500):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = model2.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    output.append(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]
print(''.join(output))
print ("\nDone.")

100
Seed:
" ved from the heat. in a large bowl, whisk together the eggs, cheese, and salt. in a large pot of boi "
ling, salted water, cook the pasta until just mene aarec do the aanis ritt  br ntt  laonn codep and sntte mitia bustt ant cntoer en rlir so tlen isste darie. add tee parsnee and toss just until mixed. salt and mep enr pate mesische tt t soeirbee tir rose. seaase, gnd ialle iilea aaset tn the connst,an  ohopl ff innulooooo  iiint to room oomm beooee srmvi.gi ahdorb doole bnt tott rariaa aodlt the  aslz, and tur pff.n(ot sogetoer the tod er rest batte. iddaee  aass,,and pepper. simmer un il med mu

Done.


In [18]:
ingrs="beef mint onion garlic pasta soja tomato french main dish"
patt=[char_to_int[value] for value in ingrs]
patt = (100 * [char_to_int["<pad>"]] + patt)[-100:]

100

In [33]:
# generate characters
output=[]
for i in range(500):
    x = numpy.reshape(patt, (1, len(patt), 1))
    x = x / float(n_vocab)
    prediction = model2.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in patt]
    output.append(result)
    patt.append(index)
    patt = patt[1:len(patt)]
print(''.join(output))
print ("\nDone.")

pisin  ra astorr oith b meieee  add nge oadi aa blsllerett iite the wiote satt.. araat  lo  map mer nnte booadne  to s tto  bro  thl mapin 1f minutes. * if you like a  weeetee corle  soou  n dppepp pote a mexee. and ygf gadi aantaee toet theetient toe tog  wd aut brans coeek yogewe,cater and but bran. cream toee pream ao text ra chelree over the meins  add th sos tige m oear settl  or atl llay  rgea aooorr  b minuts  aad the iarli  wile, and pepper. simmer until the wine is reduced to 2 tbsp, ab

Done.
