# LSTM Model for Next Character Prediction in Text

Taniya Adhikari

In [1]:
import os
import json
from pathlib import Path
import zipfile
import email
from email.policy import default
from email.parser import Parser
from datetime import timezone
from collections import namedtuple

import pandas as pd
import s3fs
from bs4 import BeautifulSoup
from dateutil.parser import parse
from chardet.universaldetector import UniversalDetector

import keras
from keras import layers
import numpy  as np
from string import punctuation
import re
import random
import sys
import os

Using TensorFlow backend.


#### Preparing Dataset

In [2]:
df = pd.read_csv("productlist.csv")
df.head(5)

Unnamed: 0.1,Unnamed: 0,product_ID,product_name,product_brand,price,product_description,product_type
0,0,6562638659653,VITALIFT-A,Dr. Different,$42,This night-time skin treatment is ideal for th...,Other/Spot Treatments
1,1,6562639675461,VITALIFT-A Forte,Dr. Different,$52,Those that need an extra boost to smooth out f...,Other/Spot Treatments
2,2,6562640429125,VITALIFT-A Eye & Neck,Dr. Different,$40,For those looking to target fine lines and wri...,Eye Treatment
3,3,6592233799749,Great Barrier Relief,KraveBeauty,$28,This creamy serum fights off environmental agg...,Other/Spot Treatments
4,4,6535227277381,Oasis Soothing Mask,DR ALTHEA,$18,This calming mask delivers intensive moisture ...,Mask


In [3]:
product_description = [i for i in df['product_description']]

In [4]:
product_description[0]

'This night-time skin treatment is ideal for those looking to improve the appearance of aging skin.The main ingredient: Retinal brings similar results of smoothing fine lines and wrinkles as retinol through stimulating collagen production, but has been proven to work faster than its counterpart. In addition, retinal is more efficient at exfoliating, contributing to brighter skin and more even tone.Other ingredients: Hyaluronic acid is a favorite for plumping and hydrating, which helps skin appear smoother and more radiant. Safflower oil is an anti-inflammatory ingredient that helps calm irritation and protect skin from damage. The result is more youthful, supple skin.pH of 6.50-8.50. Formulated without artificial fragrances and colors, parabens, sulfates, animal products, mineral oil, essential oil, alcohol, and silicone.0.7 oz./ 20g'

In [6]:
maxlen = 60
step = 3
sentences = []
next_chars = []

In [5]:
text = "\n\n".join(i for i in product_description)

print(text[0])
print(f'Corpus Length: {len(text)}')

T
Corpus Length: 188992


In [7]:
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])

print('Number of sequences:', len(sentences))

chars = sorted(list(set(text)))
print('Unique characters:', len(chars))

char_indices = dict((char, chars.index(char)) for char in chars)

print(chars)


Number of sequences: 62978
Unique characters: 96
['\n', ' ', '!', '"', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '~', '\xa0', '®', '±', '̧', '–', '—', '‘', '’', '“', '”', '…', '™', '℮']


In [9]:
print('Vectorization...')

x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Vectorization...


#### Building Single Layer LSTM model for next character prediction

In [10]:
model = keras.models.Sequential()
model.add(layers.LSTM(128, input_shape=(maxlen, len(chars))))
model.add(layers.Dense(len(chars), activation='softmax'))

# model compilation configuration
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

In [11]:
# function to sample the next character for the model's predictions
def sample(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [12]:
current_dir = Path(os.getcwd()).absolute()
results_dir = current_dir.joinpath('results')

In [13]:
# text generation loop
for epoch in range(1, 20):
    print('\nEpoch', epoch)
    model.fit(x, y, batch_size=128, epochs=1)
    
    start_index = random.randint(0, len(text) - maxlen - 1)
    generated_text = text[start_index: start_index + maxlen]
    print('--- Generating with seed: "' + generated_text + '"')
    data_path = results_dir.joinpath(f'Generated_Text_Epoch_{epoch}.txt')
    with open(data_path, 'a+') as writer:
        writer.write(f'GENERATED TEXT FOR EPOCH {epoch}')

    for temperature in [0.2, 0.5, 1.0, 1.2]:
        print('\n------_Temperature:', temperature)

        with open(data_path, 'a+') as writer:
            writer.write('\n-------------------\nTEMPERATURE: ' + str(temperature) + '\n')
            writer.write((generated_text).upper() + '\n\n')

            sys.stdout.write(generated_text.upper())

            for i in range(400):
                sampled = np.zeros((1, maxlen, len(chars)))
                for t, char in enumerate(generated_text):
                    sampled[0, t, char_indices[char]] = 1.
                
                # samples the next character
                preds = model.predict(sampled, verbose=0)[0]
                next_index = sample(preds, temperature)
                next_char = chars[next_index]

                generated_text += next_char
                generated_text = generated_text[1:]

                sys.stdout.write(next_char)
                writer.write(next_char)

        print(generated_text)


model.save('Text_Generator.h5')


Epoch 1
Epoch 1/1
--- Generating with seed: "lanced. The formula is free of artificial fragrance and colo"

------_Temperature: 0.2
LANCED. THE FORMULA IS FREE OF ARTIFICIAL FRAGRANCE AND COLOr sincl, sinfl, flclacns, anclall, silfllann flor sorficill from sinflancohol, anclal fromcts, pllcalcalacol, flcincial from, ancls, plls, anclalcolol silfffit, anclalcohol, silflal, anclall, anclalfffon ficial fragrancond anclalf, sulficlals, anclall, sllfranccalfaclacns, alcinclalconclaln, sulforis, anclalcolor silfffit, and ancohol, sulfalas, anclall, silflalal silffclanncts, anclall, silflal alfalas, anclall, silflalal silffclanncts, anclall, silflal a

------_Temperature: 0.5
LFALAS, ANCLALL, SILFLALAL SILFFCLANNCTS, ANCLALL, SILFLAL And, flor sulflal ancoalconol silflcincacolfrancolcos, snlfillal from sincl, anclancolon sslff is, anclalf, anclall, sulform prod ancon, ancl flor sorf ancls, plrfinncts, ancl frcs, nlical for sincor, silfatlannc, cllanc/ and, flor sulfactafncial fres sinfl, anc

TO GENTLE AND BALANCED WITH THE SKIN TO GLEE SKIN TO GENTLE for a smoother, the skin, to helps a strengt exfoliating benefits to the Complexion. It Soko Glam Best of K-Beauty™ Winner for for excesside to contains and all skin to the soothe includes leaf sunclefres is formulated without artificial fragrances, parabens, sulfates, animal products, alcohol, mineral oil, essential oil, and mineral oil, essential oil, essential oil, essential oil, essences that  essential oil, essential oil, essential oil, essences that 

------_Temperature: 1.0
 ESSENTIAL OIL, ESSENTIAL OIL, ESSENTIAL OIL, ESSENCES THAT to Skin, while boong includer verivee, - Andiexticing to sentitive promolative and for essence and finathos feelly ingredients is ofturand buttinct skin and lone-an fine, nouplexo firm anti-aging remive the rovinge extract veajrions to cententaly plumpaties lean out out Revented fantaree tourety you love this product, treatm, stoss The Comring SaT Top from hereim!zlych/ cells, to censingler 

 THEE GAM OATUR, ROUGLE DEEPERPED BRIGHTENS, THITOR ANTRES Factaial saffle ail nagre, syotes. Folled without artificial cracy ands hydrate skit with combon taybme 3 Neak is free of day kyerats wate kenking (sensitive skin, ceramion to chay,  ass ablote syftell ca ) or vitI is natural eeseuble un fighting effeci Soomate Vitamin C.The ot Use ancidancination..vitais Panted Altrkinat care,rmu you Vatamin criet Ceeming natural , .li0 skin barrier.The formuraminmin criet Ceeming natural , .li0 skin barrier.The formuramin

Epoch 8
Epoch 1/1
--- Generating with seed: " mineral oil, essential oils and silicone.5.07 fl. oz. / 150"

------_Temperature: 0.2
 MINERAL OIL, ESSENTIAL OILS AND SILICONE.5.07 FL. OZ. / 150 ml

This serum and hydrated and a cleanser formula is free of artificial fragrances, parabens, sulfates, artificial colors, alcohol, mineral oil, essential oils, and silicone.1.7 fl. oz. / 150 ml

The formula is free of artificial fragrances, parabens, sulfates, artificial colors, alc

CT SKIN FROM ENVIRONMENTAL STRESS. THIS MULTI-USE BALM CAN Be deep step in the skin barrier. The formula is formulated with this soothe skin sension skin with this soothe skin seeping skin seeper skin sension skin with this moisture and support skin seeping skin types and soothes skin seeper skin seeping skin types and soothes skin with this soothe skin seeping the skin barrier. It’s free of artificial fragrance. is free of artificial fragrance. is formulaicial fragrance. is free of artificial fragrance. is formula

------_Temperature: 0.5
ICIAL FRAGRANCE. IS FREE OF ARTIFICIAL FRAGRANCE. IS FORMULAted with the powere the skin and soothes soothes skin with this lots for sensitive and signs of aging benefits of the skin water offers skin in antioxidant and pouterpritate and soothing specifics moisturizer, and smooth skin with this serum sun extra skin soothes double skin sensits formula and skin you sensitive and smooth skin in a suffle ingredient insovit powdoot skin and moisturizers t

RT TO DRE SKIN BARRIER FICIAL OIL, AND SEBUM USED SEBUM PORE-riched skin. Infused without loaking betaen and ity benevita ingredients thar are finct tagula remove dehyply waicerre oil chemical a-plexich prevert prevent mask is moisturizing PHA) to help cularxing hydrates. The fectulated with the skin becove arical for all skin types, environmentation. It’s free of silactre, artificial fragrances and colors, parabens, sulfates, animal products, alcohol, minnd colors, parabens, sulfates, animal products, alcohol, min

------_Temperature: 1.2
ND COLORS, PARABENS, SULFATES, ANIMAL PRODUCTS, ALCOHOL, MINeral oil and siliconetearNyicial upten tylePle, starface-radiates free with gentle ingredient, reseevnce to reapent works toner axt, leavsing cell turnobide fout the-douthen 4.mut to AfAcyum while silixing ingredients and spinber reduce panced oil, helping minimizes of fa, a makeup togethed it your fullyoat’s extelnas hermant vitamin y including sugars are fial skin, clear.Losated exiss Cler

--- Generating with seed: " damaged skin will drink up the Ginseng Essence Water! This "

------_Temperature: 0.2
 DAMAGED SKIN WILL DRINK UP THE GINSENG ESSENCE WATER! THIS lightly moisturizing blackheubs skin and providing skin. The formula is free of artificial fragrances and colors, parabens, alcohol, mineral oil, animal products, alcohol, mineral oil, animal products, alcohol, mineral oil, animal products, alcohol, mineral oil, animal products, alcohol, mineral oil, animal products, alcohol, mineral oil, animal products, alcohol, mineral oil, animal products, alc animal products, alcohol, mineral oil, animal products, alc

------_Temperature: 0.5
 ANIMAL PRODUCTS, ALCOHOL, MINERAL OIL, ANIMAL PRODUCTS, ALCohol, mineral oil, artificial color, animal products, alcohol, mineral oil and silicone. 1.69 fl.oz.

Day ingredients and silphers. The formula also healthy, brand carrate for a loght nimpore and hydronate and smoother to sensitive skin types, with a soothe. The formula controls 