<img src="../Pics/MLSb-T.png" width="160">
<br><br>
<center><u><H1>Text Generation</H1></u></center>

In [3]:
import tensorflow as tf


In [41]:
import numpy as np
from keras.models import Sequential, load_model
from keras.layers import Dense, Embedding, CuDNNLSTM, Dropout , LSTM , GRU
from keras.utils import to_categorical
from random import randint

In [19]:
# Read the whole corpus into memory. The context manager closes the handle
# even if read() raises, which the manual open()/close() pair did not.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm the file's encoding before pinning one.
with open('../data/Ancient_Modern_Physics.txt', 'r') as file:
    text = file.read()

# Preview the first 1000 raw characters.
text[:1000]

'ANCIENT AND MODERN PHYSICS\n\nby Thomas E. Willson\n\n\n\nContents\n\nPreface\nI.     Physical Basis of Metaphysics\nII.    The Two Kinds of Perception\nIII.   Matter and Ether\nIV.    What a Teacher Should Teach\nV.     The Four Manifested Planes\nVI.    One Place on Earth\nVII.   The Four Globes\nVIII.  The Battle Ground\nIX.    The Dual Man\nX.     The Septenary World\nXI.    Stumbling blocks in Eastern Physics\n\n\n\n\nPREFACE\n\n\nThe Editor of the Theosophical Forum in April, 1901, noted the\ndeath of Mr. Thomas E. Willson in the previous month in an\narticle which we reproduce for the reason that we believe many\nreaders who have been following the chapters of "Ancient and\nModern Physics" during the last year will like to know something\nof the author.  In these paragraphs is said all that need be said\nof one of our most devoted and understanding Theosophists.\n\nIn March, 1901, The Theosophical Forum lost one of its most\nwilling and unfailing contributors.  Mr. T.E. Willson

## Cleaning the text

In [20]:
# Lowercasing is the only normalization applied here; punctuation, digits,
# spaces and newlines are all kept and become part of the character vocabulary.
tokens = text.lower()
print(tokens[:500])

ancient and modern physics

by thomas e. willson



contents

preface
i.     physical basis of metaphysics
ii.    the two kinds of perception
iii.   matter and ether
iv.    what a teacher should teach
v.     the four manifested planes
vi.    one place on earth
vii.   the four globes
viii.  the battle ground
ix.    the dual man
x.     the septenary world
xi.    stumbling blocks in eastern physics




preface


the editor of the theosophical forum in april, 1901, noted the
death of mr. thomas e. w


In [21]:
# Corpus length and alphabet size. Both are counted in characters, because
# `tokens` is a plain string.
unique_vocab = len(set(tokens))
n_chars = len(tokens)
print('Total Tokens: %d' % n_chars,
      'Unique Tokens: %d' % unique_vocab,
      sep='\n')

Total Tokens: 126361
Unique Tokens: 51


In [22]:
# Sorted alphabet of the corpus; the position of a character in this list
# is the integer id used for it everywhere below.
characters = sorted(set(tokens))
n_vocab = len(characters)
n_vocab

51

In [23]:
# Two-way lookup between characters and their integer ids.
int_to_char = dict(enumerate(characters))
char_to_int = {char: index for index, char in int_to_char.items()}

## Creating datasets:

In [24]:
seq_length = 100

# Encode the corpus once, then cut overlapping windows out of the id list.
# Window k covers characters [k, k + seq_length) and its target is the
# character immediately after it, so consecutive windows are shifted by one.
encoded = [char_to_int[ch] for ch in tokens]
n_windows = n_chars - seq_length

X = [encoded[start:start + seq_length] for start in range(n_windows)]
y = [encoded[start + seq_length] for start in range(n_windows)]

In [25]:
# First input window: seq_length character ids, in corpus order.
print(X[0])

[25, 38, 27, 33, 29, 38, 44, 1, 25, 38, 28, 1, 37, 39, 28, 29, 42, 38, 1, 40, 32, 49, 43, 33, 27, 43, 0, 0, 26, 49, 1, 44, 32, 39, 37, 25, 43, 1, 29, 8, 1, 47, 33, 36, 36, 43, 39, 38, 0, 0, 0, 0, 27, 39, 38, 44, 29, 38, 44, 43, 0, 0, 40, 42, 29, 30, 25, 27, 29, 0, 33, 8, 1, 1, 1, 1, 1, 40, 32, 49, 43, 33, 27, 25, 36, 1, 26, 25, 43, 33, 43, 1, 39, 30, 1, 37, 29, 44, 25, 40]


In [26]:
# Id of the character that immediately follows the first window.
print(y[0])

32


In [27]:
# Recurrent layers expect (samples, time steps, features); each time step
# carries a single feature, the character id.
X_new = np.reshape(X, (len(X), seq_length, 1))
# Scale ids into [0, 1) by dividing by the vocabulary size.
X_new = X_new / float(n_vocab)

# One-hot encode the targets. The width is pinned to the vocabulary size:
# without num_classes, to_categorical infers it from max(y) + 1, which comes
# out narrower than n_vocab whenever the highest id never occurs as a target.
y_new = to_categorical(y, num_classes=n_vocab)

In [28]:
# Sanity-check the array shapes before they are wired into the model.
print("X_new shape:", X_new.shape)
print("y_new shape:", y_new.shape)

X_new shape: (126261, 100, 1)
y_new shape: (126261, 51)


In [29]:
# One-hot row for the first target; the single 1 sits at index y[0].
y_new[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
      dtype=float32)

## Creating the model:

In [47]:
# Two stacked GRU layers followed by a softmax over the character vocabulary.
# The first GRU returns the full sequence so that the second one receives
# one vector per time step; dropout follows each recurrent layer.
n_timesteps, n_features = X_new.shape[1], X_new.shape[2]
n_classes = y_new.shape[1]

model = Sequential([
    GRU(300, input_shape=(n_timesteps, n_features), return_sequences=True),
    Dropout(0.2),
    GRU(300),
    Dropout(0.2),
    Dense(n_classes, activation='softmax'),
])

In [48]:
# Print the layer stack with output shapes and parameter counts.
model.summary()

Model: "sequential_8"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_4 (GRU)                  (None, 100, 300)          271800    
_________________________________________________________________
dropout_13 (Dropout)         (None, 100, 300)          0         
_________________________________________________________________
gru_5 (GRU)                  (None, 300)               540900    
_________________________________________________________________
dropout_14 (Dropout)         (None, 300)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 51)                15351     
Total params: 828,051
Trainable params: 828,051
Non-trainable params: 0
_________________________________________________________________


In [49]:
# Categorical cross-entropy matches the one-hot targets and softmax output.
# The optimizer is selected by name, so its hyperparameters are left to Keras.
model.compile(loss='categorical_crossentropy', optimizer='adam')

## Training the model:

In [50]:
%%time
# Train for 5 epochs with mini-batches of 128 windows.
# NOTE(review): the recorded output stops with a KeyboardInterrupt after
# "Epoch 4/5", so the model saved below presumably comes from a partial run.
model.fit(X_new, y_new, batch_size=128, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5

KeyboardInterrupt: 

## Save the model:

In [51]:
# Write the current model to disk at the path below.
# NOTE(review): presumably the target directory must already exist — confirm.
model.save('../data/text_generation/text_generation_model.h5')

## Load Model:

In [None]:
# Reload the saved model. It is bound to `model` as well as `model_` because
# the generation loop below calls `model.predict`; with only `model_` bound,
# a fresh kernel that skips training would hit a NameError there.
model = model_ = load_model('../data/text_generation/text_generation_model.h5')

## Testing a random sample:

In [53]:
# Pick a random training window as the generation seed.
# np.random.randint excludes its upper bound, so len(X) (not len(X) - 1)
# is needed for the last window to be selectable.
ini = np.random.randint(0, len(X))
# Copy the window: the generation loop grows this list, and aliasing X[ini]
# would silently corrupt the training data.
token_string = list(X[ini])

In [54]:
# Decode the seed ids back to characters. `complete_string` is kept as a
# list of single characters so that generated characters can be appended.
complete_string = list(map(int_to_char.__getitem__, token_string))

seed_text = ''.join(complete_string)
print ("\"", seed_text, "\"")

" 
six in the south."  connect them with the zodiac, and the
position of the planets shows the interfe "


In [56]:
# Generate 500 characters, feeding each prediction back in as input.
for i in range(500):
    # Shape the current window as (1, time steps, 1) and scale it the same
    # way as the training inputs.
    x = np.reshape(token_string, (1, len(token_string), 1))
    x = x / float(n_vocab)

    prediction = model.predict(x, verbose=0)

    # Greedy decoding: always take the most probable next character.
    id_pred = int(np.argmax(prediction))
    complete_string.append(int_to_char[id_pred])

    # Slide the window forward by building a new list. An in-place append
    # would also modify whatever list token_string was taken from.
    token_string = token_string[1:] + [id_pred]

## Showing the generated text:

In [57]:
# Join seed and generated characters in one pass rather than growing a
# string one character at a time.
# NOTE(review): this rebinds `text`, which also holds the raw corpus read at
# the top of the notebook. Re-running the cleaning cell afterwards would
# lowercase the generated sample instead; consider a dedicated name.
text = ''.join(complete_string)
print(text)


six in the south."  connect them with the zodiac, and the
position of the planets shows the interfele whe same aester of the earth, where ias be innees aelne of the earth, where ias be innees aelne of the earth, and the suanic globes wese in the suaric globes of the earth, and the suanic globes whre the patavic world iav be a manes of the earth, and the suanic globes wese the semeeeet of the earth, and the suanic globes wese in the suaric globes whre the patavic world iav be a manes of the earth, and the suanic globes wese the semeeeet of the earth, and the suanic globes wese in the suaric gl
