In [1]:
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint
import numpy as np

Using TensorFlow backend.


In [2]:
# # Run this cell to mount your Google Drive.
# from google.colab import drive
# drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Read the whole corpus and lower-case it. The encoding is given explicitly
# because the corpus is Devanagari text and open() otherwise falls back to a
# platform/locale-dependent default.
with open('/data/chanda.txt', 'r', encoding='utf-8') as f:
    text = f.read().lower()
print('Corpus length in characters:', len(text))

Corpus length in characters: 46609


In [0]:
# Keep at most the first 3,000,000 characters of the corpus; a shorter corpus
# is left unchanged by the slice.
text = text[:3000000]

In [5]:
# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(text))
print('Total Number of Unique Characters:', len(chars))
# Lookup tables in both directions between characters and integer ids.
char_indices = {ch: idx for idx, ch in enumerate(chars)}  # character -> index
indices_char = {idx: ch for ch, idx in char_indices.items()}  # index -> character

Total Number of Unique Characters: 83


In [0]:
# Length (in characters) of every input window.
max_len = 50

# Slide a window over the corpus one character at a time: sentences[k] is the
# max_len-character slice starting at offset k, and next_chars[k] is the single
# character that immediately follows that slice.
sentences = [text[start: start + max_len] for start in range(len(text) - max_len)]
next_chars = [text[start + max_len] for start in range(len(text) - max_len)]

In [0]:
# Rebind `text` to None so the raw corpus string is no longer referenced under
# this name (presumably to free memory now that the windows have been built).
text = None

In [0]:
# One-hot training tensors, allocated as all-False and filled in afterwards.
#   x: (number of windows, max_len, vocabulary size) - the input windows
#   y: (number of windows, vocabulary size)          - the following character
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(sentences), max_len, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)

In [0]:
# Fill the one-hot tensors: for each window, switch on the vocabulary slot of
# every character at its position in x, and the slot of the following
# character in y.
for row, (window, target) in enumerate(zip(sentences, next_chars)):
    for pos, ch in enumerate(window):
        x[row, pos, char_indices[ch]] = True
    y[row, char_indices[target]] = True

In [0]:
# Where the checkpoint callback writes its file.
filepath = '/content/drive/My Drive/dataset/saved_weights_language_model.hdf5'
# save_best_only=False: do not restrict saving to the best epoch;
# save_weights_only=False: store the full model, not just the weights.
checkpoint = ModelCheckpoint(
    filepath,
    save_best_only=False,
    save_weights_only=False,
)
callbacks_list = [checkpoint]

In [17]:
print('Building model...')
# Number of units in each LSTM layer.
hidden_size = 256
# Two stacked LSTM layers followed by dropout and a softmax over the vocabulary.
model = Sequential()
# return_sequences=True so the second LSTM receives the full sequence of
# hidden states rather than only the last one.
model.add(LSTM(hidden_size, input_shape=(max_len, len(chars)), return_sequences=True))
model.add(LSTM(hidden_size))
model.add(Dropout(0.5))
# Output layer: one softmax probability per character in the vocabulary.
model.add(Dense(len(chars), activation='softmax'))
# Optimise with RMSprop using its default settings.
optimizer_new = RMSprop()
# Categorical cross-entropy against the one-hot targets; minimising it is
# maximum-likelihood estimation of the next-character distribution.
model.compile(loss='categorical_crossentropy', optimizer=optimizer_new)

# `callbacks_list` (holding the ModelCheckpoint built in the previous cell) is
# passed here; previously it was constructed but never handed to fit(), so no
# checkpoint was ever written during training.
model.fit(x, y,
          batch_size=128,
          epochs=30,
          validation_split=0.01,
          callbacks=callbacks_list)

Building model...
Train on 46093 samples, validate on 466 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7fae3a330198>

In [0]:
# Write the trained weights to Google Drive, replacing any existing file.
model.save_weights(
    '/content/drive/My Drive/dataset/saved_weights_language_model.hdf5',
    overwrite=True,
)

In [0]:
def sample(preds, temperature=1.):
    """Draw one index from a probability vector using temperature sampling.

    The probabilities are re-weighted as ``softmax(log(p) / temperature)``:
    temperatures below 1 sharpen the distribution, temperatures above 1
    flatten it.

    Args:
        preds: Probabilities, either as a 1-D vector or as a 2-D array (such
            as a batch of model outputs). For 2-D input only the first row is
            used.
        temperature: Positive scaling factor applied to the log-probabilities.

    Returns:
        The sampled index (as returned by ``np.argmax``).

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive, got %r' % (temperature,))

    # Select the row first so normalisation happens over exactly the vector
    # being sampled, and so that 1-D input is accepted as well.
    probs = np.atleast_2d(np.asarray(preds, dtype='float64'))[0]

    # log(0) = -inf is fine here (it maps back to probability 0), so silence
    # the divide-by-zero warning it would otherwise emit.
    with np.errstate(divide='ignore'):
        scaled = np.log(probs) / temperature

    # Shift by the maximum before exponentiating: the largest term becomes
    # exp(0) = 1, so a very small temperature can no longer underflow every
    # term to 0 and produce 0/0 = NaN probabilities.
    scaled = scaled - np.max(scaled)
    weights = np.exp(scaled)
    probs = weights / np.sum(weights)

    # Sample a single character; the draw is one-hot, so argmax is its index.
    draw = np.random.multinomial(1, probs, 1)
    return np.argmax(draw)

In [14]:
# Use one of the training windows as the seed text and echo it.
sentence = sentences[111]
print('seed ====', sentence)

temperature = 0.6
# Generate 400 characters, one per step.
for step in range(400):
    # `sentence` grows by one character per step, so this slice keeps sliding
    # forward over its most recent max_len characters.
    text = sentence[step: step + max_len]
    # One-hot encode the window as a batch containing a single sample.
    x_to_be_pred = np.zeros((1, max_len, len(chars)))
    for pos, ch in enumerate(text):
        x_to_be_pred[0, pos, char_indices[ch]] = 1.
    ans = model.predict(x_to_be_pred)
    # Sample the next character from the prediction and append it.
    sentence = sentence + indices_char[sample(ans, temperature)]

print('\n Machine written paragraph \n', sentence)


seed ==== क्क बेखबर भाे खाेजे सबै ती कुना
लुट्छाै मात्र कि द

 Machine written paragraph 
 क्क बेखबर भाे खाेजे सबै ती कुना
लुट्छाै मात्र कि देख कनेकाे मुन्छस्छ।
शाे भुन्छ जन्याे छ छन ।।
खुका होमा सदे बेखा मसमा।।

गाे काे काे जो पार।।
मातेर हो मान सुव जर्दै अन्।।

हाँ दान बोखा बने बधा सवै होश्छ।
पन्द्छ भाल अथिका छ लौम्यो न भनु ।

पारो केर मारा नागनै भनि तिर।।

हेर हुम्र जागाँ मसना दुश खिन्द नाव्याा।
हिन्दै बस्न भुन्छ न्याउँनै माग्र।।

सु सुन्दछ को दुख जन्दछ काे।
पार्दा तिरा ता काे सतै यहे ।।

माेको हुन् पार्छ साहा रोध हाे कधान।।
काि किर


In [18]:
# Use one of the training windows as the seed text and echo it.
sentence = sentences[408]
print('seed ====', sentence)

# Low temperature: sampling stays close to the most probable characters.
temperature = 0.1
# Generate 400 characters, one per step.
for step in range(400):
    # `sentence` grows by one character per step, so this slice keeps sliding
    # forward over its most recent max_len characters.
    text = sentence[step: step + max_len]
    # One-hot encode the window as a batch containing a single sample.
    x_to_be_pred = np.zeros((1, max_len, len(chars)))
    for pos, ch in enumerate(text):
        x_to_be_pred[0, pos, char_indices[ch]] = 1.
    ans = model.predict(x_to_be_pred)
    # Sample the next character from the prediction and append it.
    sentence = sentence + indices_char[sample(ans, temperature)]

print('\n Machine written paragraph \n', sentence)

seed ==== र मेराे मनमा बसेकाे।।२।।

लाखौं लुटेरा अब लुट्न आए

 Machine written paragraph 
 र मेराे मनमा बसेकाे।।२।।

लाखौं लुटेरा अब लुट्न आए।
बासाब बाहिना निएको छ सुनादया ।।

स्वास्था पनि निर्व जानफना भन्दैन कित्मा नयो
हाम्रा सूर जनमा सबै अनि सम्मिन्ध्यो मनाती ति।

मारा राख्छ देख भनिका सब हुन्छ जलेमा।
नमान्ने कुनै भन ब बस्छन् रिन्छान्।
नभाँका सबै बन्दछ पनि सुन्दर स्वास्थ ।।

सान्दा र देशका तिममा पुल्याै।
कतै भन्छ कानून् सब जानाै गर्नै नसक्ने भएँ,
कोही काश र जानेको , सर्ना भनिन सर् कनि ।
बाेकी ने भएकाे का, उन्छ सक्ता त पाग्यो।
बारामा क


In [19]:
# Use one of the training windows as the seed text and echo it.
sentence = sentences[408]
print('seed ====', sentence)

# High temperature: the distribution is flattened, so unlikely characters are
# drawn more often.
temperature = 2
# Generate 400 characters, one per step.
for step in range(400):
    # `sentence` grows by one character per step, so this slice keeps sliding
    # forward over its most recent max_len characters.
    text = sentence[step: step + max_len]
    # One-hot encode the window as a batch containing a single sample.
    x_to_be_pred = np.zeros((1, max_len, len(chars)))
    for pos, ch in enumerate(text):
        x_to_be_pred[0, pos, char_indices[ch]] = 1.
    ans = model.predict(x_to_be_pred)
    # Sample the next character from the prediction and append it.
    sentence = sentence + indices_char[sample(ans, temperature)]

print('\n Machine written paragraph \n', sentence)

seed ==== र मेराे मनमा बसेकाे।।२।।

लाखौं लुटेरा अब लुट्न आए

 Machine written paragraph 
 र मेराे मनमा बसेकाे।।२।।

लाखौं लुटेरा अब लुट्न आए।
जुवथाल विघुी सती जिरण्गा भए पबाले नमो,
कने आुन धां हुदेन एक तन्भा
गरैकार छ भाेच्न ेसा आफ्नु।फेराे म पाउाे
नसाे देधी सम्व यकाे अबूतान।ु्छ।
नेपाल र याे सककै नखान्र लीखराय्मतमाे।
नयाँ गल्े पुगनर नोण्लास परी फम्नै भकिु झ ्ए ।
–
भानस्यु प्रशास्पहनुलोह,बराशु ंत्यो कित्थे टौऊँ४।

हसमैका घर ंउर्ौले किम म्याग छ मातिर,।
कणवयस मालुमा ए, नवै णश्क मनले गनिू,
मातवमाका ललका बिता, सुणसलहर थत्पु आ
दकिनन कसहँस ,ह
