In [1]:
# Created by: Sergiu Iatco
# Created on: 16.05.2018

In [2]:
# Source:
# AI in Marketing - Siraj Raval
# https://www.youtube.com/watch?v=FYMjXD3G__Y
# https://github.com/llSourcell/AI_In_Marketing 

In [3]:
'''Example script to generate text from Nietzsche's writings.
At least 20 epochs are required before the generated text
starts sounding coherent.
It is recommended to run this script on GPU, as recurrent
networks are quite computationally intensive.
If you try this script on new data, make sure your corpus
has at least ~100k characters. ~1M is better.
'''

'''
Character-level text generation in Romanian, trained on a novel by Ion Creanga.
The code is adapted to transliterate diacritics to plain Latin letters (unidecode).
Sample text is generated every `print_step` epochs.
'''

from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import numpy as np
import random
import sys
import io

#path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
#with io.open(path, encoding='utf-8') as f:
#    text = f.read().lower()

import unidecode

# Corpus file, resolved relative to the current working directory.
file = 'Amintiri_din_copilarie.txt'

# Read the whole corpus inside a context manager so the file handle is
# closed as soon as the text is in memory (it was previously left open).
# NOTE(review): latin-1 decodes any byte sequence without raising, so a
# mismatch with the file's real encoding would go unnoticed here -- confirm
# the encoding of the corpus file.
with open(file, encoding='latin-1', mode='r') as tx:
    txr = tx.read().lower()

# Lower-cased corpus, still containing any accented characters.
accented_string = txr

# Transliterate accented characters to plain Latin letters via unidecode.
unaccented_string = unidecode.unidecode(accented_string)
text = unaccented_string

# Report the corpus size in characters.
print('corpus length:', len(text))

# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(text))
print('total chars:', len(chars))

# Two-way lookup tables between characters and their integer ids.
char_indices = {ch: idx for idx, ch in enumerate(chars)}
indices_char = dict(enumerate(chars))

# Sliding-window settings.
maxlen = 40      # number of characters in each training window
step = 3         # stride between the starts of consecutive windows
print_step = 20  # sample text whenever (epoch + 1) is a multiple of this

# Cut the corpus into overlapping windows of `maxlen` characters; the
# character that immediately follows each window is its prediction target.
window_starts = range(0, len(text) - maxlen, step)
sentences = [text[start: start + maxlen] for start in window_starts]
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

print('Vectorization...')
# One-hot encode the windows and their targets:
#   x[i, t, k] is set when character k occurs at position t of window i,
#   y[i, k]    is set when character k is the one following window i.
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1


# Network: one 128-unit LSTM reading a (maxlen, len(chars)) one-hot window,
# followed by a dense layer with one output per character and a softmax.
print('Build model...')
model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# The targets are one-hot rows, hence the categorical cross-entropy loss.
optimizer = RMSprop(lr=0.01)
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

def sample(preds, temperature=1.0):
    """Sample one index from a probability array after temperature rescaling.

    The probabilities are moved to log space, divided by `temperature`,
    re-normalised with a softmax and a single multinomial draw is taken.
    Temperatures below 1 sharpen the distribution, temperatures above 1
    flatten it.

    Args:
        preds: Array-like of probabilities (e.g. a softmax output row).
        temperature: Positive rescaling factor; 1.0 leaves `preds` unchanged.

    Returns:
        The sampled index, as returned by `np.argmax`.

    Raises:
        ValueError: If `temperature` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive, got %r' % (temperature,))

    probs = np.asarray(preds).astype('float64')

    # log(0) is -inf, which is the intended logit for an impossible class,
    # so the divide-by-zero warning is silenced rather than acted upon.
    with np.errstate(divide='ignore'):
        logits = np.log(probs) / temperature

    # Shift by the maximum before exponentiating: the softmax is unchanged,
    # but the largest term becomes exp(0) == 1, so a very low temperature can
    # no longer underflow every term to 0 and produce 0/0 = NaN probabilities.
    logits = logits - np.max(logits)
    weights = np.exp(logits)
    probs = weights / np.sum(weights)

    draw = np.random.multinomial(1, probs, 1)
    return np.argmax(draw)


def on_epoch_end(epoch, logs=None):
    """Print text sampled from the model every `print_step` epochs.

    Intended as the `on_epoch_end` hook of a Keras `LambdaCallback`. On a
    qualifying epoch a random `maxlen`-character seed is cut from `text`
    and, for each diversity (sampling temperature) in [0.2, 0.5, 1.0, 1.2],
    400 characters are generated one at a time and streamed to stdout.

    Args:
        epoch: Epoch index supplied by the callback. It is 0-based (the
            training log shows "Epoch 20/300" when this value is 19), so it
            is shifted by one before being tested and reported, keeping the
            printed number in line with the training log.
        logs: Metrics passed by the callback; not used.
    """
    epoch_number = epoch + 1
    if epoch_number % print_step != 0:
        return

    print()
    print('----- Generating text after Epoch: %d' % epoch_number)

    # The seed is drawn once and reused for every diversity value.
    start_index = random.randint(0, len(text) - maxlen - 1)
    seed = text[start_index: start_index + maxlen]

    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)
        print('----- Generating with seed: "' + seed + '"')
        sys.stdout.write(seed)

        sentence = seed
        for _ in range(400):
            # One-hot encode the current window as a batch of one.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]

            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

# Wrap the text-generation hook so Keras calls it at the end of each epoch.
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

# Training settings, gathered in one place and unpacked into fit().
fit_options = dict(batch_size=128, epochs=300, callbacks=[print_callback])
model.fit(x, y, **fit_options)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


corpus length: 140716
total chars: 45
nb sequences: 46892
Vectorization...
Build model...
Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300

----- Generating text after Epoch: 19
----- diversity: 0.2
----- Generating with seed: "cus va ard cateva jordii prin tolul cela"
cus va ard cateva jordii prin tolul cela, de ce nu ti-i manca de la croava la calarte pe la carte si mai bine de das; atara si asta, la calarte pe la carti si iar mos bodranga, de inceti de mama de la creava... cand a scapat sine de doua papusei om si asca ma adata si alte ca locuri de caltire, caci era de calarte pe la carte si mai bine de dau si mine de canta la pornile si asta, caci e holtul capii de la croava de la carte, ca sa trag
----- diversity: 0.5
----- Generating with seed: "cus va ard cateva jordii prin tolul c



-a luatatza, si
bagirig pe la intoarcem, cu este mos bodragii, ca de mama lupr vede era si odana cu zice tot cinev surtul vradimepe daca
in alta schimine, laperimete ce doatevatatul laporle sand-lomul
----- diversity: 1.2
----- Generating with seed: "cus va ard cateva jordii prin tolul cela"
cus va ard cateva jordii prin tolul cela pe
corea perele dreptesorul trama, pariste, poatejoaste erau si ma dum invetini, intre azi la gramatine smintelete lanva desage, caci tata, suita lacuri de crede mai mi-ad chiga te voatema asa alta oslovala.
dar elatescora, zicea domnura-ma
noida mine: ca dimine luhr-me-ma dau iefe strumnogeaca si
drapuluit pei, fehere altei potid de bubta ce aveai: canda-n gura,
vaienii. s-apoi cand m-am iota eu
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300

-----

elui, cum era sprintara si plina de incurmare, levura peste tihinataturi cgela piem stricinta, dupa unelui, garniri adata, hai mazece toaceam cateau bumina, asumna! bagand de targhil-born din bos luca iacese... casema
cura: las' tabim ne gaste ciuca si mi-o
ori
cam prin prin capus in viatandu-le... fieca-ne multe altii, tamal o
il, din flazul laga ti-o cuva!
mai omani, popoli la
scola iosa nu fugitica ce am facut popul ape supa undeva..
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300

----- Generating text after Epoch: 79
----- diversity: 0.2
----- Generating with seed: "aci cand te roaga mama? dar in ziua acee"
aci cand te roaga mama? dar in ziua aceea, si ne duce mine. si asta prin scotrunea si boana si afara asa, cand m-am tot invat, si parintire; si de asta, cum asculte, si

cate unul, in toata ziule gatlan, si fuga acasa, cap ca o cusai de fatlavlaros, imi zicea am atunci era agasca! oriva smantarula frumoasa aflai iins dregul prin humulea: nu put treaca atata mase cu o veilin parintelum, mogros si bochiorund in colo si o cheurojul maluie sa-i fac pentru-cu din
greua, harbunda ca nu puta, mie mai artogopi
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300
Epoch 145/300
Epoch 146/300
Epoch 147/300
Epoch 148/300
Epoch 149/300
Epoch 150/300
Epoch 151/300
Epoch 152/300
Epoch 153/300
Epoch 154/300
Epoch 155/300
Epoch 156/300
Epoch 157/300
Epoch 158/300
Epoch 159/300
Epoch 160/300

----- Generating text after Epoch: 159
----- diversity: 0.2
----- Generating with seed: "unam toti la un loc, inghetati de frig s"
unam toti la un loc, inghetati de frig si de trie vasitea ca nu ma fi sanuta-se galeste fara vorbea si frumoasa sfantului la faceste, faceti!
 s-a screcai a cela, in soacea. si eu, apuna si milor cu mare de la
gramatii si frumos in gramatica asa pe u

si eu, rau! unii cas decat in pricina oaseni prin carele
sa ia prind: mai tihep la toate zeunda, de-l noi
Epoch 221/300
Epoch 222/300
Epoch 223/300
Epoch 224/300
Epoch 225/300
Epoch 226/300
Epoch 227/300
Epoch 228/300
Epoch 229/300
Epoch 230/300
Epoch 231/300
Epoch 232/300
Epoch 233/300
Epoch 234/300
Epoch 235/300
Epoch 236/300
Epoch 237/300
Epoch 238/300
Epoch 239/300
Epoch 240/300

----- Generating text after Epoch: 239
----- diversity: 0.2
----- Generating with seed: "umulesti, uitandu-ma inapoi sa vad, nu m"
umulesti, uitandu-ma inapoi sa vad, nu ma scape de pus cat a pus in toate zeu ca nu ma sfantul havatul si mai in
vrempa pe cap si vinta, nu ma scapi si mai trebuie sa fi sa se purand
ade
chiar. pe capru zic rau in picioare darcate cu ri
adaciute, caci scoala din pe la casa acum si alta catele pana ce nu-i manca, mamul un chita si incepea o bici, mare!...
fiacura. si eu in cand in casa, tocmai
c-o scoratic si ea. dar mai mos bodransa, da
----- diversity: 0.5
----- Generating wit

<keras.callbacks.History at 0x254efcd9588>

In [4]:
# The generated text is not coherent.
# Question: What is needed to make this code work for Romanian or other Latin-script languages?