In [1]:
from __future__ import print_function

import codecs
import io
import locale
import os
import random
import sys

import numpy as np
import pandas as pd
from keras.callbacks import LambdaCallback
from keras.callbacks import ModelCheckpoint
from keras.layers import Dense, Activation, Dropout, Input, Masking,Bidirectional
from keras.layers import GRU
from keras.models import Model, load_model, Sequential
from keras.optimizers import Adam
from keras.preprocessing.sequence import pad_sequences
from keras.utils.data_utils import get_file
from nltk.tokenize import word_tokenize

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Setting the Turkish locale is best-effort: the named locale is not installed
# on every machine, and a missing locale should not abort the whole notebook.
# The case folding below does not rely on it (on Python 3, str.lower() and
# str.translate() are Unicode-based, not locale-based).
try:
    locale.setlocale(locale.LC_ALL, 'tr_TR.utf8')
except locale.Error as err:
    sys.stderr.write(
        "Warning: could not set locale 'tr_TR.utf8' (%s); "
        "continuing with the default locale.\n" % err)

# Translation table applied *before* str.lower() so that the Turkish
# dotted/dotless I pair is folded correctly: 'I' -> 'ı' and 'İ' -> 'i'.
# Plain str.lower() would turn 'I' into 'i'.
lower_map = {
    ord(u'I'): u'ı',
    ord(u'İ'): u'i',
    }

In [3]:
def readPoemText(path='kucukiskender.txt'):
    """Load the poem corpus and return ``(firstLines, text)``.

    The file is read as UTF-8, Windows line endings are converted to ``\\n``
    and doubled newlines are collapsed once. The text is split on the
    ``***`` separator to collect one seed line per section.

    Args:
        path: Location of the corpus file. Defaults to the file name the
            notebook has always used, so existing calls are unaffected.

    Returns:
        A tuple ``(firstLines, text)`` where

        * ``firstLines`` holds, for each ``***``-separated section, its first
          line, or its second line when the first one is empty. These are
          taken *before* cleaning, so they keep their original casing and
          punctuation.
        * ``text`` is the whole corpus with digits, separators and assorted
          punctuation removed, then lower-cased via ``lower_map``.
    """
    with codecs.open(path, "r", "UTF-8") as f:
        text = f.read().replace('\r\n','\n').replace('\n\n', '\n')

    firstLines = []
    for section in text.split('***'):
        lines = section.split('\n')
        # Fall back to the second line only when it exists; a section that is
        # a single empty string (e.g. the file ends with '***') used to raise
        # IndexError here.
        if lines[0] or len(lines) < 2:
            firstLines.append(lines[0])
        else:
            firstLines.append(lines[1])

    # Substrings stripped from the training text, applied in the original
    # order so the result is unchanged.
    unwanted = (
        ':', '\t', '~', 'â', '***',
        '1', '2', '3', '4', '5', '6', '7', '8', '9', '0',
        '-', '\x91', '\x92', '\x93', '*', '\x94',
        '(', ')', '_', '&', '^', '/', "'",
    )
    for token in unwanted:
        text = text.replace(token, '')

    # Fold the Turkish I/İ pair first, then lower-case everything else.
    text = text.translate(lower_map).lower()

    return firstLines, text

In [4]:
# Load the corpus once: seed lines for generation plus the cleaned training text.
firstLines, text = readPoemText()

In [5]:
# Sanity check: display the first two seed lines extracted from the corpus.
firstLines[:2]

["küçük chopin'e", 'Ne idüğü belirsiz kelimeler takip ediyor beni! ']

In [6]:
# Build the character vocabulary and the two lookup tables used for one-hot
# encoding (char -> index) and decoding (index -> char).
# NOTE: joining the text with ',' also puts ',' itself into the vocabulary
# (for any text longer than one character), even if the corpus has no comma.
chars = list(set(','.join(text)))
chars.sort()
print('Total chars: %s' % len(chars))
indices_char = dict(enumerate(chars))
char_indices = {symbol: position for position, symbol in indices_char.items()}

Total chars: 39


In [7]:
# Show the full char -> index mapping so the vocabulary can be inspected.
print(char_indices)

{'\n': 0, ' ': 1, '!': 2, ',': 3, '.': 4, ';': 5, '?': 6, 'a': 7, 'b': 8, 'c': 9, 'd': 10, 'e': 11, 'f': 12, 'g': 13, 'h': 14, 'i': 15, 'j': 16, 'k': 17, 'l': 18, 'm': 19, 'n': 20, 'o': 21, 'p': 22, 'q': 23, 'r': 24, 's': 25, 't': 26, 'u': 27, 'v': 28, 'w': 29, 'x': 30, 'y': 31, 'z': 32, 'ç': 33, 'ö': 34, 'ü': 35, 'ğ': 36, 'ı': 37, 'ş': 38}


In [8]:
# Sliding-window settings: each training sample is `maxlen` consecutive
# characters, and consecutive windows start `step` characters apart.
maxlen, step = 120, 1

# Containers for the input windows and the character that follows each one.
seq_in, seq_out = [], []

In [9]:
# Slide a window of `maxlen` characters over the corpus: each window is an
# input sample and the character right after it is the prediction target.
# The lists are rebuilt from scratch (not appended to) so that re-running this
# cell cannot silently duplicate the training data.
window_starts = range(0, len(text) - maxlen, step)
seq_in = [text[i: i + maxlen] for i in window_starts]
seq_out = [text[i + maxlen] for i in window_starts]

# One-hot encode inputs as (samples, maxlen, vocabulary) and targets as
# (samples, vocabulary). The builtin `bool` replaces the `np.bool` alias,
# which was deprecated in NumPy 1.20 and removed in 1.24.
X = np.zeros((len(seq_in), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(seq_in), len(chars)), dtype=bool)
for i, sentence in enumerate(seq_in):
    for t, char in enumerate(sentence):
        X[i, t, char_indices[char]] = 1
    y[i, char_indices[seq_out[i]]] = 1

In [10]:
# Checkpoint file pattern; Keras fills in the epoch number and training loss.
filepath="./weights/{epoch:02d}-{loss:.4f}-bigger.hdf5"

# Make sure the checkpoint directory exists before training starts. Older
# Keras releases do not create it, so the first save would otherwise fail
# only after a full epoch of training.
weights_dir = os.path.dirname(filepath)
if not os.path.isdir(weights_dir):
    os.makedirs(weights_dir)

# Save weights only when the training loss improves.
checkpoint = ModelCheckpoint(filepath, monitor='loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint]

# Two stacked bidirectional GRU layers followed by a softmax over the
# vocabulary: the network predicts the next character from a one-hot encoded
# window of shape (maxlen, len(chars)).
model = Sequential()
model.add(Bidirectional(GRU(maxlen, input_shape=(maxlen, len(chars)), return_sequences=True), input_shape=(maxlen, len(chars))))
model.add(Bidirectional(GRU(len(chars), return_sequences=False)))
model.add(Dense(len(chars)))
model.add(Activation('softmax'))
optimizer = Adam()
model.compile(loss='categorical_crossentropy', optimizer=optimizer)


In [11]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    The probabilities are re-weighted as ``p ** (1 / temperature)`` and
    renormalised: temperatures below 1 sharpen the distribution, temperatures
    above 1 flatten it.

    Args:
        preds: 1-D sequence of probabilities (e.g. a softmax output).
        temperature: Positive scaling factor; 1.0 leaves ``preds`` unchanged.

    Returns:
        The sampled index as a NumPy integer.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be > 0, got %r' % (temperature,))

    # float64 keeps the renormalised vector close enough to summing to 1 for
    # np.random.multinomial's validity check.
    preds = np.asarray(preds).astype('float64')

    # Zero probabilities legitimately become -inf logits; silence the warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift by the maximum before exponentiating. Without this, a low
    # temperature makes every exp() underflow to 0 and the normalisation
    # below turns into 0/0 = NaN.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)

    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
# Alternate between training for 5 epochs and printing sample text generated
# at several sampling temperatures ("diversities").
for iteration in range(1, 1000):
    print()
    print('-' * 50)
    print('Iteration', iteration*5)
    seed = firstLines[random.randint(0, len(firstLines)-1)]+'\r\n'
    model.fit(X, y, batch_size=128, epochs=5, callbacks=callbacks_list)

    # Normalise the seed the same way the training text was normalised and
    # drop every character the model has no index for (upper-case letters,
    # stripped punctuation, '\r'), so it can be one-hot encoded safely.
    seed_chars = ''.join(
        c for c in seed.translate(lower_map).lower() if c in char_indices)

    for diversity in [0.5, 1.0, 1.5]:
        print()
        print('----- diversity:', diversity)
        generated = ''
        generated += seed
        print('----- Generating with seed:\n "' + seed + '"\n')
        sys.stdout.write(generated)

        # Prime the model with the seed, restarting for every diversity.
        # Previously the input came from `sentence`, a variable left over from
        # the vectorisation loop, so the seed never reached the model and the
        # window carried over from one diversity to the next.
        sentence = seed_chars[-maxlen:]

        for i in range(len(generated)+maxlen):
            x = np.zeros((1, maxlen, len(chars)))
            # Right-align the window: a seed shorter than `maxlen` is padded
            # on the left with all-zero time steps.
            offset = maxlen - len(sentence)
            for t, char in enumerate(sentence):
                x[0, offset + t, char_indices[char]] = 1.

            preds = model.predict(x, verbose=0)[0]
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            generated += next_char
            # Append the new character and keep only the last `maxlen`.
            sentence = (sentence + next_char)[-maxlen:]

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()


--------------------------------------------------
Iteration 5
Epoch 1/5

Epoch 00001: loss improved from inf to 2.31436, saving model to ./weights/01-2.3144-bigger.hdf5
Epoch 2/5

Epoch 00002: loss improved from 2.31436 to 2.04636, saving model to ./weights/02-2.0464-bigger.hdf5
Epoch 3/5

Epoch 00003: loss improved from 2.04636 to 1.94377, saving model to ./weights/03-1.9438-bigger.hdf5
Epoch 4/5

Epoch 00004: loss improved from 1.94377 to 1.87222, saving model to ./weights/04-1.8722-bigger.hdf5
Epoch 5/5

Epoch 00005: loss improved from 1.87222 to 1.81675, saving model to ./weights/05-1.8167-bigger.hdf5

----- diversity: 0.5
----- Generating with seed:
 "Köpeğin havladığı spiral
"

Köpeğin havladığı spiral
 teni bir bir alına beni senin aradığı olmadım çocuğu
bir sevgilim çıkar benim anlıyor sevişmesinde bir meyanbahar biz beni karantilar kalmış sonr

----- diversity: 1.0
----- Generating with seed:
 "Köpeğin havladığı spiral
"

Köpeğin havladığı spiral
acak bir insanın!  bucuman t