In [2]:
import pickle as pkl
import numpy as np
import matplotlib.pyplot as plt

from keras_metrics import precision, recall
from keras import layers, models
from sklearn.model_selection import train_test_split
from random import choice
from colorama import Fore, Style

In [3]:
def vectorize(text, rule):
    arr = np.array(list(map(lambda ch: rule[ch], text)))
    return arr

def get_text(vec, rule):
    text = "".join(map(lambda i: rule[round(i)], vec))
    return text

def vec2text(vec):
    text = ""
    for v in vec:
        text += vec2char[np.argmax(v)]
    return text


In [4]:
with open('data/russian/Esenin.txt', 'r') as f:
    text = f.read().lower()
poems = text.split("\n\n")
poem = choice(poems)

LATENT_DIM = 50

max_len = max(map(len, poems))
symbols = set(text) 
sym_len = len(symbols) + 1
print("Number of symbols %i" % sym_len)
print("Max length of sonets %i" % max_len)
char2vec = {symbol: i + 1 for i, symbol in enumerate(symbols)}
char2vec[""] = 0
with open('char2vec.pkl', 'wb') as f:
    pkl.dump(char2vec, f)
vec2char = {i + 1: symbol for i, symbol in enumerate(symbols)}
vec2char[0] = ""
with open('vec2char.pkl', 'wb') as f:
    pkl.dump(vec2char, f)
    
vectorized = np.zeros((len(poems), max_len, sym_len))
for i, poem in enumerate(poems):
    vectorized[i, range(0, len(poem)), [char2vec[ch] for ch in poem]] = 1
    vectorized[i, range(len(poem), max_len), 0] = 1
vectorized = vectorized + np.random.normal(0, 1, vectorized.shape) * 0.05
np.savez('train.npz', vectorized=vectorized)

Number of symbols 58
Max length of sonets 242


In [5]:
inp = layers.Input((max_len, sym_len))
x = layers.Flatten()(inp)
x = layers.Dense(100, activation='relu')(x)
x = layers.Dense(1, activation='sigmoid')(x)

discriminator = models.Model([inp], [x], name='discriminator')
discriminator.compile(optimizer='adam', loss='binary_crossentropy', 
                        metrics=['acc', precision(), recall()])
discriminator.trainable = False
discriminator.summary()

tracking <tf.Variable 'Variable:0' shape=() dtype=int32> tp
tracking <tf.Variable 'Variable_1:0' shape=() dtype=int32> fp
tracking <tf.Variable 'Variable_2:0' shape=() dtype=int32> tp
tracking <tf.Variable 'Variable_3:0' shape=() dtype=int32> fn
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Model: "discriminator"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 242, 58)           0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 14036)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 100)               1403700   
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 101       
Total params: 2,807,602
Trainable params: 1

  'Discrepancy between trainable weights and collected trainable'


In [7]:
# Генерация данных для предобучения дискриминатора
n_wrong = vectorized.shape[0] * 10
data = np.random.choice([0, 1], (n_wrong, max_len, sym_len), p=[0.975, 0.025])
data = np.concatenate([data, vectorized])
y = np.ones(len(data))
y[:n_wrong] = 0
x_train, x_test, y_train, y_test = train_test_split(
     data, y, test_size=0.33, random_state=42, shuffle=True)

In [34]:
history = discriminator.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=10, verbose=0)

KeyboardInterrupt: 

In [12]:
inp = layers.Input(shape=(LATENT_DIM,))
x = layers.Reshape((LATENT_DIM, 1))(inp)
x = layers.Dense(250, activation='relu')(x)
x = layers.MaxPool1D(10)(x)
x = layers.Flatten()(x)
x = layers.Dense(max_len * sym_len, activation='sigmoid')(x)
x = layers.Reshape((max_len, sym_len))(x)

generator = models.Model(inp, x, name='generator')
generator.compile(optimizer='adam', loss='mse')
generator.summary()


Model: "generator"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 50)                0         
_________________________________________________________________
reshape_1 (Reshape)          (None, 50, 1)             0         
_________________________________________________________________
dense_3 (Dense)              (None, 50, 250)           500       
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 5, 250)            0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 1250)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 14036)             17559036  
_________________________________________________________________
reshape_2 (Reshape)          (None, 242, 58)           0

In [31]:
gan_input = layers.Input(shape=(LATENT_DIM,))
x = layers.Reshape((LATENT_DIM, 1))(gan_input)
gan_output = discriminator(generator(x))
gan = models.Model(gan_input, gan_output, name='gan')

discriminator.trainable = False
gan.compile(optimizer='adam', loss='binary_crossentropy')
gan.summary()

Model: "gan"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 50)                0         
_________________________________________________________________
reshape_4 (Reshape)          (None, 50, 1)             0         
_________________________________________________________________
generator (Model)            (None, 242, 58)           17559536  
_________________________________________________________________
discriminator (Model)        (None, 1)                 1403801   
Total params: 18,963,337
Trainable params: 17,559,536
Non-trainable params: 1,403,801
_________________________________________________________________


In [14]:
inp = layers.Input(shape=(max_len, sym_len))
x = layers.Flatten()(inp)
x = layers.Dense(LATENT_DIM, activation='sigmoid')(x)
out = generator(x)

encoder = models.Model(inp, out, name="encoder")
encoder.compile(loss='binary_crossentropy', optimizer='adam')
encoder.summary()

Model: "encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 242, 58)           0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 14036)             0         
_________________________________________________________________
dense_5 (Dense)              (None, 50)                701850    
_________________________________________________________________
generator (Model)            (None, 242, 58)           17559536  
Total params: 18,261,386
Trainable params: 18,261,386
Non-trainable params: 0
_________________________________________________________________


In [35]:
history = encoder.fit(vectorized, vectorized, epochs=100, batch_size=50, verbose=0)

KeyboardInterrupt: 

In [36]:
noise = np.random.normal(0, 1,(1,LATENT_DIM,))
vec = generator.predict(noise)[0]
text = vec2text(vec)
print(text)

с коыл н аонте иан то,м
 ова
т нуо а тлвоорйае е моойот ага,сеесннт   всаио оия 


In [33]:
iterations = 10000
batch_size = 20
save_dir = 'examples/'

losses = {
    'adversarial': [],
    'discriminator': [],
}

start = 0
step = 0
while True:
    random_latent_vectors = np.random.normal(size=(batch_size, LATENT_DIM))

    generated_texts = generator.predict(random_latent_vectors)

    stop = start + batch_size
    if stop > len(vectorized):
        start = 0
    stop = start + batch_size
    real_texts = vectorized[start: stop]
    combined_texts = np.concatenate([generated_texts, real_texts])

    labels = np.concatenate([np.ones((batch_size, 1)),
                             np.zeros((batch_size, 1))])
    labels += 0.05 * np.random.random(labels.shape)

    d_loss = discriminator.train_on_batch(combined_texts, labels)[0]

    random_latent_vectors = np.random.normal(size=(batch_size, LATENT_DIM))

    misleading_targets = np.zeros((batch_size, 1))

    a_loss = gan.train_on_batch(random_latent_vectors, misleading_targets)
    
    start += batch_size
    if start > len(vectorized) - batch_size:
        start = 0

    if step % 1000 == 0:
        gan.save_weights('saves/gan-%i.h5' % step)

        losses['adversarial'].append(a_loss)
        losses['discriminator'].append(d_loss)
        
        print(f'{Fore.RED}discriminator loss at step %s: %s{Style.RESET_ALL}' % (step, d_loss))
        print(f'{Fore.GREEN}adversarial loss at step %s: %s{Style.RESET_ALL}' % (step, a_loss))
        print('resulting loss at step %s: %s' % (step, d_loss + a_loss))

        text = vec2text(generated_texts[0])
        with open(save_dir + 'generated_text-%i.txt' % step, 'w') as f:
            f.write(text)
        text = vec2text(real_texts[0])
        with open(save_dir + 'real_text-%i.txt' % step, 'w') as f:
            f.write(text)
    step += 1

[31mdiscriminator loss at step 0: -0.055458985[0m
[32madversarial loss at step 0: 7.1806755[0m
resulting loss at step 0: 7.1252165


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/alexei/.local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-33-460c2f82f72b>", line 34, in <module>
    a_loss = gan.train_on_batch(random_latent_vectors, misleading_targets)
  File "/home/alexei/.local/lib/python3.6/site-packages/keras/engine/training.py", line 1514, in train_on_batch
    outputs = self.train_function(ins)
  File "/home/alexei/.local/lib/python3.6/site-packages/tensorflow/python/keras/backend.py", line 3292, in __call__
    run_metadata=self.run_metadata)
  File "/home/alexei/.local/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1458, in __call__
    run_metadata_ptr)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/alexei/.local/lib/python3.6/site-packages/IPython/core/interactiveshell.py", lin

KeyboardInterrupt: 