In [None]:
from gensim.models.doc2vec import TaggedLineDocument
import numpy as np
from gensim.corpora import Dictionary

# Corpus source: gensim's TaggedLineDocument iterates over the text file
# and exposes each document's tokens through its `.words` attribute.
t_docs = TaggedLineDocument('data1.txt')

# Materialise the token lists so they can be indexed and iterated repeatedly.
words = list(doc.words for doc in t_docs)

# Token <-> integer-id vocabulary built from every document.
dic = Dictionary(words)

# Length of the longest document; later used as the padded sequence length.
words_maxlen = np.max(list(map(len, words)))

In [None]:
from keras.layers import Input, Dense, Flatten, Dropout, GRU, Activation
from keras.models import Sequential, Model
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.core import RepeatVector
from keras.layers.wrappers import TimeDistributed

def discriminator(input_shape, n_units):
    """Build the discriminator: a GRU encoder followed by a dense classifier.

    The layer stack is GRU -> Dropout(0.3) -> Dense -> LeakyReLU ->
    Dense(1, sigmoid). The stack's summary is printed as a side effect.

    Args:
        input_shape: Shape of one input sample, passed as ``input_shape``
            to the GRU layer.
        n_units: Indexable with at least two entries; ``n_units[0]`` is the
            GRU width and ``n_units[1]`` the width of the hidden Dense layer.

    Returns:
        A ``Model`` mapping an ``Input`` of ``input_shape`` to the single
        sigmoid output of the stack.
    """
    model = Sequential()

    model.add(GRU(n_units[0], input_shape = input_shape))
    model.add(Dropout(0.3))
    # LeakyReLU is an advanced-activation *layer*: stack it after a linear
    # Dense rather than passing the layer object as ``activation=``, which
    # Keras discourages. The computation (Dense followed by leaky ReLU) is
    # unchanged.
    model.add(Dense(n_units[1]))
    model.add(LeakyReLU())
    model.add(Dense(1, activation = 'sigmoid'))

    model.summary()

    # Named `inputs` so the builtin `input` is not shadowed.
    inputs = Input(shape = input_shape)

    return Model(inputs, model(inputs))

def generator(input_shape, output_shape, n_units):
    """Build the generator: encode the input with a GRU, then decode a sequence.

    The layer stack is GRU -> RepeatVector(output_shape[0]) -> Dropout(0.3) ->
    GRU(return_sequences) -> Dropout(0.3) -> TimeDistributed(Dense) ->
    LeakyReLU -> softmax. The stack's summary is printed as a side effect.

    Args:
        input_shape: Shape of one input sample, passed as ``input_shape``
            to the first GRU layer.
        output_shape: Indexable with at least two entries; ``output_shape[0]``
            is the repeat count given to ``RepeatVector`` and
            ``output_shape[1]`` the width of the per-step Dense layer.
        n_units: Indexable with at least two entries; widths of the first
            and second GRU layers respectively.

    Returns:
        A ``Model`` mapping an ``Input`` of ``input_shape`` to the softmax
        output of the stack.
    """
    model = Sequential()

    model.add(GRU(n_units[0], input_shape = input_shape))

    model.add(RepeatVector(output_shape[0]))
    model.add(Dropout(0.3))
    model.add(GRU(n_units[1], return_sequences = True))
    model.add(Dropout(0.3))
    # LeakyReLU is an advanced-activation *layer*: apply it as its own layer
    # after a linear per-step Dense rather than passing the layer object as
    # ``activation=``, which Keras discourages. Leaky ReLU is element-wise,
    # so applying it after TimeDistributed gives the same result.
    model.add(TimeDistributed(Dense(output_shape[1])))
    model.add(LeakyReLU())
    model.add(Activation('softmax'))

    model.summary()

    # Named `inputs` so the builtin `input` is not shadowed.
    inputs = Input(shape = input_shape)

    return Model(inputs, model(inputs))

In [None]:
from keras.optimizers import Adam

# Shape of one generator input sample; passed as `input_shape` to the
# generator's first GRU and used by `input_noise` to size the noise batch.
q_shape = (1, 1000)
# Shape of one real / generated sample: (longest document length, vocabulary size).
a_shape = (words_maxlen, len(dic))

# Optimizer for the discriminator when it is trained on its own.
dis_opt = Adam(lr = 1e-5, beta_1 = 0.1)

dis = discriminator(a_shape, [256, 128])
dis.compile(loss = 'binary_crossentropy', optimizer = dis_opt, metrics = ['acc'])

# The discriminator is frozen only AFTER it has been compiled above and
# BEFORE the stacked model below is compiled. Do not reorder these steps.
# NOTE(review): this relies on Keras fixing the trainable state at compile
# time, so `dis.train_on_batch` still updates weights while the stacked
# model treats them as constant -- confirm for the installed Keras version.
dis.trainable = False

gen = generator(q_shape, a_shape, [256, 512])

# Stacked model: generator input -> generator -> discriminator output.
x = Input(shape = q_shape)
y = dis(gen(x))

# Optimizer for the stacked (generator-training) model.
m_opt = Adam(lr = 2e-4, beta_1 = 0.5)

model = Model(x, y)
model.compile(loss = 'binary_crossentropy', optimizer = m_opt)


In [None]:
def padding_one_hot(ws, size):
    """One-hot encode a token sequence and zero-pad it to ``size`` rows.

    Args:
        ws: Sequence of tokens, looked up through ``dic.doc2idx``.
        size: Number of rows in the result; must be at least ``len(ws)``.

    Returns:
        A float array of shape ``(size, len(dic))``. Row ``i`` is the one-hot
        vector of ``ws[i]``; rows past ``len(ws)`` are all zero. Tokens that
        ``dic.doc2idx`` reports as unknown (negative id) also get an all-zero
        row instead of being silently mapped to the last vocabulary entry.

    Raises:
        ValueError: If ``ws`` is longer than ``size``.
    """
    n_tokens = len(ws)
    if n_tokens > size:
        raise ValueError(
            f'sequence of length {n_tokens} does not fit into {size} rows'
        )

    # Fill a zero matrix directly instead of slicing rows out of a
    # vocabulary x vocabulary identity matrix rebuilt on every call.
    encoded = np.zeros((size, len(dic)))
    if n_tokens:
        ids = np.asarray(dic.doc2idx(ws))
        known = ids >= 0
        encoded[np.flatnonzero(known), ids[known]] = 1.0

    return encoded

def input_noise(bsize):
    """Draw a batch of generator inputs from a standard normal distribution.

    Args:
        bsize: Number of samples in the batch.

    Returns:
        An array of shape ``(bsize,) + q_shape`` with mean-0, std-1 noise.
    """
    batch_shape = (bsize,) + q_shape
    return np.random.normal(0, 1, batch_shape)

def train(epochs, batch_size):
    """Alternate discriminator and generator updates for ``epochs`` iterations.

    Each iteration processes a single batch:
      1. sample ``batch_size`` real documents (with replacement) and one-hot
         encode them, padded to ``words_maxlen``;
      2. generate a fake batch with ``gen``;
      3. train ``dis`` on the real batch (label 1) and the fake batch (label 0);
      4. train the stacked ``model`` on fresh noise with label 1.

    Per-iteration losses and discriminator accuracy are printed.

    Args:
        epochs: Number of iterations (one batch each) to run.
        batch_size: Number of samples per batch.
    """
    valid = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for ep in range(epochs):
        idx = np.random.randint(0, len(words), batch_size)
        # Index the Python list directly. Converting the ragged `words` list
        # to an array on every iteration is wasted work and fails on NumPy
        # versions that reject ragged nested sequences.
        data = np.array([padding_one_hot(words[i], words_maxlen) for i in idx])

        gen_data = gen.predict(input_noise(batch_size))

        dis_loss_valid = dis.train_on_batch(data, valid)
        dis_loss_fake = dis.train_on_batch(gen_data, fake)

        # Each result is [loss, acc]; average the real and fake halves.
        dis_loss, dis_acc = 0.5 * np.add(dis_loss_valid, dis_loss_fake)

        model_loss = model.train_on_batch(input_noise(batch_size), valid)

        print(f'epoch = {ep}, model loss = {model_loss}, dis loss = {dis_loss}, dis acc = {dis_acc}, dis fake acc = {dis_loss_fake[1]}')


In [None]:
# Run 100 training iterations (one batch each) with 100 samples per batch.
train(epochs = 100, batch_size = 100)

In [None]:
def answer():
    """Generate one sequence from fresh noise and decode it to a string.

    Returns:
        The tokens with the highest generator score at each step, looked up
        in ``dic`` and concatenated without a separator.
    """
    # Single-sample batch: keep only the first (and only) sequence.
    step_scores = gen.predict(input_noise(1))[0]
    token_ids = np.argmax(step_scores, axis = -1)

    return ''.join(dic[token_id] for token_id in token_ids)


In [None]:
# Print ten independently generated samples, each followed by a separator line.
for _ in range(10):
    print(answer(), '-----', sep = '\n')

