In [17]:
import numpy as np
import sys
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, TimeDistributed
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from keras.utils import np_utils
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras import backend

import json
import re

# Load Posts and Comments from data

In [2]:
# Sentinel characters wrapped around every sample so the model can learn where
# a text begins and ends (NUL and SOH never occur in ordinary text).
BEFORE_START = '\0'
AFTER_END = '\1'


def preproc(s):
    """Normalise one raw text sample for character-level training.

    Steps, in order:
      1. wrap the text in the BEFORE_START / AFTER_END sentinels;
      2. replace every bracketed ``[left|right]`` span with a single space
         (presumably social-network mention markup such as ``[id123|Name]``
         -- confirm against the data source);
      3. replace the guillemets « and » with a plain double quote;
      4. lower-case the result.

    Args:
        s: raw text of a post, comment or paragraph.

    Returns:
        The normalised string, including both sentinel characters.
    """
    s = BEFORE_START + s + AFTER_END
    # Each span is matched on its own: neither side of the '|' may contain a
    # bracket or a newline.  The previous greedy pattern r'\[.*\|.*\]' ran from
    # the first '[' to the last ']' on a line and so erased all the text
    # between two separate spans.
    s = re.sub(r'\[[^\[\]|\n]*\|[^\[\]\n]*\]', ' ', s)
    s = re.sub(r'[«»]', '"', s)
    return s.lower()

In [3]:
# Context window size (in characters) used when building training patterns;
# also reused here as the minimum length a text must exceed to be kept.
SEQ_LENGTH = 10

# JSON is UTF-8 by specification.  Passing the encoding explicitly makes the
# Cyrillic/emoji content load identically on platforms whose default text
# encoding is not UTF-8.
with open('data/kalikfan.json', 'r', encoding='utf-8') as f:
    kalikfan = json.load(f)

# Collect every post body, plus every comment with more than 2 likes,
# keeping only texts longer than SEQ_LENGTH characters after stripping.
texts = []
for p in kalikfan:
    ptext = p['text'].strip()
    if len(ptext) > SEQ_LENGTH:
        texts.append(ptext)

    for c in p['comments']:
        ctext = c['text'].strip()
        if c['likesCount'] > 2 and len(ctext) > SEQ_LENGTH:
            texts.append(ctext)

# Add sentinels, strip markup and lower-case (see preproc).
texts = [preproc(x) for x in texts]
print('Number of Kalik samples:', len(texts))

Number of Kalik samples: 1799


In [4]:
# NOTE(review): the file is opened with the platform default encoding; its
# actual encoding is not visible here -- confirm and pass `encoding=` explicitly.
# Blank lines are rewritten to "newline + space" so that a single split on
# '\n ' breaks the text both at blank lines and at lines starting with a space.
with open('soviet_const.txt', 'r') as f:
    sov = f.read().replace('\n\n', '\n ')

# Within one chunk the remaining newlines are plain line wraps: join them with
# spaces, trim, and keep only chunks longer than SEQ_LENGTH characters.
flattened = (chunk.replace('\n', ' ').strip() for chunk in sov.split('\n '))
sov_texts = [preproc(chunk) for chunk in flattened if len(chunk) > SEQ_LENGTH]

print('Number of text samples with Soviet:', len(sov_texts))

# NOTE(review): this extends `texts` in place, so re-running this cell without
# re-running the previous one appends the same samples a second time.
texts += sov_texts
print('Total number of text samples:', len(texts))

Number of text samples with Soviet: 420
Total number of text samples: 2219


# Shisha Learning 🤙🤙🤙
Adapted from https://stackabuse.com/text-generation-with-python-and-tensorflow-keras/

In [5]:
# Vocabulary: every distinct character in the corpus, in sorted order, with
# lookup tables in both directions (character <-> integer index).
chars = sorted(set(''.join(texts)))
char_to_num = {ch: idx for idx, ch in enumerate(chars)}
num_to_char = dict(enumerate(chars))

input_len = len(texts)
vocab_len = len(chars)
print("Total number of text samples:", input_len)
print("Total vocab:", vocab_len)

Total number of text samples: 2219
Total vocab: 821


In [6]:
# Training pairs: each window of SEQ_LENGTH character indices (x) is paired
# with the index of the character that immediately follows it (y).
x_data = []
y_data = []

# Padding added around every text.  The texts already carry one BEFORE_START
# and one AFTER_END from preproc, so the first window consists entirely of
# BEFORE_START and the trailing windows run into a tail of AFTER_END characters.
left_pad = [BEFORE_START] * (SEQ_LENGTH - 1)
right_pad = [AFTER_END] * SEQ_LENGTH

for text in texts:
    encoded = [char_to_num[ch] for ch in left_pad + list(text) + right_pad]
    for start in range(len(encoded) - SEQ_LENGTH):
        stop = start + SEQ_LENGTH
        x_data.append(encoded[start:stop])
        y_data.append(encoded[stop])

n_patterns = len(x_data)
print("Total Patterns:", n_patterns)

Total Patterns: 357397


In [7]:
# Shape the windows as (n_patterns, SEQ_LENGTH, 1): one feature per time step,
# holding the raw character index.
X = np.asarray(x_data).reshape(n_patterns, SEQ_LENGTH, 1)
# Disabled alternatives, kept for reference:
#X = np.array(x_data)
#X = X/float(vocab_len)

In [8]:
# One-hot encode the target characters.  The class count is pinned to the
# vocabulary size instead of being inferred from the largest label present,
# so the network's softmax width always equals len(chars) -- which the text
# sampler relies on when it maps a predicted class back to a character.
y = np_utils.to_categorical(y_data, num_classes=vocab_len)

In [20]:
# Next-character classifier:
#   LSTM (emits an output per time step)
#   -> two Dense layers applied independently at every time step
#   -> LSTM (emits only its final output)
#   -> Dense -> softmax over the y.shape[1] character classes.
model = Sequential([
    CuDNNLSTM(256, input_shape=(X.shape[1], X.shape[2]), return_sequences=True),
    TimeDistributed(Dense(256, activation='sigmoid')),
    TimeDistributed(Dense(256, activation='sigmoid')),
    CuDNNLSTM(256, return_sequences=False),
    Dense(256, activation='sigmoid'),
    Dense(y.shape[1], activation='softmax'),
])

In [21]:
# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

In [22]:
class MyCustomCallback(tensorflow.keras.callbacks.Callback):
    """Keras callback that periodically prints a text sample from the model.

    On every SAMPLE_EVERY-th epoch (0-based epoch index) a text is generated
    character by character and printed between triple-quote marker lines.
    Calling ``on_epoch_end(0)`` by hand, after ``set_model(model)``, also
    works as a one-off way to print a sample.

    Relies on the notebook-level names ``chars``/``char_to_num``/
    ``num_to_char``, ``SEQ_LENGTH``, ``BEFORE_START`` and ``AFTER_END``.
    """

    # A sample is printed when ``epoch % SAMPLE_EVERY == 0``.
    SAMPLE_EVERY = 10
    # Generation is cut off once more than this many characters were produced...
    MAX_CHARS = 500
    # ...and this marker is appended to show that the text was truncated.
    TRUNCATION_MARK = '@'

    def _sample_text(self):
        """Generate one text by repeatedly sampling the next character.

        Starts from a window of SEQ_LENGTH BEFORE_START characters, feeds the
        window to ``self.model``, draws the next character from the predicted
        distribution, and slides the window by one.  Stops when AFTER_END is
        drawn or when the length limit is exceeded.

        Returns:
            The generated string (without the AFTER_END sentinel).
        """
        window = [char_to_num[BEFORE_START]] * SEQ_LENGTH
        generated = []
        while True:
            if len(generated) > self.MAX_CHARS:
                generated.append(self.TRUNCATION_MARK)
                break

            x = np.reshape(window, (1, len(window), 1)).astype(float)
            probs = np.asarray(self.model.predict(x, verbose=0)[0], dtype=np.float64)
            # Renormalise in double precision so that choice() never rejects
            # the vector for not summing to 1 within its tolerance.
            probs /= probs.sum()

            # Sample a class *index* and map it back through num_to_char.
            # Sampling from `chars` directly makes NumPy build a fixed-width
            # string array, which strips the NUL BEFORE_START character to ''
            # and would then fail the char_to_num lookup if it were drawn.
            next_idx = int(np.random.choice(len(probs), p=probs))
            next_char = num_to_char[next_idx]

            if next_char == AFTER_END:
                break
            generated.append(next_char)

            # Slide the context window forward by one character.
            window = window[1:] + [next_idx]

        return ''.join(generated)

    def on_epoch_end(self, epoch, logs=None):
        """Print a generated sample on every SAMPLE_EVERY-th epoch.

        Args:
            epoch: 0-based index of the epoch that just finished.
            logs: metrics dict supplied by Keras; not used.
        """
        if epoch % self.SAMPLE_EVERY != 0:
            return
        print('\n"""\n' + self._sample_text() + '\n"""\n')

In [23]:
# Overwrite the weights file only when the training loss reaches a new minimum,
# and print a generated sample alongside training via MyCustomCallback.
filepath = "model_weights_saved.hdf5"
checkpoint = ModelCheckpoint(
    filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
desired_callbacks = [checkpoint, MyCustomCallback()]

In [24]:
# Long-running cell: up to 300 epochs over all patterns, reshuffled each epoch.
model.fit(
    X,
    y,
    batch_size=512,
    epochs=300,
    shuffle=True,
    callbacks=desired_callbacks,
)

Train on 357397 samples
Epoch 1/300




Epoch 00001: loss improved from inf to 3.36009, saving model to model_weights_saved.hdf5

"""
эт🍏иете☘уечроане💣пй ном😊ом ш напокосзр ороеа м отоой рр кощ👌ддлюд рамевв 
рнпакш иапр)ик😍👿♂ 
"""

Epoch 2/300
Epoch 00002: loss improved from 3.36009 to 3.01114, saving model to model_weights_saved.hdf5
Epoch 3/300
Epoch 00003: loss improved from 3.01114 to 2.94948, saving model to model_weights_saved.hdf5
Epoch 4/300
Epoch 00004: loss improved from 2.94948 to 2.91115, saving model to model_weights_saved.hdf5
Epoch 5/300
Epoch 00005: loss improved from 2.91115 to 2.85973, saving model to model_weights_saved.hdf5
Epoch 6/300
Epoch 00006: loss improved from 2.85973 to 2.76814, saving model to model_weights_saved.hdf5
Epoch 7/300
Epoch 00007: loss improved from 2.76814 to 2.67343, saving model to model_weights_saved.hdf5
Epoch 8/300
Epoch 00008: loss improved from 2.67343 to 2.57339, saving model to model_weights_saved.hdf5
Epoch 9/300
Epoch 00009: loss improved from 2.57339 to 2.49066, saving mo

Epoch 31/300
Epoch 00031: loss improved from 1.96858 to 1.96002, saving model to model_weights_saved.hdf5

"""
агокком я в мильй боробываящих всупкы отнялбальрый реле подкавилов😈👍👗🏻
"""

Epoch 32/300
Epoch 00032: loss improved from 1.96002 to 1.95720, saving model to model_weights_saved.hdf5
Epoch 33/300
Epoch 00033: loss improved from 1.95720 to 1.94238, saving model to model_weights_saved.hdf5
Epoch 34/300
Epoch 00034: loss improved from 1.94238 to 1.93112, saving model to model_weights_saved.hdf5
Epoch 35/300
Epoch 00035: loss improved from 1.93112 to 1.93076, saving model to model_weights_saved.hdf5
Epoch 36/300
Epoch 00036: loss improved from 1.93076 to 1.91217, saving model to model_weights_saved.hdf5
Epoch 37/300
Epoch 00037: loss improved from 1.91217 to 1.90749, saving model to model_weights_saved.hdf5
Epoch 38/300
Epoch 00038: loss improved from 1.90749 to 1.90176, saving model to model_weights_saved.hdf5
Epoch 39/300
Epoch 00039: loss improved from 1.90176 to 1.88687, saving

Epoch 62/300
Epoch 00062: loss did not improve from 1.75555
Epoch 63/300
Epoch 00063: loss improved from 1.75555 to 1.73943, saving model to model_weights_saved.hdf5
Epoch 64/300
Epoch 00064: loss improved from 1.73943 to 1.73823, saving model to model_weights_saved.hdf5
Epoch 65/300
Epoch 00065: loss did not improve from 1.73823
Epoch 66/300
Epoch 00066: loss improved from 1.73823 to 1.72537, saving model to model_weights_saved.hdf5
Epoch 67/300
Epoch 00067: loss did not improve from 1.72537
Epoch 68/300
Epoch 00068: loss did not improve from 1.72537
Epoch 69/300
Epoch 00069: loss improved from 1.72537 to 1.71237, saving model to model_weights_saved.hdf5
Epoch 70/300
Epoch 00070: loss improved from 1.71237 to 1.70839, saving model to model_weights_saved.hdf5
Epoch 71/300
Epoch 00071: loss did not improve from 1.70839

"""
оу пола кальянчик гуть своими живую дымобах, вдртом по флеряк строи долстрани сук с парни в кударистели и сплнут😈🗯🗣 траао и тронвая.
"""

Epoch 72/300
Epoch 00072: l

Epoch 00094: loss did not improve from 1.61669
Epoch 95/300
Epoch 00095: loss improved from 1.61669 to 1.60692, saving model to model_weights_saved.hdf5
Epoch 96/300
Epoch 00096: loss did not improve from 1.60692
Epoch 97/300
Epoch 00097: loss improved from 1.60692 to 1.59905, saving model to model_weights_saved.hdf5
Epoch 98/300
Epoch 00098: loss did not improve from 1.59905
Epoch 99/300
Epoch 00099: loss improved from 1.59905 to 1.59327, saving model to model_weights_saved.hdf5
Epoch 100/300
Epoch 00100: loss did not improve from 1.59327
Epoch 101/300
Epoch 00101: loss improved from 1.59327 to 1.59003, saving model to model_weights_saved.hdf5

"""
-э понимаем настрительств ссср и народная сосай - на**
-зивершеником расседку😍 респекло - дымом, уот гнатора, связаны развёнцансосавила,
"""

Epoch 102/300
Epoch 00102: loss did not improve from 1.59003
Epoch 103/300
Epoch 00103: loss improved from 1.59003 to 1.57959, saving model to model_weights_saved.hdf5
Epoch 104/300
Epoch 00104: loss 

Epoch 00126: loss improved from 1.51213 to 1.50563, saving model to model_weights_saved.hdf5
Epoch 127/300
Epoch 00127: loss improved from 1.50563 to 1.50506, saving model to model_weights_saved.hdf5
Epoch 128/300
Epoch 00128: loss did not improve from 1.50506
Epoch 129/300
Epoch 00129: loss improved from 1.50506 to 1.49764, saving model to model_weights_saved.hdf5
Epoch 130/300
Epoch 00130: loss did not improve from 1.49764
Epoch 131/300
Epoch 00131: loss improved from 1.49764 to 1.49243, saving model to model_weights_saved.hdf5

"""
спасяй-я это, за двойное яблочкой завин науках 📝🔞💪😎 так вот это лайфухи)ята кальян) калик там его раздана "ну для барабаны, повотер вышему🤙🏻
"""

Epoch 132/300
Epoch 00132: loss did not improve from 1.49243
Epoch 133/300
Epoch 00133: loss did not improve from 1.49243
Epoch 134/300
Epoch 00134: loss improved from 1.49243 to 1.48241, saving model to model_weights_saved.hdf5
Epoch 135/300
Epoch 00135: loss improved from 1.48241 to 1.48180, saving model to mo

KeyboardInterrupt: 

In [33]:
# Restore the weights last written by the ModelCheckpoint callback (same file
# name as its `filepath`), i.e. those with the lowest training loss seen so
# far, then re-compile with the same loss and optimizer as before.
filename = "model_weights_saved.hdf5"
model.load_weights(filename)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
)

In [34]:
# Use the sampling callback outside of training: attach the model by hand so
# that the callback's `self.model` is available when on_epoch_end is called.
cb = MyCustomCallback()
cb.set_model(model)

In [59]:
# Epoch 0 satisfies the callback's "every 10th epoch" check, so this prints
# one freshly generated sample.
cb.on_epoch_end(epoch=0)


"""
ну насты государственных органов ссср имеют попухей любимые автономного с инойтений💙 дымоческого и корольсностью союзных общественных республика от был лировома
"""

