In [None]:
import re
import sqlite3
import numpy as np
import pandas as pd
from time import time
import tensorflow as tf
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential, load_model
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.layers import Dense, Activation, Embedding, Dropout, TimeDistributed, LSTM

""" Custom Libs """
import Cleaner as c
import TokenMgmt as tm

In [None]:
# Training hyper-parameters and switches read by the cells further down.
run_model = True     # False skips the training cell entirely
epochs    = 300      # `epochs` argument handed to model.fit
batch_sz  = 64       # `batch_size` argument handed to model.fit
dropout   = 0.5      # Dropout rate after the LSTM; 0 leaves the layer out
l2_reg    = 0.0001   # L2 factor on the output-layer bias; 0 disables it

In [None]:
def fetch_profiles(filename, n = None):
    """Read profile names from a text file that holds one name per line.

    Only the first `n` lines are considered; duplicates among those lines
    are dropped.

    Args:
        filename: path of the text file to read.
        n: number of leading lines to keep before de-duplicating.
           None (the default) keeps every line of the file.

    Returns:
        A list of the unique lines. On Python 3.7+ (where dicts preserve
        insertion order) the list follows the order of first appearance
        in the file.
    """
    # `with` guarantees the handle is closed even if read() raises.
    with open(filename, 'r') as f:
        profiles = f.read().splitlines()
    # dict.fromkeys removes duplicates like set() does, but without the
    # hash-randomised ordering that made the result differ between runs.
    return(list(dict.fromkeys(profiles[:n])))

In [None]:
# Locations of the tweet database and of the list of profiles to train on.
sqlite_file = '../../data/database/deeplearning.sqlite'
profilename = '../../data/profiles.txt'
table_name  = 'tweets'

# Pass the line limit explicitly: None means "no limit", because the helper
# slices with profiles[:n] and a None bound keeps every line.
profiles    = fetch_profiles(profilename, None)
# Drop '@' characters from both ends of each handle before matching AUTHOR.
profiles    = [p.strip('@') for p in profiles]
cd          = c.CleanData(sqlite_file, table_name)
# NOTE(review): the author names are spliced directly into the SQL text. That
# is tolerable only while profiles.txt is a trusted local file; switch to bound
# parameters if CleanData.set_table can accept them (its API is not visible here).
q           ='SELECT * FROM {} WHERE AUTHOR IN ("{}");'.format(table_name, '", "'.join(profiles))

cd.set_table(q)
data = cd.get_clean_table()

# Turn the CleanText column into token sequences, then into padded
# predictor/label arrays (presumably one label per sequence - confirm in TokenMgmt).
inp_sequences, total_words = tm.get_sequence_of_tokens(list(data.CleanText.values))
predictors, label, max_sequence_len = tm.generate_padded_sequences(inp_sequences, total_words)

In [None]:
def create_model(max_sequence_len, total_words, embedding_dim = 64, lstm_units = 1080):
    """Build and compile the Embedding -> LSTM -> softmax model and its callbacks.

    The Dropout layer and the L2 bias regulariser are controlled by the
    module-level `dropout` and `l2_reg` settings; a value of 0 switches the
    corresponding feature off.

    Args:
        max_sequence_len: length of the padded sequences; the network input
            is one element shorter (max_sequence_len - 1).
        total_words: size of the embedding vocabulary and of the softmax output.
        embedding_dim: width of the embedding vectors (default 64).
        lstm_units: number of units in the LSTM layer (default 1080).

    Returns:
        A tuple (model, checkpointer, tensorboard): the compiled Sequential
        model, a ModelCheckpoint callback writing
        'model/single-user-model-{epoch:02d}.hdf5', and a TensorBoard callback
        logging to a time-stamped directory under 'tb-logs/'.
    """
    import os

    input_len = max_sequence_len - 1
    model = Sequential()
    model.add(Embedding(total_words, embedding_dim, input_length = input_len))
    model.add(LSTM(lstm_units))
    if dropout != 0:
        model.add(Dropout(dropout))
    # bias_regularizer=None is the Dense default, so one call covers both the
    # regularised and the unregularised configuration.
    bias_reg = l2(l2_reg) if l2_reg != 0 else None
    model.add(Dense(total_words, activation = 'softmax', bias_regularizer = bias_reg))
    model.compile(loss = 'categorical_crossentropy', optimizer = 'adam')

    # Older Keras releases do not create the checkpoint directory themselves;
    # without it the first save can fail after a whole epoch of training.
    checkpoint_dir = 'model'
    os.makedirs(checkpoint_dir, exist_ok = True)
    checkpointer = ModelCheckpoint(filepath = checkpoint_dir
                                   + '/single-user-model-{epoch:02d}.hdf5', verbose = 1)
    tensorboard = TensorBoard(log_dir = 'tb-logs/{}'.format(time()))
    return(model, checkpointer, tensorboard)

In [None]:
# Instantiate the network together with its checkpoint / TensorBoard callbacks,
# then print the layer-by-layer summary.
model, checkpointer, tensorboard = create_model(
    max_sequence_len = max_sequence_len,
    total_words = total_words,
)
model.summary()

In [None]:
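# To watch training progress, start TensorBoard from a shell (or uncomment the next line):
# !tensorboard --logdir=tb-logs/
# Example startup message: TensorBoard 1.12.0 at http://xps:6006 (Press CTRL+C to quit)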

In [None]:
# Train only when requested via run_model and only when TensorFlow can see a GPU.
if run_model:
    if tf.test.is_gpu_available():
        model.fit(predictors, label,
                  epochs = epochs,
                  batch_size = batch_sz,
                  verbose = 1,
                  callbacks=[checkpointer, tensorboard])
    else:
        # Previously this case was skipped without a word, so a missing
        # checkpoint only surfaced later as a confusing load failure.
        print('No GPU visible to TensorFlow - training skipped, no new checkpoints written.')

In [None]:
# Swap the in-memory model for the checkpoint saved under the epoch-43 file name.
# NOTE(review): this file must already exist on disk; it is only produced by a
# training run that reached that checkpoint.
model_file = "model/single-user-model-43.hdf5"
model = load_model(filepath = model_file)

In [None]:
# Generate text from the restored model, seeded with "Today".
# NOTE(review): 16 is presumably the number of words to append - confirm in TokenMgmt.
print(
    tm.generate_text("Today", 16, model, max_sequence_len)
)