In [1]:
import re
import sqlite3
import numpy as np
import pandas as pd
from time import time
import tensorflow as tf
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.preprocessing.text import Tokenizer
from keras.models import Sequential, load_model
from keras.preprocessing.sequence import pad_sequences
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping
from keras.layers import Dense, Activation, Embedding, Dropout, TimeDistributed, LSTM

""" Custom Libs """
import Cleaner as c
import TokenMgmt as tm

Using TensorFlow backend.


In [2]:
# --- Training loop settings (passed to model.fit) ---
epochs = 300
batch_sz = 64
run_model = True  # master switch checked by the training cell

# --- Regularisation (read inside create_model) ---
dropout = 0.2  # a value of 0 means no Dropout layer is added
l2_reg = 3e-4  # a value of 0 means the output layer gets no L2 regulariser

# --- Adam optimizer arguments (passed to keras.optimizers.Adam) ---
learn_rate = 1e-3
beta_1 = 0.9
beta_2 = 0.999
epsilon = None
decay_rate = 0
amsgrad = False

In [3]:
def fetch_profiles(filename, n):
    """Return the distinct entries found in the first ``n`` lines of a file.

    Args:
        filename: Path of a text file holding one profile handle per line.
        n: Number of leading lines to consider (slice semantics, so ``0``
            yields an empty list and a value larger than the file is fine).

    Returns:
        A list of the unique lines among the first ``n``, in the order they
        first appear in the file. Duplicates are collapsed after slicing, so
        the result may hold fewer than ``n`` items.
    """
    # The context manager closes the handle even if reading raises.
    with open(filename, 'r') as f:
        profiles = f.read().splitlines()
    # dict.fromkeys de-duplicates while keeping first-seen order; a plain set
    # would return the handles in a hash-dependent order that can change
    # between interpreter runs.
    return list(dict.fromkeys(profiles[:n]))

In [4]:
# Locations of the tweet database and the list of profile handles.
sqlite_file = '../../data/database/deeplearning.sqlite'
profilename = '../../data/profiles.txt'
table_name = 'tweets'

# Distinct handles from the first two lines of the profile file, with any
# '@' characters stripped from both ends.
profiles = [p.strip('@') for p in fetch_profiles(profilename, 2)]

# Select every row whose AUTHOR is one of the chosen handles.
cd = c.CleanData(sqlite_file, table_name)
q = 'SELECT * FROM {} WHERE AUTHOR IN ("{}");'.format(
    table_name,
    '", "'.join(profiles),
)
cd.set_table(q)
data = cd.get_clean_table()

# Turn the cleaned text into token sequences, then into padded model inputs
# (x), targets (y) and the padded sequence length.
_, total_words = tm.get_sequence_of_tokens(list(data.CleanText.values))
x, y, max_sequence_len = tm.generate_padded_sequences(_, total_words)

# Optimizer built from the values set in the configuration cell.
opt_adam = Adam(
    lr=learn_rate,
    beta_1=beta_1,
    beta_2=beta_2,
    epsilon=epsilon,
    decay=decay_rate,
    amsgrad=amsgrad,
)

In [5]:
def create_model(max_sequence_len, total_words):
    """Build the LSTM language model together with its training callbacks.

    The network is Embedding(1000) -> optional Dropout -> LSTM(256) ->
    softmax Dense over ``total_words`` classes, compiled with categorical
    cross-entropy and an accuracy metric.

    Reads the notebook-level names ``dropout``, ``l2_reg`` and ``opt_adam``.
    Also makes sure the ``model`` checkpoint folder exists, because the
    checkpoint callback writes a file there at the end of every epoch.

    Args:
        max_sequence_len: Padded sequence length; the model input length is
            this value minus one.
        total_words: Used as the embedding input dimension and as the number
            of output classes.

    Returns:
        Tuple ``(model, checkpointer, tensorboard, earlystop)``.
    """
    import os  # local import: only needed to prepare the checkpoint folder

    # presumably the last token of each padded sequence is the prediction
    # target, hence one fewer input step - confirm in TokenMgmt.
    input_len = max_sequence_len - 1

    model = Sequential()
    model.add(Embedding(total_words, 1000, input_length = input_len))
    # Optional deeper stack, currently disabled:
    #model.add(LSTM(512, return_sequences = True))
    #model.add(LSTM(256, return_sequences = True))
    if dropout != 0:
        model.add(Dropout(dropout))
    model.add(LSTM(256))

    dense_kwargs = {'activation': 'softmax'}
    if l2_reg != 0:
        # NOTE(review): only the bias is regularised here; confirm that
        # kernel_regularizer was not the intended argument.
        dense_kwargs['bias_regularizer'] = l2(l2_reg)
    model.add(Dense(total_words, **dense_kwargs))

    model.compile(loss = 'categorical_crossentropy', optimizer = opt_adam, metrics=['accuracy'])

    # Create the checkpoint folder up front so a missing directory does not
    # abort training when the first checkpoint is written.
    checkpoint_dir = 'model'
    os.makedirs(checkpoint_dir, exist_ok=True)
    # One checkpoint file per epoch, named with the creation timestamp.
    checkpointer = ModelCheckpoint(filepath=checkpoint_dir
                                   + '/single-user-model-{}'.format(time()) + '-{epoch:02d}.hdf5', verbose = 1)
    tensorboard = TensorBoard(log_dir = 'tb-logs/{}'.format(time()))
    # Stops when the training loss has not improved for 100 epochs.
    earlystop = EarlyStopping(monitor='loss', min_delta=0, patience=100, verbose=0, mode='min')
    return(model, checkpointer, tensorboard, earlystop)

In [6]:
# Build the network and its callbacks, then print the layer/parameter table.
model, checkpointer, tensorboard, earlystop = create_model(
    max_sequence_len,
    total_words,
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 24, 1000)          1223000   
_________________________________________________________________
dropout_1 (Dropout)          (None, 24, 1000)          0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 256)               1287168   
_________________________________________________________________
dense_1 (Dense)              (None, 1223)              314311    
Total params: 2,824,479
Trainable params: 2,824,479
Non-trainable params: 0
_________________________________________________________________


In [7]:
# !tensorboard --logdir=tb-logs/
# TensorBoard 1.12.0 at http://xps:6006 (Press CTRL+C to quit)

In [8]:
# Train only when enabled in the configuration cell AND TensorFlow can see a
# GPU. A quarter of the samples is held out for validation.
if run_model:
    if tf.test.is_gpu_available():
        model.fit(x = x, y = y,
                  epochs = epochs,
                  batch_size = batch_sz,
                  validation_split = 0.25,
                  verbose = 1,
                  callbacks=[checkpointer, tensorboard, earlystop])
    else:
        # Say so explicitly: without this the cell finishes silently and no
        # checkpoint files are produced.
        print('Training skipped: TensorFlow reports no available GPU.')

Train on 3651 samples, validate on 1217 samples
Epoch 1/300

Epoch 00001: saving model to model/single-user-model-1544878349.0387578-01.hdf5
Epoch 2/300

Epoch 00002: saving model to model/single-user-model-1544878349.0387578-02.hdf5
Epoch 3/300

Epoch 00003: saving model to model/single-user-model-1544878349.0387578-03.hdf5
Epoch 4/300

Epoch 00004: saving model to model/single-user-model-1544878349.0387578-04.hdf5
Epoch 5/300

Epoch 00005: saving model to model/single-user-model-1544878349.0387578-05.hdf5
Epoch 6/300

Epoch 00006: saving model to model/single-user-model-1544878349.0387578-06.hdf5
Epoch 7/300

Epoch 00007: saving model to model/single-user-model-1544878349.0387578-07.hdf5
Epoch 8/300

Epoch 00008: saving model to model/single-user-model-1544878349.0387578-08.hdf5
Epoch 9/300

Epoch 00009: saving model to model/single-user-model-1544878349.0387578-09.hdf5
Epoch 10/300

Epoch 00010: saving model to model/single-user-model-1544878349.0387578-10.hdf5
Epoch 11/300

Epoch 0


Epoch 00075: saving model to model/single-user-model-1544878349.0387578-75.hdf5
Epoch 76/300

Epoch 00076: saving model to model/single-user-model-1544878349.0387578-76.hdf5
Epoch 77/300

Epoch 00077: saving model to model/single-user-model-1544878349.0387578-77.hdf5
Epoch 78/300

Epoch 00078: saving model to model/single-user-model-1544878349.0387578-78.hdf5
Epoch 79/300

Epoch 00079: saving model to model/single-user-model-1544878349.0387578-79.hdf5
Epoch 80/300

Epoch 00080: saving model to model/single-user-model-1544878349.0387578-80.hdf5
Epoch 81/300

Epoch 00081: saving model to model/single-user-model-1544878349.0387578-81.hdf5
Epoch 82/300

Epoch 00082: saving model to model/single-user-model-1544878349.0387578-82.hdf5
Epoch 83/300

Epoch 00083: saving model to model/single-user-model-1544878349.0387578-83.hdf5
Epoch 84/300

Epoch 00084: saving model to model/single-user-model-1544878349.0387578-84.hdf5
Epoch 85/300

Epoch 00085: saving model to model/single-user-model-154487


Epoch 00113: saving model to model/single-user-model-1544878349.0387578-113.hdf5
Epoch 114/300

Epoch 00114: saving model to model/single-user-model-1544878349.0387578-114.hdf5
Epoch 115/300

Epoch 00115: saving model to model/single-user-model-1544878349.0387578-115.hdf5
Epoch 116/300

Epoch 00116: saving model to model/single-user-model-1544878349.0387578-116.hdf5
Epoch 117/300

Epoch 00117: saving model to model/single-user-model-1544878349.0387578-117.hdf5
Epoch 118/300

Epoch 00118: saving model to model/single-user-model-1544878349.0387578-118.hdf5
Epoch 119/300

Epoch 00119: saving model to model/single-user-model-1544878349.0387578-119.hdf5
Epoch 120/300

Epoch 00120: saving model to model/single-user-model-1544878349.0387578-120.hdf5
Epoch 121/300

Epoch 00121: saving model to model/single-user-model-1544878349.0387578-121.hdf5
Epoch 122/300

Epoch 00122: saving model to model/single-user-model-1544878349.0387578-122.hdf5
Epoch 123/300

Epoch 00123: saving model to model/sing


Epoch 00187: saving model to model/single-user-model-1544878349.0387578-187.hdf5
Epoch 188/300

Epoch 00188: saving model to model/single-user-model-1544878349.0387578-188.hdf5
Epoch 189/300

Epoch 00189: saving model to model/single-user-model-1544878349.0387578-189.hdf5
Epoch 190/300

Epoch 00190: saving model to model/single-user-model-1544878349.0387578-190.hdf5
Epoch 191/300

Epoch 00191: saving model to model/single-user-model-1544878349.0387578-191.hdf5
Epoch 192/300

Epoch 00192: saving model to model/single-user-model-1544878349.0387578-192.hdf5
Epoch 193/300

Epoch 00193: saving model to model/single-user-model-1544878349.0387578-193.hdf5
Epoch 194/300

Epoch 00194: saving model to model/single-user-model-1544878349.0387578-194.hdf5
Epoch 195/300

Epoch 00195: saving model to model/single-user-model-1544878349.0387578-195.hdf5
Epoch 196/300

Epoch 00196: saving model to model/single-user-model-1544878349.0387578-196.hdf5
Epoch 197/300

Epoch 00197: saving model to model/sing


Epoch 00261: saving model to model/single-user-model-1544878349.0387578-261.hdf5
Epoch 262/300

Epoch 00262: saving model to model/single-user-model-1544878349.0387578-262.hdf5
Epoch 263/300

Epoch 00263: saving model to model/single-user-model-1544878349.0387578-263.hdf5
Epoch 264/300

Epoch 00264: saving model to model/single-user-model-1544878349.0387578-264.hdf5
Epoch 265/300

Epoch 00265: saving model to model/single-user-model-1544878349.0387578-265.hdf5
Epoch 266/300

Epoch 00266: saving model to model/single-user-model-1544878349.0387578-266.hdf5
Epoch 267/300

Epoch 00267: saving model to model/single-user-model-1544878349.0387578-267.hdf5
Epoch 268/300

Epoch 00268: saving model to model/single-user-model-1544878349.0387578-268.hdf5
Epoch 269/300

Epoch 00269: saving model to model/single-user-model-1544878349.0387578-269.hdf5
Epoch 270/300

Epoch 00270: saving model to model/single-user-model-1544878349.0387578-270.hdf5
Epoch 271/300

Epoch 00271: saving model to model/sing

In [9]:
# Reload the epoch-300 checkpoint of one specific training run.
# NOTE(review): the timestamp in this file name is fixed, while create_model()
# stamps checkpoint names with time(); a new training run will write files
# under a different name, so update this path after retraining.
model_file = 'model/single-user-model-1544878349.0387578-300.hdf5'
model = load_model(model_file)

In [11]:
# Print one generated continuation (15 words requested) per seed phrase.
for seed_text in ("Social", "Big Data", "Business", "Last Year", "Be"):
    print(tm.generate_text(seed_text, 15, model, max_sequence_len))

Social Data Science And Robotics The Next Big Area Of Study Says Study Says In Accuracy
Big Data Science And Robotics The Next Big Area Of Study Says Growth Study Says Growth In
Business Is Fasthr Innovation Needs To Keep Pace Pace People To Change Your Culture Look At
Last Year To Worsen Over Next Decade Study Says Growth Study Says Growth Cycle Simonlporter Simonlporter Pool
Be The Most Successful Innovators Bring Their Ideas To Life Life Life Employees From Leaving Pool


In [None]:
def generate_train_val(obj, train_part):
    """Split the cleaned table into train/validation parts and tokenise each.

    Args:
        obj: Object exposing ``get_clean_table()``; the returned table must
            have a ``CleanText`` column and support ``sample`` / ``drop``.
        train_part: Fraction of rows to sample for the training part.

    Returns:
        Tuple ``(x_train, y_train, x_val, y_val, total_words, max_seq_len)``.
        As written, ``total_words`` and ``max_seq_len`` are the values
        computed from the training part only.
    """
    data = obj.get_clean_table()
    # Fixed random_state so the same rows are sampled on every call.
    train = data.sample(frac=train_part, random_state=200)
    # Validation part = every row that was not sampled for training.
    val  = data.drop(train.index)
    
    # Tokenise and pad the FULL table.
    # NOTE(review): total_words and max_seq_len from these two calls are
    # overwritten by the train-only calls below, and _1 / _2 are never read.
    # Presumably the intent was to share vocabulary size and sequence length
    # across both parts - confirm, and check whether these calls are needed
    # for side effects inside TokenMgmt.
    _1, total_words = tm.get_sequence_of_tokens(list(data.CleanText.values))
    _1, _2, max_seq_len = tm.generate_padded_sequences(_1, total_words)
    
    # Training part: these values are the ones finally returned.
    x_seq, total_words = tm.get_sequence_of_tokens(list(train.CleanText.values))
    x_train, y_train, max_seq_len = tm.generate_padded_sequences(x_seq, total_words)
    
    # Validation part: `_` first holds its word count, then is rebound to the
    # discarded sequence length.
    # NOTE(review): the validation part is tokenised and padded independently
    # of the training part, so x_val / y_val may not match the training
    # shapes or token ids - verify against TokenMgmt.
    x_seq, _ = tm.get_sequence_of_tokens(list(val.CleanText.values))
    x_val, y_val, _ = tm.generate_padded_sequences(x_seq, _)
    
    return(x_train, y_train, x_val, y_val, total_words, max_seq_len)