In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, Bidirectional,BatchNormalization, Activation, Embedding
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
from sklearn.preprocessing import MinMaxScaler
import datetime
import time
import random
import matplotlib.pyplot as plt

In [None]:
# A content id must occur strictly more than this many times to become a class
# (see getSortedClass).
MIN_CONTENTS_ON_USER = 250
# Largest gap, in days, allowed between two views before the history window is reset
# (see checkingTimeDifferent).
MAX_DAYS = 7
# Number of entries in one model input sequence (padding included).
MAX_SEQUENCE = 6
# Directory that holds the input CSV.
DATA_DIR = "data"
# Interaction log, ordered by its 'time' column (ascending, the sort_values default).
recommandation_df = pd.read_csv('{}/normalizedata.csv'.format(DATA_DIR)).sort_values(by=['time'])

In [None]:
def convertToTimemillis(date):
    """Convert a ``"%Y-%m-%d %H:%M:%S"`` timestamp string to epoch milliseconds.

    The string is parsed as a naive datetime and interpreted in the machine's
    local timezone, which is what ``datetime.timestamp()`` does for naive values.

    Args:
        date: Timestamp text such as ``"2020-01-31 13:45:00"``.

    Returns:
        int: Milliseconds since the Unix epoch, or ``0`` when ``date`` is missing
        or not in the expected format.
    """
    try:
        parsed = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
        # timestamp() is portable. The previous strftime('%s.%f') relied on a
        # glibc-only directive and silently produced 0 on platforms without it.
        return int(parsed.timestamp() * 1000)
    except (TypeError, ValueError, OverflowError, OSError):
        # Best effort: unparsable or out-of-range values map to 0.
        return 0

def getSortedClass():
    """Return the sorted content ids that are frequent enough to become classes.

    Reads the module-level ``recommandation_df['Content']`` column and keeps every
    content id that occurs strictly more than ``MIN_CONTENTS_ON_USER`` times.
    Id ``0`` is always kept because its counter starts above the threshold
    (presumably so index 0 can double as the padding slot written by
    ``buildFeature`` -- confirm).

    Returns:
        list: Qualifying content ids in ascending order. The list is also printed.
    """
    # Seed id 0 above the threshold so it is a class regardless of the data.
    counts = {0: MIN_CONTENTS_ON_USER + 1}
    # Single pass with a dict instead of rescanning a list of pairs per row.
    for contentLevel in recommandation_df['Content'].values:
        counts[contentLevel] = counts.get(contentLevel, 0) + 1

    sortedClassContents = sorted(
        key for key, count in counts.items() if count > MIN_CONTENTS_ON_USER
    )
    print(sortedClassContents)
    return sortedClassContents

def saveClassesToFile(classes):
    """Write the class ids to ``label.txt`` in the current working directory.

    The values are written with ``np.savetxt`` using its default number format.

    Args:
        classes: Sequence of numeric class ids (e.g. the list returned by
            ``getSortedClass``).
    """
    # The context manager closes the file even if np.savetxt raises.
    with open("label.txt", "w") as label_file:
        np.savetxt(label_file, classes)


def checkingTimeDifferent(nowDate, beforeDate):
    """Tell whether ``nowDate`` is more than ``MAX_DAYS`` days after ``beforeDate``.

    Both arguments are passed through ``convertToTimemillis``.

    Returns:
        bool: True when the elapsed time strictly exceeds ``MAX_DAYS`` days.
    """
    millisPerDay = 86400000
    elapsed = convertToTimemillis(nowDate) - convertToTimemillis(beforeDate)
    return elapsed > (MAX_DAYS * millisPerDay)


def buildFeature(paramsContents, sortedClassContents):
    """Encode a content history as a left-padded list of class indices.

    Args:
        paramsContents: Content ids, oldest first.
        sortedClassContents: Class vocabulary; each id is replaced by its
            position in this list.

    Returns:
        list: Zeros up to ``MAX_SEQUENCE`` entries, followed by the encoded ids.
        No padding is added when the history already has ``MAX_SEQUENCE`` or
        more items.
    """
    # A non-positive multiplier yields an empty list, same as an empty range().
    padding = [0] * (MAX_SEQUENCE - len(paramsContents))
    encoded = [sortedClassContents.index(content) for content in paramsContents]
    return padding + encoded

def checkConsists(item, contents):
    """Return True when ``item`` equals at least one element of ``contents``."""
    for candidate in contents:
        if candidate == item:
            return True
    return False


def _freezeFeature(value):
    """Turn (nested) lists/tuples into nested tuples so they can be dict keys."""
    if isinstance(value, (list, tuple)):
        return tuple(_freezeFeature(part) for part in value)
    return value


def findTopDataSet(dataSet):
    """Collapse duplicate features, keeping each feature's most frequent label.

    Args:
        dataSet: Iterable of ``[feature, label]`` pairs. Features are (possibly
            nested) lists of hashable values; labels must be hashable.

    Returns:
        list: One ``[feature, label]`` pair per distinct feature, in order of
        first appearance. ``label`` is the label seen most often with that
        feature; ties go to the label that appeared first.
    """
    # frozen feature -> [first-seen feature object, {label: occurrences}]
    grouped = {}
    for x, y in dataSet:
        entry = grouped.setdefault(_freezeFeature(x), [x, {}])
        labelCounts = entry[1]
        labelCounts[y] = labelCounts.get(y, 0) + 1

    result = []
    for x, labelCounts in grouped.values():
        # max() returns the first maximal key in insertion order, which gives
        # the first-seen tie-break.
        topY = max(labelCounts, key=labelCounts.get)
        result.append([x, topY])
    return result

In [None]:
# Build the class vocabulary (sorted content ids) from the loaded interactions.
sortedClassContents = getSortedClass()
# Persist the vocabulary to label.txt.
saveClassesToFile(sortedClassContents)
# Number of classes; used later as the embedding vocabulary size and the softmax width.
lenSortedClassContents = len(sortedClassContents)
print(lenSortedClassContents)

In [None]:
# Distinct visitors, in order of first appearance in the time-sorted log.
visitors_df = recommandation_df['visitor'].drop_duplicates()
maxItem = recommandation_df['Content'].max()
training_data = []
# Iterate the Series values directly: Series.iteritems() was removed in pandas 2.0.
for visitor in visitors_df:
    video = recommandation_df[recommandation_df['visitor'] == visitor]
    # A single-row history cannot produce a (history -> next content) pair.
    if len(video) < 2:
        continue

    tempContents = []      # sliding window of the visitor's recent distinct contents
    previousDate = None    # 'time' of the row processed just before the current one
    # Walk content and time together so the timestamps always belong to the row
    # being processed (a separate counter drifts once rows are skipped).
    for item, nowDate in zip(video['Content'], video['time']):
        beforeDate, previousDate = previousDate, nowDate
        if item not in sortedClassContents:
            continue
        if checkConsists(item, tempContents):
            continue
        if len(tempContents) > 0:
            if checkingTimeDifferent(nowDate, beforeDate):
                # Too long since the previous view: start a fresh window.
                tempContents = []
            else:
                feature = buildFeature(tempContents, sortedClassContents)
                label = sortedClassContents.index(item)
                # The feature is wrapped in a list; the extra axis is squeezed
                # away when the arrays are built.
                training_data.append([[feature], label])
        tempContents.append(item)
        # Keep only the newest MAX_SEQUENCE items; trimming here (rather than
        # trimming and skipping the current item) means no view is dropped.
        tempContents = tempContents[-MAX_SEQUENCE:]

# Keep one label per distinct feature (its most frequent one).
training_data = findTopDataSet(training_data)
print("training size: ", len(training_data))

random.shuffle(training_data)

In [None]:
# Split the [feature, label] pairs into two parallel lists.
features = [feature for feature, _ in training_data]
labels = [label for _, label in training_data]

In [None]:
# Stack the samples and drop the singleton axis that comes from each feature
# having been wrapped in a one-element list.
X = np.squeeze(np.array(features), axis=1)
# Class indices as float32.
Y = np.array(labels, dtype=np.float32)

In [None]:
# Run name, used for the TensorBoard log directory.
name = "testing-1"
tensorboard = TensorBoard(log_dir='logs/{}'.format(name))
# Embedding -> three stacked LSTMs -> softmax over the class vocabulary.
model = Sequential([
    Embedding(lenSortedClassContents, 128, input_length=MAX_SEQUENCE),
    LSTM(128, return_sequences=True),
    LSTM(128, return_sequences=True),
    LSTM(128),
    Dense(lenSortedClassContents, activation='softmax'),
])

    


In [None]:
# `learning_rate` is the supported argument name; `lr` is a deprecated alias that
# newer Keras releases reject.
adam = Adam(learning_rate=0.01)
# Labels are integer class indices, hence the sparse categorical loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
# Hold out the last 20% of the (already shuffled) samples so the history also
# records 'val_loss' and 'val_accuracy', which later cells plot.
history = model.fit(
    X,
    Y,
    epochs=64,
    batch_size=32,
    verbose=1,
    validation_split=0.2,
    callbacks=[tensorboard],
)

In [None]:
def plot_graphs(history, string):
    """Plot one recorded training metric against the epoch index.

    Args:
        history: Object exposing a ``history`` dict of per-epoch metric lists
            (e.g. the value returned by ``model.fit``).
        string: Key of the metric to plot; also used as the y-axis label.
    """
    values = history.history[string]
    plt.plot(values)
    plt.xlabel("Epochs")
    plt.ylabel(string)
    plt.show()

In [None]:
# Training accuracy per epoch.
plot_graphs(history, 'accuracy')


In [None]:
# Training loss per epoch.
plot_graphs(history, 'loss')

In [None]:
# Validation loss per epoch. The 'val_loss' key exists only when fit() was given
# validation data (validation_split / validation_data); otherwise this raises KeyError.
plot_graphs(history, 'val_loss')

In [None]:
# Validation accuracy per epoch. The 'val_accuracy' key exists only when fit() was given
# validation data (validation_split / validation_data); otherwise this raises KeyError.
plot_graphs(history, 'val_accuracy')

In [None]:
def predict(first, second, third, forth, fifth, sixth):
    """Predict the next content id from a six-item viewing history.

    Args:
        first, second, third, forth, fifth, sixth: Content ids, oldest first.
            Each must be present in ``sortedClassContents``.

    Returns:
        The content id whose class has the highest predicted probability.

    Raises:
        ValueError: If any argument is not in ``sortedClassContents``.
    """
    contents = (first, second, third, forth, fifth, sixth)
    encoded = [sortedClassContents.index(content) for content in contents]
    # Pass an explicit (1, 6) ndarray: a nested Python list is not a reliable
    # input type for Model.predict across TensorFlow versions.
    probabilities = model.predict(np.array([encoded]))
    return sortedClassContents[np.argmax(probabilities[0])]

In [None]:
# Example query: five entries of content id 0 followed by content id 626.
# predict() raises ValueError if 626 is not in sortedClassContents.
predictResult = predict(0,0,0,0,0,626)
print(predictResult)

In [None]:
# Wrap the Keras model in a tf.function so a concrete, fixed-shape signature can be exported.
run_model = tf.function(lambda x: model(x))
# This is important, let's fix the input size.
# NOTE(review): BATCH_SIZE, STEPS and INPUT_SIZE are not referenced by any visible
# cell after being defined here; the TensorSpec below hard-codes its shape instead.
BATCH_SIZE = 1
STEPS = 100
INPUT_SIZE = MAX_SEQUENCE
# NOTE(review): the exported signature takes a [1, 4] input, while the Embedding layer
# is declared with input_length=MAX_SEQUENCE and buildFeature pads to MAX_SEQUENCE (6).
# Every cell that feeds the converted model must match whatever shape is fixed here --
# confirm which sequence length is intended before changing it.
concrete_func = run_model.get_concrete_function(
    tf.TensorSpec([1,4], model.inputs[0].dtype))

# model directory.
MODEL_DIR = "model"
# Export in TensorFlow SavedModel format with the fixed-shape signature.
model.save(MODEL_DIR, save_format="tf", signatures=concrete_func)

# Convert the SavedModel; the converted model is kept in memory as `tflite_model`.
converter = tf.lite.TFLiteConverter.from_saved_model(MODEL_DIR)
tflite_model = converter.convert()

In [None]:
# Number of probe inputs compared between the Keras model and the TFLite model.
TEST_CASES = 10

# Run the model with TensorFlow Lite
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details)
print(output_details)
print(input_details[0]["index"])

# Take the input shape and dtype from the converted model instead of hard-coding
# them, so this check keeps working if the exported signature changes.
input_shape = tuple(input_details[0]["shape"])
input_dtype = input_details[0]["dtype"]
for i in range(TEST_CASES):
    # All-zero history with the probe class index in the last position; the
    # modulo keeps the index inside the embedding vocabulary.
    sample = np.zeros(input_shape, dtype=input_dtype)
    sample[0, -1] = i % lenSortedClassContents

    expected = model.predict(sample)
    interpreter.set_tensor(input_details[0]["index"], sample)
    interpreter.invoke()
    result = interpreter.get_tensor(output_details[0]["index"])

    # Assert if the result of TFLite model is consistent with the TF model.
    # float32 kernels differ in the last digits, so compare to 5 decimals
    # rather than the default 7.
    np.testing.assert_almost_equal(expected, result, decimal=5)
    print("Done. The result of TensorFlow matches the result of TensorFlow Lite.")

    # Please note: TfLite fused Lstm kernel is stateful, so we need to reset
    # the states.
    # Clean up internal states.
    interpreter.reset_all_variables()


In [None]:
# Write the converted model to disk; the context manager guarantees the handle is
# flushed and closed.
with open("converted_model.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
# Build the input from the interpreter's own shape and dtype instead of hard-coding
# them: an all-zero history with the class index of content id 700 in the last slot.
# .index() raises ValueError if 700 is not in sortedClassContents.
sample = np.zeros(tuple(input_details[0]["shape"]), dtype=input_details[0]["dtype"])
sample[0, -1] = sortedClassContents.index(700)
interpreter.set_tensor(input_details[0]["index"], sample)
interpreter.invoke()
result = interpreter.get_tensor(output_details[0]["index"])
# Map the most probable class index back to its content id.
print(sortedClassContents[np.argmax(result)])