In [1]:
import random

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Activation, Embedding, Conv1D, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard
import matplotlib.pyplot as plt


from dataprocessing.find_top_movie_with_sequence import find_top_dataset
from dataprocessing.label_and_feature import getSortedClassContents, checkConsists, buildFeature, saveClassesToFile
from dataprocessing.time import checkingTimeDifferent
from recom_model import trainWithBidirectional,trainWithEmbeddingDense

[[[[0, 0, 2]], 4], [[[0, 0, 1]], 1], [[[0, 0, 3]], 5], [[[0, 0, 4]], 7], [[[0, 3, 1]], 3]]


In [2]:
MIN_CONTENTS_ON_USER = 250
MAX_DAYS = 7
MAX_SEQUENCE = 6
DATA_DIR = "data"

recommandation_df = pd.read_csv('{}/normalizedata.csv'.format(DATA_DIR)).sort_values(by=['time'])

In [3]:
sortedClassContents = getSortedClassContents(recommandation_df, MIN_CONTENTS_ON_USER)
saveClassesToFile(sortedClassContents)
lenSortedClassContents = len(sortedClassContents)
print(lenSortedClassContents)

[0, 1, 15, 23, 39, 69, 79, 162, 169, 202, 205, 213, 252, 255, 306, 308, 312, 322, 369, 371, 379, 434, 448, 455, 478, 479, 480, 481, 492, 507, 513, 535, 548, 623, 626, 640, 644, 657, 682, 683, 697, 700, 752, 772, 776, 777, 781, 824, 832, 839, 883, 913, 936, 960, 964, 977, 989, 993, 1006, 1020, 1026, 1115, 1137, 1185, 1187, 1193, 1239, 1242, 1256, 1269, 1304, 1310, 1397, 1415, 1420, 1428, 1429, 1433, 1436, 1437, 1490, 1498, 1506, 1512, 1521, 1534, 1554, 1559, 1567, 1578, 1579, 1589, 1591, 1599, 1602, 1606, 1607, 1609, 1619, 1620, 1624, 1641, 1652, 1659, 1677, 1700, 1716, 1719, 1756, 1758, 1774, 1777, 1781, 1788, 1790, 1791, 1795, 1798, 1806, 1807, 1813, 1819, 1823, 1826, 1830, 1833, 1834, 1836, 1838, 1839, 1840, 1845, 1847, 1853, 1855, 1862, 1863, 1872, 1873, 1876, 1877, 1879, 1885, 1896, 1910, 1912, 1913, 1927, 1928, 1935, 1936]
151


In [4]:
from datetime import datetime
now = datetime.now()
 
print("start =", now)

visitors_df = recommandation_df['visitor'].drop_duplicates()
maxItem = recommandation_df['Content'].max()
training_data = []
for index, item in visitors_df.iteritems():
    video = recommandation_df[recommandation_df['visitor'] == item]
    if video.size > 1:
        tempContents = []
        indexContent = 0
        for index, item in video['Content'].iteritems():
            if item not in sortedClassContents:
                continue
            if len(tempContents) > MAX_SEQUENCE:
                tempContents = tempContents[1:]
                continue
            if checkConsists(item, tempContents):
                continue
            if len(tempContents) > 0:
                nowDate = video['time'].iloc[indexContent]
                beforeDate = video['time'].iloc[indexContent - 1]

                if checkingTimeDifferent(nowDate, beforeDate, MAX_DAYS):
                    tempContents = []
                else:
                    feature = buildFeature(tempContents, sortedClassContents, MAX_SEQUENCE)
                    label = sortedClassContents.index(item)
                    training_data.append([[feature], label])
            tempContents.append(item)
            indexContent += 1

training_data = find_top_dataset(training_data)
print("training size: ", len(training_data))
random.shuffle(training_data)

now = datetime.now()
 
print("end =", now)

start = 2020-11-15 18:03:50.094576
training size:  8940
end = 2020-11-15 18:13:56.830710


In [None]:
features = []
labels = []
for feature, label in training_data:
    features.append(feature)
    labels.append(label)

In [None]:
X = np.array(features)
Y = np.array(labels)
print(X.shape)



In [None]:
# for lstm with embedding
X = np.squeeze(X, axis=1)
model = trainWithBidirectional(MAX_SEQUENCE, lenSortedClassContents)

In [None]:
# for pure lstm

X = np.squeeze(X, axis=1)
X = X.reshape(X.shape[0], X.shape[1], 1)
model = Sequential()
model.add(LSTM(256, input_shape=X.shape[1:], return_sequences=True) )
model.add(LSTM(256, return_sequences=True))
model.add(LSTM(256, return_sequences=True))
model.add(LSTM(256))
model.add(Dense(lenSortedClassContents))
model.add(Activation("softmax"))
opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

In [None]:
# for dense
X = np.squeeze(X, axis=1)
print(X.shape)
model = Sequential()   
model.add(Dense(512, activation="relu", input_shape=(X.shape[1:])))
model.add(Dense(512, activation="relu"))
model.add(Dense(512, activation="relu"))
model.add(Dense(512, activation="relu"))
model.add(Dense(lenSortedClassContents))
model.add(Activation("softmax"))
opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)
model.compile(loss='sparse_categorical_crossentropy', optimizer=opt, metrics=['accuracy'])


In [None]:
# for embedding Dense
X = np.squeeze(X, axis=1)
model = trainWithEmbeddingDense(MAX_SEQUENCE, lenSortedClassContents)

In [None]:
name = "testing-1"
tensorboard = TensorBoard(log_dir='logs/{}'.format(name))
history = model.fit(X, Y, epochs=250, batch_size=64, verbose=1, callbacks=[tensorboard])

In [None]:
def plot_graphs(history, string):
  plt.plot(history.history[string])
  plt.xlabel("Epochs")
  plt.ylabel(string)
  plt.show()

In [None]:
plot_graphs(history, 'accuracy')


In [None]:
plot_graphs(history, 'loss')

In [None]:
plot_graphs(history, 'val_loss')

In [None]:
plot_graphs(history, 'val_accuracy')

In [None]:
def get5TopPredict(predict):
    predicts = predict[0].argsort()[-5:][::-1]
    for i in predicts:
        print(sortedClassContents[i])
        
def predict(first, second, third, forth, fifth, sixth):
    predict = model.predict([[[sortedClassContents.index(first)],[sortedClassContents.index(second)],[sortedClassContents.index(third)],[sortedClassContents.index(forth)], [sortedClassContents.index(fifth)],[sortedClassContents.index(sixth)]]])
    get5TopPredict(predict)
    return sortedClassContents[np.argmax(predict[0])]


In [None]:
predictResult = predictDense(0,0,0,0,700,1756)
print(predictResult)

In [None]:
run_model = tf.function(lambda x: model(x))
# This is important, let's fix the input size.
concrete_func = run_model.get_concrete_function(
    tf.TensorSpec([1,6], model.inputs[0].dtype))

# model directory.
MODEL_DIR = "model"
model.save(MODEL_DIR, save_format="tf", signatures=concrete_func)


In [None]:
MODEL_DIR = "model"
model.save(MODEL_DIR) 

In [None]:
converter = tf.lite.TFLiteConverter.from_saved_model(MODEL_DIR)
tflite_model = converter.convert()

In [None]:
# Run the model with TensorFlow to get expected results.
TEST_CASES = 10

# Run the model with TensorFlow Lite
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print(input_details)
print(output_details)
print(input_details[0]["index"])
for i in range(TEST_CASES):
  expected = model.predict([[0,0,0,i]])
  interpreter.set_tensor( input_details[0]["index"], np.array([[0,0,0,i]]).astype(np.float32))
  interpreter.invoke()
  result = interpreter.get_tensor(output_details[0]["index"])

  # Assert if the result of TFLite model is consistent with the TF model.
  np.testing.assert_almost_equal(expected, result)
  print("Done. The result of TensorFlow matches the result of TensorFlow Lite.")

  # Please note: TfLite fused Lstm kernel is stateful, so we need to reset
  # the states.
  # Clean up internal states.
  interpreter.reset_all_variables()


In [None]:
open("tf_model/converted_model.tflite", "wb").write(tflite_model)

In [None]:
interpreter.set_tensor( input_details[0]["index"], np.array([[0,0,0,sortedClassContents.index(700)]]).astype(np.float32))
interpreter.invoke()
result = interpreter.get_tensor(output_details[0]["index"])
print(sortedClassContents[np.argmax(result)])