In [None]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import tabulate
pd.set_option("display.precision", 1)
from collections import defaultdict
import csv
from numpy import genfromtxt

In [None]:
def load_data():
    """Load the pre-prepared lab files from the current working directory.

    Returns:
        A 7-tuple ``(item_train, user_train, y_train, item_features,
        user_features, item_vecs, destination_dict)``. The arrays are read
        with ``genfromtxt``; the two feature lists are the first CSV row of
        their header files; ``destination_dict`` maps an integer destination
        id to a dict with ``"name"`` and ``"category"`` entries.
    """
    def _read_matrix(path):
        # Comma-separated numeric file -> numpy array.
        return genfromtxt(path, delimiter=',')

    def _read_header(path):
        # The column names live on the first CSV row of the header file.
        with open(path, newline='') as handle:
            return list(csv.reader(handle))[0]

    item_train = _read_matrix('content_item_train.csv')
    user_train = _read_matrix('content_user_train.csv')
    y_train = _read_matrix('content_y_train.csv')
    item_features = _read_header('content_item_train_header.txt')
    user_features = _read_header('content_user_train_header.txt')
    item_vecs = _read_matrix('content_item_vecs.csv')

    destination_dict = defaultdict(dict)
    with open('content_destinations_list.csv', newline='') as csvfile:
        rows = csv.reader(csvfile, delimiter=',', quotechar='"')
        next(rows, None)  # the first row is skipped, it is not a destination
        for row in rows:
            key = int(row[0])
            destination_dict[key]["name"] = row[1]
            destination_dict[key]["category"] = row[2]

    return (item_train, user_train, y_train, item_features,
            user_features, item_vecs, destination_dict)

In [None]:
(item_train, user_train, y_train,
 item_features, user_features,
 item_vecs, destination_dict) = load_data()

# Column offsets into the raw feature matrices.
uvs = 3  # user genre vector start
ivs = 3  # item genre vector start
u_s = 3  # first user column used in training (skips user id, rating count, rating average)
i_s = 1  # first item column used in training (skips the place id)

# Width of each network input = total columns minus the skipped leading ones.
num_user_features = user_train.shape[1] - u_s
num_item_features = item_train.shape[1] - i_s
print(f"Number of training vectors: {len(item_train)}")

Number of training vectors: 437


In [None]:
# Print the length of every object returned by load_data() so the row counts
# and the feature-name counts can be compared at a glance.
print(f"{len(item_train)}, {len(user_train)}, {len(y_train)}, {len(item_features)}, {len(user_features)}, {len(item_vecs)}, {len(destination_dict)}, ")

437, 437, 437, 9, 9, 437, 437, 


In [None]:
def split_str(ifeatures, smax):
    """Insert a space into long feature names so table headers can wrap.

    A name is split at its midpoint only when it contains no space already
    and is longer than ``smax`` characters; every other name is returned
    unchanged.

    Args:
        ifeatures: iterable of feature-name strings.
        smax: maximum length a space-free name may have before it is split.

    Returns:
        A new list with one (possibly split) string per input name.
    """
    def _fit(label):
        if ' ' in label or len(label) <= smax:
            return label
        half = len(label) // 2
        return label[:half] + " " + label[half:]

    return [_fit(label) for label in ifeatures]

In [None]:
def pprint_train(x_train, features, vs, u_s, maxcount=5, user=True):
    """Build an HTML table previewing the first rows of user_train or item_train.

    Args:
        x_train: 2-D array; columns 0 and 1 are shown as integers, the
            remaining columns as floats.
        features: list of column names for ``x_train``.
        vs: index in ``features`` where the genre/category columns start.
        u_s: number of leading columns whose header is wrapped in ``[...]``.
        maxcount: maximum number of data rows to include.
        user: when True the genre columns use one decimal place, otherwise
            none.

    Returns:
        The HTML table produced by ``tabulate.tabulate``.
    """
    # Three leading columns, then up to 14 genre columns sharing one format.
    genre_fmt = ".1f" if user else ".0f"
    flist = [".0f", ".0f", ".1f"] + [genre_fmt] * 14

    head = features[:vs]
    if vs < u_s:
        # Was a plain string, so the placeholders were printed literally.
        print(f"error, vector start {vs} should be greater then user start {u_s}")
    for i in range(u_s):
        head[i] = "[" + head[i] + "]"
    hdr = head + features[vs:]

    disp = [split_str(hdr, 5)]
    for i in range(x_train.shape[0]):
        if i == maxcount:
            break
        disp.append([x_train[i, 0].astype(int),
                     x_train[i, 1].astype(int),
                     x_train[i, 2].astype(float),
                     *x_train[i, 3:].astype(float)])
    return tabulate.tabulate(disp, tablefmt='html', headers="firstrow",
                             floatfmt=flist, numalign='center')

In [None]:
# Preview a handful of entries instead of dumping the whole lookup table.
for destination_id, info in list(destination_dict.items())[:5]:
    print(destination_id, info)

defaultdict(<class 'dict'>, {1: {'name': 'Monumen Nasional', 'category': 'Budaya'}, 2: {'name': 'Kota Tua', 'category': 'Budaya'}, 3: {'name': 'Dunia Fantasi', 'category': 'Taman Hiburan'}, 4: {'name': 'Taman Mini Indonesia Indah (TMII)', 'category': 'Taman Hiburan'}, 5: {'name': 'Atlantis Water Adventure', 'category': 'Taman Hiburan'}, 6: {'name': 'Taman Impian Jaya Ancol', 'category': 'Taman Hiburan'}, 7: {'name': 'Kebun Binatang Ragunan', 'category': 'Cagar Alam'}, 8: {'name': 'Ocean Ecopark', 'category': 'Taman Hiburan'}, 9: {'name': 'Pelabuhan Marina', 'category': 'Bahari'}, 10: {'name': 'Pulau Tidung', 'category': 'Bahari'}, 11: {'name': 'Pulau Bidadari', 'category': 'Bahari'}, 12: {'name': 'Pulau Pari', 'category': 'Bahari'}, 13: {'name': 'Pulau Pramuka', 'category': 'Bahari'}, 14: {'name': 'Pulau Pelangi', 'category': 'Bahari'}, 15: {'name': 'Pasar Seni', 'category': 'Pusat Perbelanjaan'}, 16: {'name': 'Jembatan Kota Intan', 'category': 'Budaya'}, 17: {'name': 'Museum Fatahilla

In [None]:
# Preview the first five rows of the user matrix; headers of the first u_s
# columns are shown in brackets.
pprint_train(user_train, user_features, uvs,  u_s, maxcount=5)

In [None]:
# Preview the first five rows of the item matrix; user=False selects the
# zero-decimal format for the genre columns.
pprint_train(item_train, item_features, ivs, i_s, maxcount=5, user=False)

In [None]:
# Peek at the first five target values (still unscaled at this point).
print(f"y_train[:5]: {y_train[:5]}")

y_train[:5]: [3.36666667 3.24137931 3.36666667 3.42307692 3.36363636]


In [None]:
# scale training data
# Keep handles on the unscaled arrays before the names are rebound below.
item_train_unscaled, user_train_unscaled, y_train_unscaled = item_train, user_train, y_train

# NOTE(review): the scalers are fit on the full dataset here, before the
# train/test split in a later cell, so the held-out rows contribute to the
# scaling statistics. Consider splitting first and fitting on the train part.
scalerItem = StandardScaler()
item_train = scalerItem.fit_transform(item_train)

scalerUser = StandardScaler()
user_train = scalerUser.fit_transform(user_train)

# Targets are mapped to [-1, 1]; the scaler expects a 2-D column.
scalerTarget = MinMaxScaler((-1, 1))
y_train = scalerTarget.fit_transform(y_train.reshape(-1, 1))

# Round-trip check: inverse-transforming the scaled data should give back the
# unscaled arrays (items first, then users).
for scaler, unscaled, scaled in (
    (scalerItem, item_train_unscaled, item_train),
    (scalerUser, user_train_unscaled, user_train),
):
    print(np.allclose(unscaled, scaler.inverse_transform(scaled)))

In [None]:
# Split all three arrays in one call so item rows, user rows and targets are
# partitioned with the same shuffled indices by construction, instead of
# relying on three separate calls repeating identical arguments.
(item_train, item_test,
 user_train, user_test,
 y_train, y_test) = train_test_split(
    item_train, user_train, y_train,
    train_size=0.80, shuffle=True, random_state=1)
print(f"destination/item training data shape: {item_train.shape}")
print(f"destination/item test data shape: {item_test.shape}")

In [None]:
# Preview the user matrix again; user_train has been scaled and split by now,
# so the values shown are the scaled ones, not the raw inputs.
pprint_train(user_train, user_features, uvs, u_s, maxcount=5)

[Use r_Id],[Rating _Count],[Rating_ Average],Bah ari,Bud aya,Cagar Alam,Pusat Perbelanjaan,Taman Hiburan,Tempat Ibadah
-1,-1,-1.8,-1.4,-1.5,-1.7,0.5,0.2,0.7
-1,-1,-1.9,0.0,-1.0,-0.7,-0.4,-1.2,-1.4
0,0,-0.4,0.7,-1.2,-0.7,1.7,1.1,-0.2
-1,1,-0.2,0.2,-1.2,-1.7,0.8,1.2,-1.4
0,0,0.3,-0.4,0.2,0.3,1.7,-0.2,1.6


In [None]:
# Size of the embedding each tower produces.
num_outputs = 32
tf.random.set_seed(1)

# User tower: two ReLU layers followed by a linear embedding layer.
user_NN = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear'),
])

# Item tower: same architecture, separate weights.
item_NN = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear'),
])

# `shape` must be a tuple: `(n)` is just the integer n, which not every Keras
# version accepts, so the trailing comma is required.
input_user = tf.keras.layers.Input(shape=(num_user_features,))
vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)  # unit-length user embedding

input_item = tf.keras.layers.Input(shape=(num_item_features,))
vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)  # unit-length item embedding

# The prediction is the dot product of the two normalized embeddings.
output = tf.keras.layers.Dot(axes=1)([vu, vm])
model = tf.keras.Model([input_user, input_item], output)
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 6)]                  0         []                            
                                                                                                  
 input_2 (InputLayer)        [(None, 8)]                  0         []                            
                                                                                                  
 sequential (Sequential)     (None, 32)                   38816     ['input_1[0][0]']             
                                                                                                  
 sequential_1 (Sequential)   (None, 32)                   39328     ['input_2[0][0]']             
                                                                                              

In [None]:
# Compile with mean-squared-error loss and Adam at learning rate 0.01.
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt,
              loss=cost_fn)

In [None]:
# Train for 30 epochs. The leading u_s user columns and i_s item columns are
# sliced off so only the feature columns reach the network.
tf.random.set_seed(1)
model.fit([user_train[:, u_s:], item_train[:, i_s:]], y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x78822869b820>

In [None]:
# Evaluate on the held-out split with the same column slicing as training.
# The reported number is the compiled MSE loss on the scaled targets.
model.evaluate([user_test[:, u_s:], item_test[:, i_s:]], y_test)



0.00790192000567913

In [None]:
# Profile of a hypothetical new user, listed in the same order as the columns
# of the user vector assembled below.
new_user_id = 1
new_rating_count = 0
new_rating_ave = 0.0
new_Bahari = 15.0
new_Budaya = 10.0
new_Cagar_Alam = 20.0
new_Pusat_Perbelanjaan = 10.0
new_Taman_Hiburan = 10.0
new_Tempat_Ibadah = 0.0

# A single-row 2-D array: id, rating count, rating average, then one value
# per category.
user_vec = np.array([[
    new_user_id,
    new_rating_count,
    new_rating_ave,
    new_Bahari,
    new_Budaya,
    new_Cagar_Alam,
    new_Pusat_Perbelanjaan,
    new_Taman_Hiburan,
    new_Tempat_Ibadah,
]])

In [None]:
def gen_user_vecs(user_vec, num_items):
    """Stack ``num_items`` copies of ``user_vec`` along the first axis.

    Args:
        user_vec: array holding the user row(s) to replicate.
        num_items: number of times the input is repeated vertically.

    Returns:
        The array produced by ``np.tile(user_vec, (num_items, 1))``.
    """
    repetitions = (num_items, 1)
    return np.tile(user_vec, repetitions)

In [None]:
def print_pred_movies(y_p, item, destination_dict, maxcount=10):
    """Collect the leading predictions as display rows.

    Args:
        y_p: 2-D array of predictions; column 0 is read.
        item: 2-D array of item rows aligned with ``y_p``; column 0 is used
            as the destination id and column 2 is shown under "rating ave".
        destination_dict: mapping from destination id to a dict with
            ``'name'`` and ``'category'`` entries.
        maxcount: maximum number of data rows to return.

    Returns:
        A list of rows (lists), header row first. The rows themselves are
        returned, not a rendered table.
    """
    disp = [["y_p", "movie id", "rating ave", "title", "genres"]]

    for i in range(y_p.shape[0]):
        if i == maxcount:
            break
        destination_id = item[i, 0].astype(int)
        info = destination_dict[destination_id]
        disp.append([np.around(y_p[i, 0], 1),
                     destination_id,
                     np.around(item[i, 2].astype(float), 1),
                     info['name'],
                     info['category']])

    # The previously built HTML table was never used or returned, so the
    # dead tabulate call has been dropped.
    return disp

In [None]:
# Replicate the new user's row once per item so both inputs have equal length.
user_vecs = gen_user_vecs(user_vec,len(item_vecs))

# Apply the scalers that were fit on the training data.
suser_vecs = scalerUser.transform(user_vecs)
sitem_vecs = scalerItem.transform(item_vecs)

# Predict with the same column slicing that was used for training.
y_p = model.predict([suser_vecs[:, u_s:], sitem_vecs[:, i_s:]])

# Map the predictions back from the scaled target range to the original one.
y_pu = scalerTarget.inverse_transform(y_p)

# Negating before argsort orders the indices from highest to lowest prediction.
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()
sorted_ypu   = y_pu[sorted_index]
sorted_items = item_vecs[sorted_index]

# Show the ten highest-ranked destinations (unscaled item rows are passed).
print_pred_movies(sorted_ypu, sorted_items, destination_dict, maxcount = 10)



[['y_p', 'movie id', 'rating ave', 'title', 'genres'],
 [3.9, 292, 3.1, 'Curug Anom', 'Cagar Alam'],
 [3.9, 365, 3.1, 'Waduk Jatibarang', 'Cagar Alam'],
 [3.9, 169, 3.1, 'Puncak Segoro', 'Cagar Alam'],
 [3.9, 323, 3.1, 'Kebun Tanaman Obat Sari Alam', 'Cagar Alam'],
 [3.9, 393, 3.0, 'Taman Harmoni Keputih', 'Cagar Alam'],
 [3.9, 161, 3.1, 'Bukit Paralayang, Watugupit', 'Cagar Alam'],
 [3.9, 238, 3.1, 'Gunung Manglayang', 'Cagar Alam'],
 [3.9, 206, 3.1, 'Wisata Kaliurang', 'Cagar Alam'],
 [3.9, 148, 3.1, 'Goa Rancang Kencono', 'Cagar Alam'],
 [3.9, 341, 3.2, 'Hutan Wisata Tinjomoyo Semarang', 'Cagar Alam']]

In [None]:
# Persist the trained model to an HDF5 file in the working directory.
# NOTE(review): the truncated output under this cell looks like a warning from
# the saving API, presumably about the legacy .h5 format; confirm, and check
# who loads model.h5 before switching formats.
model.save('model.h5')

  saving_api.save_model(
