## load training pairs

In [1]:
import json
import numpy as np

# Read the pre-shuffled training pairs. JSON text is UTF-8 by specification,
# so the encoding is pinned here instead of relying on the platform's locale
# default (which is not UTF-8 on every system).
with open('training_data_pairs_shuffled.json', 'r', encoding='utf-8') as f:
    loaded_data = json.load(f)

In [2]:
def _pair_text(side):
    """Return one side of a pair as text: its 'features', a space, its 'description'.

    To experiment with features only, return f"{side['features']}" instead.
    """
    return f"{side['features']} {side['description']}"


# Three parallel lists, index-aligned with the entries of loaded_data:
# the label, the anchor-side text and the game-side text of every pair.
label_list = [pair['label'] for pair in loaded_data]
anchor_list = [_pair_text(pair['anchor']) for pair in loaded_data]
game_list = [_pair_text(pair['game']) for pair in loaded_data]

In [3]:
# Sanity check: show the first anchor text. The element is already a plain
# string, so it is printed directly (no NumPy wrapper needed), the same way
# the first game text is previewed.
print(anchor_list[0])

Adventure Shooter Simulator Action Fantasy Stealth actionadventure blackjack bowandarrow crime darkness death difficultylevel ghosts immersive invisibility melee movingbodies pacifistplaythrough particlesystem pickpocketing polygonald potion realtimecombat shadowstealth singleplayeronly stealthkill steampunk swimming swordplay theft thief ultimate thief back tread softly make complex full loot steal outsmart enemy riveting story draw world thief metal place powerful fanatical corruption life thief however fence early mission local sheriff hunting take hear prophecy metal leaves hired kill letter eavesdrop mechanist meeting father conversion street people mindless wear emit vapor capable reducing nearby rust provide twenty servant project realizing recording blackmail recording safe deposit order coerce revealing estate evidence crime scene police officer deliver suspicious letter carried portal wounded pagan portal outside city pagan trail blood join lead office learn project inadverte

In [4]:
# Sanity check: show the game-side text of the first pair.
first_game_text = game_list[0]
print(first_game_text)

Adventure Simulator Action Fantasy Stealth actionadventure assassin basketball betrayal blackjack bowandarrow bread crime darkness difficultylevel ghosts immersive instantkill invisibility magic maleprotagonist medieval melee movingbodies murder pacifistplaythrough particlesystem pickpocketing plottwist polygonald potion realtimecombat reluctanthero retailgameswithsteamactivation shadowstealth singleplayeronly stealthkill steampunk swimming swordplay theft thief tombraiding voiceacting thief stealth dark sneak subterranean forbidding dark sinister city heavily inspired dark ally money hidden allies story deception revenge


In [14]:
from sklearn.model_selection import train_test_split

# Split the pairs 80/10/10 into train / validation / test.
# `test_size` is the fraction that is *held out*, so 0.2 keeps 80% for
# training; the previous value of 0.8 left only 20% of the data to train on.
# NOTE(review): if the small training set was a deliberate choice to shorten
# CPU training time, lower HOLDOUT_FRACTION's complement knowingly instead.
HOLDOUT_FRACTION = 0.2
SPLIT_SEED = 42

train_anchor, temp_anchor, train_game, temp_game, train_labels, temp_labels = train_test_split(
    anchor_list,
    game_list,
    label_list,
    test_size=HOLDOUT_FRACTION, random_state=SPLIT_SEED)

# Halve the held-out part: one half for validation, the other for the final test.
val_anchor, test_anchor, val_game, test_game, val_labels, test_labels = train_test_split(
    temp_anchor,
    temp_game,
    temp_labels,
    test_size=0.5, random_state=SPLIT_SEED)

In [15]:
# Length of the longest text on either side of any pair. This is len() of a
# Python string, i.e. a character count, not a token count.
MAX_LENGTH = max(map(len, anchor_list + game_list))
print(MAX_LENGTH)

# Number of units in the model's final (sigmoid) layer.
NUM_CLASSES = 1
# Vocabulary cap shared by the text vectorizer and the embedding table.
MAX_TOKENS = 10000

8551


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

# Siamese text matcher.
# Both inputs (anchor text, game text) go through the *same* vectorizer,
# embedding, LSTM and dense layers, so the two sides share all encoder
# weights. The two 16-dimensional encodings are concatenated and scored by a
# single sigmoid unit trained with binary cross-entropy.
anchor_input = tf.keras.Input(shape=(1,), name='anchor', dtype=tf.string)
game_input = tf.keras.Input(shape=(1,), name='game', dtype=tf.string)

# Build the vocabulary from the training split only, and from both sides of
# the pair:
#  * adapting on the full anchor_list would let validation/test text shape
#    the vocabulary (data leakage);
#  * the same layer also encodes the game texts, so their tokens must be
#    counted too or they may fall out of the vocabulary.
vectorize_layer = layers.TextVectorization(max_tokens=MAX_TOKENS, output_mode='int')
vectorize_layer.adapt(list(train_anchor) + list(train_game))
A_vectorized = vectorize_layer(anchor_input)
B_vectorized = vectorize_layer(game_input)

# TextVectorization pads shorter texts in a batch with token id 0.
# mask_zero=True marks those positions as padding so the LSTM skips them and
# its final state reflects the real text rather than the trailing padding.
embedding_layer = layers.Embedding(
    input_dim=MAX_TOKENS,
    output_dim=128,
    mask_zero=True)
A_embedded = embedding_layer(A_vectorized)
B_embedded = embedding_layer(B_vectorized)

# Shared sequence encoder: one 64-unit LSTM applied to each side.
shared_lstm = layers.LSTM(64)
A_lstm = shared_lstm(A_embedded)
B_lstm = shared_lstm(B_embedded)

dense1 = layers.Dense(
    units=64,
    activation='relu')
A_dense = dense1(A_lstm)
B_dense = dense1(B_lstm)

dropout = layers.Dropout(0.5)
A_drop = dropout(A_dense)
B_drop = dropout(B_dense)

dense2 = layers.Dense(
    units=16,
    activation='relu')
A_output = dense2(A_drop)
B_output = dense2(B_drop)

# Join the two encodings side by side; the classifier head sees both.
merged = layers.concatenate(
    [A_output, B_output],
    axis=-1)

dense3 = layers.Dense(
    units=NUM_CLASSES,
    activation='sigmoid')

predictions = dense3(merged)

siamese_model = models.Model(inputs=[anchor_input, game_input],
                             outputs=predictions,
                             name='siamese_model')

# To tune the learning rate, pass an optimizer instance instead of 'adam',
# e.g. Adam(learning_rate=0.01).
siamese_model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

siamese_model.summary()

2024-02-01 21:40:17.537262: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-01 21:40:18.028774: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-01 21:40:18.028847: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-01 21:40:18.104246: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-01 21:40:18.279917: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-01 21:40:18.281872: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [None]:
# from keras.utils import plot_model
# 
# plot_model(siamese_model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# plot_model(siamese_model, to_file='model_plot_without.png', show_shapes=False, show_layer_names=True)

In [None]:
# print(np.array(train_anchor).shape)

In [None]:
# Train on the training split and track validation metrics after every epoch.
# `use_multiprocessing` / `workers` are intentionally not passed: they only
# affect generator / keras.utils.Sequence inputs, so they did nothing for
# these in-memory NumPy arrays, and newer Keras releases no longer accept
# them as fit() arguments.
history = siamese_model.fit(
    [np.array(train_anchor), np.array(train_game)],
    np.array(train_labels),
    epochs=15,
    batch_size=64,
    shuffle=True,
    validation_data=([np.array(val_anchor), np.array(val_game)], np.array(val_labels))
)

In [None]:
# Persist the trained model to a single .keras file in the working directory.
siamese_model.save(filepath='siamese_model_v4.keras')

In [None]:
from matplotlib import pyplot as plt

# Accuracy per epoch: training curve first, validation curve second.
for metric_key, curve_label in (('accuracy', 'train'), ('val_accuracy', 'val')):
    plt.plot(history.history[metric_key], label=curve_label)
plt.title('Model Accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Loss per epoch: training curve first, validation curve second.
for metric_key, curve_label in (('loss', 'train'), ('val_loss', 'val')):
    plt.plot(history.history[metric_key], label=curve_label)
plt.title('Model Loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(loc='upper left')
plt.show()

In [None]:
# Final evaluation on the held-out test split.
# The two inputs are passed as a list in [anchor, game] order, the same
# structure the model's inputs were declared with and that fit() is given.
test_loss, test_accuracy = siamese_model.evaluate(
    [np.array(test_anchor), np.array(test_game)],
    np.array(test_labels),
)
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy * 100:.2f}%')