In [139]:
from tensorflow.keras.models import load_model

model = load_model('siamese_model_v3.keras')
model.summary()

Model: "siamese_model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 anchor (InputLayer)         [(None, 1)]                  0         []                            
                                                                                                  
 game (InputLayer)           [(None, 1)]                  0         []                            
                                                                                                  
 text_vectorization_3 (Text  (None, None)                 0         ['anchor[0][0]',              
 Vectorization)                                                      'game[0][0]']                
                                                                                                  
 embedding_2 (Embedding)     (None, None, 128)            1280000   ['text_vectorizati

In [161]:
import json
import polars as pl

with open('training_data_pairs_shuffled.json', 'r') as f:
    loaded_data = json.load(f)

anchor_id_list = []
anchor_features_list = []
# game_id_list = []
# game_features_list = []

for entry in loaded_data:
    anchor_id_list.append(entry['anchor']['id'])
    anchor_features_list.append(f"{entry['anchor']['features']}".strip())
    # game_id_list.append(entry['game']['id'])
    # game_features_list.append(entry['game']['features'])

data = {
    'id': anchor_id_list,
    'features': anchor_features_list,
    # 'game_id': game_id_list,
    # 'game_features': game_features_list
}

df = pl.DataFrame(data)
df.head()

id,features
i64,str
1,"""Adventure Shoo…"
1,"""Adventure Shoo…"
1,"""Adventure Shoo…"
1,"""Adventure Shoo…"
1,"""Adventure Shoo…"


In [162]:
df = df.unique()

In [163]:
df.describe()

describe,id,features
str,f64,str
"""count""",20624.0,"""20624"""
"""null_count""",0.0,"""0"""
"""mean""",42228.882709,
"""std""",50740.214687,
"""min""",1.0,""""""
"""25%""",7811.0,
"""50%""",20873.0,
"""75%""",55888.0,
"""max""",281275.0,"""wordgame"""


In [164]:
liked_games = [67, 12, 3475, 871]
liked_games_df = df.filter(pl.col('id').is_in(liked_games))
liked_games_df.head()

id,features
i64,str
12,"""Adventure Role…"
871,"""Simulator com…"
3475,"""Puzzle Comedy …"
67,"""Strategy Turnb…"


In [165]:
disliked_games = [15, 741, 389, 678]
disliked_games_df = df.filter(pl.col('id').is_in(disliked_games))
disliked_games_df.head()

id,features
i64,str
678,"""RealTimeStrate…"
389,"""Adventure Role…"
15,"""RoleplayingRPG…"
741,"""RealTimeStrate…"


In [166]:
all_games_df = df.filter(~pl.col('id').is_in(disliked_games + liked_games))
all_games_df.describe()

describe,id,features
str,f64,str
"""count""",20616.0,"""20616"""
"""null_count""",0.0,"""0"""
"""mean""",42244.966482,
"""std""",50743.483522,
"""min""",1.0,""""""
"""25%""",7819.0,
"""50%""",20879.0,
"""75%""",55935.0,
"""max""",281275.0,"""wordgame"""


In [167]:
import numpy as np

example_gameA = liked_games_df.head(1)
example_gameB = disliked_games_df.head(1)
example_gameA = np.array(example_gameA['features'])
example_gameB = np.array(example_gameB['features'])
print(example_gameA)

['Adventure RoleplayingRPG Action Fantasy Historical Horror basedontabletoprpg binkvideo bowandarrow boxarthalfoffacedisplayed britishaccent chemicalwarfare contenteditor dialoguetrees gamecriticsawards ghoul goodvsevil immortality invisibility levelingup medieval moraldecisions multipleendings necromancy nosferatu oldfashionedenglish realtimecombat resurrection rivalingfactions undead vampire vampirism']


In [168]:
example_gameA.shape

(1,)

In [169]:
model.predict([example_gameA, example_gameB])



array([[0.01797397]], dtype=float32)

In [170]:
temp = df.head(3)
example = temp.sample(1)
temp = temp.filter(pl.col('id') != example['id'])
temp.head()

id,features
i64,str
7,"""RoleplayingRPG…"
22,"""Puzzle Rolepla…"


In [178]:
THRESHOLD = 0.999

iterations = 0

similarity = 0
while similarity < THRESHOLD:
    example_gameC = all_games_df.sample(1)
    all_games_df = all_games_df.filter(pl.col('id') != example_gameC['id'])
    example_gameC = np.array(example_gameC['features'])
    similarity = model.predict([example_gameA, example_gameC])
    iterations += 1

print(similarity)
print(example_gameA, example_gameC)



InvalidArgumentError: Graph execution error:

Detected at node TensorArrayV2Stack/TensorListStack defined at (most recent call last):
<stack traces unavailable>
Tried to stack list which only contains uninitialized tensors and has a non-fully-defined element_shape: [?,64]
	 [[{{node TensorArrayV2Stack/TensorListStack}}]]
	 [[siamese_model/lstm_2/PartitionedCall]] [Op:__inference_predict_function_1111134]

In [176]:
print(all_games_df.describe())

shape: (9, 3)
┌────────────┬──────────────┬──────────┐
│ describe   ┆ id           ┆ features │
│ ---        ┆ ---          ┆ ---      │
│ str        ┆ f64          ┆ str      │
╞════════════╪══════════════╪══════════╡
│ count      ┆ 20406.0      ┆ 20406    │
│ null_count ┆ 0.0          ┆ 0        │
│ mean       ┆ 42248.590317 ┆ null     │
│ std        ┆ 50760.349876 ┆ null     │
│ min        ┆ 1.0          ┆          │
│ 25%        ┆ 7831.0       ┆ null     │
│ 50%        ┆ 20873.0      ┆ null     │
│ 75%        ┆ 55935.0      ┆ null     │
│ max        ┆ 281275.0     ┆ wordgame │
└────────────┴──────────────┴──────────┘


In [53]:
# maxes = []
# 
# for game in liked_games_df.iter_rows(named=True):
#     local_max = [-1, -1]
#     for row in all_games_df.iter_rows(named=True): 
#         similarity = model.predict([np.array([game['features']]), np.array([row['features']])])
#         if similarity[0][0] > local_max[0]:
#             local_max[0] = similarity[0][0]
#             local_max[1] = row['id']        
#     maxes.append(local_max[0])
#     
# print(maxes)



KeyboardInterrupt: 

In [94]:
["abc"]* 32

['abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc',
 'abc']

In [106]:
# compare = all_games_df.to_numpy()
# BATCH_SIZE = compare.shape[0]
# print(compare[:,1])
# print(compare.shape)

['Adventure Puzzle RoleplayingRPG Shooter Action Horror Openworld Sciencefiction Stealth achievements actionadventure aicompanion airship alternatereality anarchism animalweaponry antihero antivillain aquarium arrivalbycrashlanding artificialintelligence assassin atheism audiologs autosaving backtracking badparents beenherebefore betrayal biblicalquotes binkvideo biotechnology bird bloody bossfight bowandarrow bread breakingthefourthwall camera capitalism censoredversion changingsides checkpoints chineseaccent christianity cockroach communism contestdrivendevelopment contextsensitive controversy corruptgovernment coversystem crafting cult damselindistress dancing death diceawards difficultylevel digitaldistribution dimensiontravel disfigurement divinepunishment dolbydigital downloadablecontent dystopian easteregg echolocation economy environmentalkill environmentalpuzzles explosion facelessprotagonist fakeingameadvertising fallingobject fallingsequence fascism femaleprotagonist fiction

In [113]:
# local_max = [-1, -1]
# similarity = [[2],
#               [4]]
# 
# similarity_max = max(similarity)
# print(f'{similarity_max} > {local_max[0]}')
# if similarity_max > local_max[0]:
#     local_max[0] = similarity_max
#     local_max[1] = compare[np.argmax(similarity[0], axis=0), 0]        
# print(local_max)

[4] > -1


TypeError: '>' not supported between instances of 'list' and 'int'

In [95]:
# maxes = []
# 
# for game in liked_games_df.iter_rows(named=True):
#     local_max = [-1, -1]
#     print(compare.shape)
#     anchor = np.array([game['features']]*BATCH_SIZE)
#     print(anchor.shape)
#     similarity = model.predict([anchor, compare[:,1]])
# 
#     similarity_max = max(similarity[0])
#     if similarity_max > local_max[0]:
#         local_max[0] = similarity_max
#         local_max[1] = compare[np.argmax(similarity[0], axis=0), 0]        
#     maxes.append(local_max[0])
#     
# print(maxes)

(20616,)
(20616,)
[[0.7425488 ]
 [0.25412145]
 [0.06016702]
 ...
 [1.        ]
 [1.        ]
 [1.        ]]


TypeError: 'tuple' object does not support item assignment