# Query Recommender

In [51]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import joblib
from sklearn.metrics.pairwise import euclidean_distances

# Eingabedaten erzeugen

In [52]:
# Load the pickled recommender feature table. The following cell only reuses
# its column layout to build an empty query row.
# NOTE(review): read_pickle unpickles arbitrary objects - only load trusted files.
inputdata = pd.read_pickle("../data/recodata.pkl")

In [53]:
# Replace the loaded table with a single all-zero row that keeps the same
# columns; this row is the template for the query that is filled in below.
inputdata = pd.DataFrame(0, index=range(1), columns=inputdata.columns)
inputdata

Unnamed: 0,yearpublished,playingtime,age,cat_Economic,cat_CardGame,cat_Fantasy,cat_Fighting,cat_ScienceFiction,cat_Adventure,cat_Exploration,...,mec_Ratio/CombatResultsTable,mec_Chaining,mec_Slide/Push,mec_AuctionCompensation,mec_Ordering,mec_PassedActionToken,mec_ElapsedRealTimeEnding,mec_BidsAsWagers,mec_Drawing,mec_PiecesasMap
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [54]:
#inputdata.columns.tolist()

In [55]:
# Describe the query game: three numeric attributes plus the category
# ("cat_") and mechanic ("mec_") indicator columns that should be set to 1.
query_numeric = {"yearpublished": 2020, "playingtime": 60, "age": 10}
query_flags = [
    "cat_CardGame",
    "cat_ScienceFiction",
    "cat_Dice",
    "cat_Animals",
    "mec_DiceRolling",
    "mec_ModularBoard",
]

# Assigning to a misspelled column name would silently create a new column
# instead of setting an existing feature, so validate the names first.
unknown_columns = [
    col for col in [*query_numeric, *query_flags] if col not in inputdata.columns
]
if unknown_columns:
    raise KeyError(f"Unknown feature columns: {unknown_columns}")

for col, value in query_numeric.items():
    inputdata[col] = value
inputdata[query_flags] = 1

In [56]:
# Display the query row after the numeric values and indicator flags were set.
inputdata

Unnamed: 0,yearpublished,playingtime,age,cat_Economic,cat_CardGame,cat_Fantasy,cat_Fighting,cat_ScienceFiction,cat_Adventure,cat_Exploration,...,mec_Ratio/CombatResultsTable,mec_Chaining,mec_Slide/Push,mec_AuctionCompensation,mec_Ordering,mec_PassedActionToken,mec_ElapsedRealTimeEnding,mec_BidsAsWagers,mec_Drawing,mec_PiecesasMap
0,2020,60,10,0,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [57]:
# Test with real values (disabled): use row 0 of the pickled test data as the
# query instead of the hand-built row above.
# inputdata = pd.read_pickle("../data/recotestdata.pkl")
# 0 = BRASS
# inputdata = inputdata.loc[[0],:].copy()
# inputdata

# Eingabedaten transformieren

In [58]:
# Load the persisted category PCA and project the query's "cat_" indicator
# columns into its component space (one "catp_<i>" column per component).
# NOTE(review): joblib.load unpickles arbitrary objects - only load trusted files.
pca_cat = joblib.load('../data/pca_cat.pkl')
ncomp = pca_cat.n_components_

cat_columns = [col for col in inputdata.columns if col.startswith("cat_")]
catp_names = [f"catp_{i}" for i in range(ncomp)]
categories_pca = pd.DataFrame(
    pca_cat.transform(inputdata[cat_columns]),
    columns=catp_names,
)

In [59]:
# Display the query's category features in PCA component space.
categories_pca

Unnamed: 0,catp_0,catp_1,catp_2,catp_3,catp_4,catp_5,catp_6,catp_7,catp_8,catp_9,catp_10,catp_11,catp_12,catp_13,catp_14,catp_15,catp_16,catp_17,catp_18,catp_19
0,-0.09279,0.813279,0.357823,0.437768,-0.214335,-0.072403,-0.79354,-0.124019,-0.447716,-0.001893,-0.075348,0.379053,-0.407984,0.440941,-0.067389,-0.28936,-0.058886,0.372242,0.291853,-0.161234


In [60]:
# Load the persisted PCA object used for the "mec_" (mechanic) columns.
# NOTE(review): joblib.load unpickles arbitrary objects - only load trusted files.
pca_mec = joblib.load('../data/pca_mec.pkl')

In [61]:
# Rebind ncomp to the mechanic PCA's component count. This overwrites the
# value taken from pca_cat earlier, so the following cell must run after this one.
ncomp = pca_mec.n_components_ 

In [62]:
# Project the query's "mec_" indicator columns with the mechanic PCA and label
# the resulting components "mecp_<i>".
mec_columns = [col for col in inputdata.columns if col.startswith("mec_")]
mecp_names = [f"mecp_{i}" for i in range(ncomp)]
mechanic_pca = pd.DataFrame(
    pca_mec.transform(inputdata[mec_columns]),
    columns=mecp_names,
)

In [63]:
# Display the query's mechanic features in PCA component space.
mechanic_pca

Unnamed: 0,mecp_0,mecp_1,mecp_2,mecp_3,mecp_4,mecp_5,mecp_6,mecp_7,mecp_8,mecp_9,mecp_10,mecp_11,mecp_12,mecp_13,mecp_14,mecp_15,mecp_16,mecp_17,mecp_18,mecp_19
0,0.174938,-0.610407,-0.13722,-0.763051,0.112869,-0.08455,-0.093231,-0.139891,0.414312,-0.085435,-0.178328,-0.186738,0.002219,0.407219,0.191645,0.021911,-0.364521,0.09265,-0.165886,0.10114


### StandardScaler laden und anwenden

In [64]:
# Scale each numeric feature with its own persisted scaler.
# The scaled values overwrite the raw ones in inputdata, so this cell is not
# idempotent: re-running it would scale already-scaled values. Rebuild the
# query row first if it has to be executed again.
# NOTE(review): joblib.load unpickles arbitrary objects - only load trusted files.
allfeat = ['yearpublished', 'playingtime', 'age']
for feature in allfeat:
    feature_scaler = joblib.load(f'../data/scaler_{feature}.pkl')
    # The scaler is fed a 2-D column, hence the reshape to (n_rows, 1).
    column_2d = inputdata[feature].values.reshape(-1, 1)
    inputdata[feature] = feature_scaler.transform(column_2d)

In [65]:
# Display the query row again; the three numeric columns now hold scaled values.
inputdata

Unnamed: 0,yearpublished,playingtime,age,cat_Economic,cat_CardGame,cat_Fantasy,cat_Fighting,cat_ScienceFiction,cat_Adventure,cat_Exploration,...,mec_Ratio/CombatResultsTable,mec_Chaining,mec_Slide/Push,mec_AuctionCompensation,mec_Ordering,mec_PassedActionToken,mec_ElapsedRealTimeEnding,mec_BidsAsWagers,mec_Drawing,mec_PiecesasMap
0,0.081717,-0.456488,-0.928951,0,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [66]:
# Start the query vector from an independent copy of the scaled numeric columns.
y_test = inputdata.loc[:, allfeat].copy()

In [67]:
# Assemble the complete query vector: scaled numeric features followed by the
# category and mechanic PCA components.
# It is built from the source frames in a single concat rather than by
# appending to y_test, so re-running this cell cannot duplicate the PCA columns.
y_test = pd.concat([inputdata[allfeat], categories_pca, mechanic_pca], axis=1)

In [68]:
# Display the assembled query vector.
y_test

Unnamed: 0,yearpublished,playingtime,age,catp_0,catp_1,catp_2,catp_3,catp_4,catp_5,catp_6,...,mecp_10,mecp_11,mecp_12,mecp_13,mecp_14,mecp_15,mecp_16,mecp_17,mecp_18,mecp_19
0,0.081717,-0.456488,-0.928951,-0.09279,0.813279,0.357823,0.437768,-0.214335,-0.072403,-0.79354,...,-0.178328,-0.186738,0.002219,0.407219,0.191645,0.021911,-0.364521,0.09265,-0.165886,0.10114


# Distanzen berechnen

In [69]:
# Load the persisted reference feature matrix that the query is compared against.
# NOTE(review): joblib.load unpickles arbitrary objects - only load trusted files.
x_train = joblib.load('../data/x_train.pkl')

In [70]:
# Display the reference feature matrix.
x_train

Unnamed: 0,yearpublished,playingtime,age,catp_0,catp_1,catp_2,catp_3,catp_4,catp_5,catp_6,...,mecp_10,mecp_11,mecp_12,mecp_13,mecp_14,mecp_15,mecp_16,mecp_17,mecp_18,mecp_19
0,0.071191,0.203272,0.930811,-0.836597,-0.591730,-0.192373,0.029915,-0.607970,-0.237552,0.205751,...,-0.335986,0.815089,0.510971,1.202660,-0.527292,-0.416415,0.270665,0.055297,-0.080347,0.327882
1,0.055401,-0.456488,0.465870,-0.141601,0.055712,0.150359,-0.409296,0.037522,-0.018502,-0.179835,...,0.968284,0.396384,0.070661,-0.496570,-0.158663,-0.084907,0.427171,0.285116,-0.046904,-0.273709
2,0.065928,0.203272,0.930811,1.628473,-0.766492,-0.329533,0.258054,0.156716,-0.331417,0.130036,...,-0.518761,-0.865691,0.933124,-0.698718,-0.123971,0.040795,1.289930,0.440088,0.525098,0.247835
3,0.086981,0.533152,0.930811,-0.677488,-0.393141,-0.235104,-0.073167,-0.385052,-0.122378,-0.159640,...,-0.248646,0.122983,-0.076700,-0.224536,-0.489545,-0.026611,-0.427865,-0.116183,-0.250703,-0.276549
4,0.065928,4.161830,0.930811,-0.402329,-0.619475,1.250864,1.130702,0.112107,0.189275,0.685140,...,0.040580,-0.396421,-0.395319,0.305237,0.258305,0.026909,0.057207,0.061891,-0.623264,-0.530408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,0.076454,-0.456488,-0.928951,-0.344162,0.010004,0.034408,0.056156,0.761931,0.275301,0.056618,...,0.185912,-0.848898,0.290843,-0.182688,0.504767,0.180783,0.308649,0.174240,0.229738,0.063716
496,0.050138,0.863032,-5.578356,-0.055444,0.041281,0.536645,-0.477003,-0.032277,0.487048,0.523767,...,0.119909,-0.579816,-0.591036,0.289565,-0.024672,-0.545744,-0.063616,-0.069525,0.111512,-0.412425
497,0.071191,-0.951307,-2.323773,0.453179,-0.058078,0.079022,-0.210195,-0.294687,0.093932,0.073837,...,0.432269,-0.211704,0.010703,0.228648,-0.012622,0.137974,-0.071038,-0.223441,0.149300,-0.010075
498,0.060664,-0.456488,-0.928951,-0.257940,0.853300,-0.303840,0.069677,0.611844,0.028615,-0.122995,...,0.424233,-0.403240,-0.133841,0.315484,-0.453354,0.173957,-0.028145,0.007474,-1.126847,-0.022921


In [71]:
# Euclidean distance from every row of x_train to the query; the result has one
# row per x_train row and a single distance column labelled 0.
# euclidean_distances compares features by position, so the query columns are
# first selected in x_train's column order. A missing feature then raises a
# KeyError instead of silently producing wrong distances.
recos = pd.DataFrame(euclidean_distances(x_train, y_test[x_train.columns]))

# Restliche Daten joinen

In [72]:
# Load the pickled board game table; later cells read its "name",
# "boardgamemechanic" and "boardgamecategory" columns.
# NOTE(review): read_pickle unpickles arbitrary objects - only load trusted files.
bgdata = pd.read_pickle("../data/bg_data20240302.pkl")

In [73]:
# Attach the game names to the distances, aligned on the row index.
# Only the distance column (label 0) is taken from recos, so re-running this
# cell does not keep appending further name columns.
# NOTE(review): the displayed result shows two "name" columns, so bgdata
# presumably carries that label twice - confirm whether both are wanted.
recos = pd.concat([recos[[0]], bgdata["name"]], axis=1)

In [77]:
# Rank the games by the distance column (label 0), closest matches first.
recos.sort_values(by=0)

Unnamed: 0,0,name,name.1
255,1.883486,Camel Up (Second Edition),"['Camel Up (Second Edition)', 'Camel Up (Русск..."
399,2.110075,Twice as Clever!,"['Dobbel zo Clever', 'Doppelt so Clever', 'Dup..."
289,2.296585,Cryptid,"['Cryptid', 'Cryptide', 'Kryptyda', 'Криптид',..."
121,2.313753,SCOUT,"['Cirkus', 'SCOUT', 'Scout!', 'スカウト！', '马戏星探',..."
171,2.316143,That's Pretty Clever!,"['Clever', 'Ganz Schön Clever', 'Optimus', 'Pr..."
...,...,...,...
455,5.891841,Advanced Squad Leader,"['Advanced Squad Leader', 'ASL: Advanced Squad..."
4,6.251698,Twilight Imperium: Fourth Edition,"['Twilight Imperium: Čtvrtá edice', 'Twilight ..."
101,10.992549,The 7th Continent,The 7th Continent
53,13.237236,Sleeping Gods,"['Schlafende Götter', 'Sleeping Gods', 'Spící ..."


In [76]:
# Look up the mechanics string of the game with row label 10.
# A single .loc[row, column] lookup replaces selecting three columns and then
# indexing one of them again.
bgdata.loc[10, "boardgamemechanic"]

"['Action Retrieval', 'Area Majority / Influence', 'Automatic Resource Growth', 'Campaign / Battle Card Driven', 'Cooperative Game', 'Hand Management', 'Modular Board', 'Set Collection', 'Simultaneous Action Selection', 'Solo / Solitaire Game', 'Tags', 'Variable Player Powers', 'Variable Set-up']"