# Building Embeddings

In [1]:
# Sanity check: show the working directory and the notebooks that sit next to
# this one. The relative paths used further down ('..', '../data/...') are
# resolved from this directory.
# NOTE(review): the recorded output of this cell contains an absolute local
# path; consider clearing it before sharing the notebook.
!pwd
!ls

/Users/benjaminglaus/Github/Moonboard/notebooks
0. Data Exploration.ipynb             2. Building Embeddings.ipynb
1. Preprocessing.ipynb                2. Training Hold2Vec Embeddings.ipynb


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import sys
sys.path.append('..')

from data_loading import *
from embeddings.route_embeddings import *
from embeddings.hold2vec import *

2023-04-25 14:37:21.462462: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Load the 2017 problem set (path is relative to the notebooks directory).
# The preview shows one route per row: Grade, UserRating and the list of
# hold names in Moves.
df = load_dataframe(
    '../data/2017.json'
)
df.head(5)

Unnamed: 0,Grade,UserRating,Moves
0,6A+,1,"[H5, E7, F8, D10, E13, C14, C16, B18, E18, F9,..."
1,6A+,2,"[H5, E7, D8, D10, E13, C14, C16, B18, E18]"
2,6B+,2,"[A4, B4, C7, D9, F12, D15, F18]"
3,6A+,2,"[K4, J4, I7, H9, I10, F12, F13, E6, B14, C16, ..."
4,6B+,2,"[G8, D9, F12, C13, F15, D17, F18, E6, G4, F4]"


## Bag of Holds Embeddings

In [4]:
bag_of_holds_embeddings_1d = bag_of_holds_1d(df.Moves)
print(bag_of_holds_embeddings_1d.shape)
print(bag_of_holds_embeddings_1d[0])

(18865, 198)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.
 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 1.
 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1.
 0. 0. 0. 0. 0. 0.]


In [5]:
bag_of_holds_embeddings_2d = bag_of_holds_2d(df.Moves)
print(bag_of_holds_embeddings_2d.shape)
print(bag_of_holds_embeddings_2d[0])

(18865, 18, 11)
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [1. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0.]]


## TF-IDF Embeddings

### 1-Dimensional

In [6]:
tf_idf_embeddings_1d = tf_idf_embedding_1d(df.Moves)
print(tf_idf_embeddings_1d.shape)
print(tf_idf_embeddings_1d[0])

  embeddings[i] = np.where(embeddings[i] != 0, 1 / embeddings[i], embeddings[i])


(18865, 198)
[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.00028547 0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.00142248 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.00040128 0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.00118765 0.         0.
 0.         0.         0.         0.       

### 2-Dimensional

In [7]:
tf_idf_embeddings_2d = tf_idf_embedding_2d(df.Moves)
print(tf_idf_embeddings_2d.shape)
print(tf_idf_embeddings_2d[0])

  embeddings[i] = np.where(embeddings[i] != 0, 1/embeddings[i], embeddings[i])


(18865, 18, 11)
[[0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.00028547 0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.
  0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.00142248 0.
  0.         0.         0.         0.         0.        ]
 [0.         0.         0.         0.         0.         0.00040128
  0.         0.         0.         0.         0.        ]
 [0.         0.         0.      

## Hold2Vec Embeddings

In [8]:
# Name of the saved Hold2Vec embedding file to load in the next step. The file
# name suggests skip-gram, embedding size 100, window 8, 20 epochs -- this is
# inferred from the name only.
filename = 'hold2vec_skip-gram_embedding100_window8_epochs20.npy'
# List the embedding files available on disk so the name above can be checked.
!ls ../data/embeddings/

hold2vec_100_8.npy
hold2vec_40_8.npy
hold2vec_skip-gram_embedding100_window8_epochs20.npy


### Loading the "Vocabulary", the matrix containing the Hold2Vec embedding of each of the 198 holds:

In [9]:
# Load the saved matrix and transpose it in one step so that rows index holds:
# the recorded shape after the transpose is (198, 100), i.e. one
# 100-dimensional vector per hold.
hold_embeddings_matrix = load_hold_matrix(
    filename='../data/embeddings/' + filename
).T
hold_embeddings_matrix.shape

(198, 100)

### Reading the Hold2Vec embedding of a single hold

In [10]:
# Look up one hold: hold name -> row index -> row of the embedding matrix.
hold_string = 'H5'
hold_index = string_to_index(hold_string)
hold_embedding = hold_embeddings_matrix[hold_index]

# Shape first, then the vector itself, one per line.
print(hold_embedding.shape, hold_embedding, sep='\n')

(100,)
[ 2.59741068e-01  4.28026348e-01 -7.81801566e-02 -1.19945377e-01
 -1.47149801e-01 -2.20660955e-01  2.02179983e-01  1.80359095e-01
 -2.03727290e-01 -2.27398112e-01 -2.07541093e-01 -1.87509730e-02
  3.63446265e-01  5.70713095e-02 -3.48602198e-02  2.45477557e-01
 -7.86832646e-02 -3.34125876e-01  2.92002261e-01 -1.33347705e-01
 -2.72651792e-01  1.40089691e-01  1.06401041e-01  3.63530070e-01
  1.54570825e-02  2.33296335e-01  2.30301525e-02  6.71563223e-02
 -6.16206974e-02  3.01331971e-02  7.87246749e-02  2.31594294e-01
 -6.21610396e-02 -1.03543147e-01  1.93075687e-01  3.66372466e-01
  1.27161413e-01  4.82191592e-02  2.48084933e-01 -7.76129216e-02
 -1.66103467e-01 -5.08832812e-01  3.18204999e-01  3.69369954e-01
  1.91570699e-01  1.82725936e-01 -3.71582732e-02  1.56154677e-01
 -3.59907061e-01  2.90698595e-02 -2.47814298e-01  1.10422514e-01
 -9.84898061e-02 -2.03919992e-01  9.66297314e-02  6.73854141e-04
  2.79320274e-02  5.59974551e-01 -1.86061710e-02 -1.76519990e-01
  2.10059673e-01 -

### Reading the Hold2Vec embedding of one or more Routes

In [11]:
# Take three example routes and convert their hold names to matrix row indexes.
routes = df['Moves'][12:15]
route_indexes = strings_to_indexes(routes)

# For every route, collect the embedding row of each of its holds. The result
# is a list (one entry per route) of lists (one vector per hold).
# NOTE: this rebinds `hold_embedding`, which held a single hold vector in the
# single-hold example above, to a nested list.
hold_embedding = [
    [hold_embeddings_matrix[i] for i in hold_indexes]
    for hold_indexes in route_indexes
]

print(len(hold_embedding[0]))  # number of holds in route at index 0
hold_embedding[0]  # embedding vectors of holds in route at index 0

7


[array([ 3.2963240e-01, -1.5867214e-01,  3.9662197e-01, -3.0241928e-01,
         9.6391741e-05,  6.4819477e-02,  1.5416589e-01,  2.9820034e-01,
        -3.8789093e-02, -1.6350058e-01, -5.8462715e-01,  1.5624225e-01,
        -6.1982151e-02,  1.1947423e-01,  1.9639082e-01, -1.2771983e-01,
        -3.9447016e-01,  2.1278438e-01, -8.2728900e-02,  1.6221076e-01,
        -1.4647575e-02, -2.5781655e-01,  2.4122210e-01,  2.7871901e-01,
         7.6141767e-02,  1.2977521e-01,  5.4114889e-02, -1.3079929e-01,
        -2.0298520e-01,  3.8931078e-01, -1.3292755e-01, -6.1671166e-03,
         4.4124033e-03, -7.5345866e-02, -4.0853155e-01, -3.5243949e-01,
         1.8445268e-01, -6.8610139e-02, -5.1558250e-01, -2.8593022e-01,
        -3.0253679e-01, -9.9750480e-04,  2.0338023e-01,  2.2780769e-01,
         1.1824621e-01, -9.6137665e-02, -3.8228348e-01,  1.0560997e-01,
         2.6850393e-01,  1.7231180e-01,  1.9328371e-01, -3.4630570e-01,
        -1.7220768e-01, -6.1479189e-02,  1.2404895e-02, -5.51864

### Computing the Sum Hold2Vec Vector of a Route

In [12]:
# Collapse each route's hold vectors into a single route vector using
# pool_method='sum'. The recorded shape is (3, 100): three routes, one
# 100-dimensional vector each.
routes = df['Moves'][12:15]
route_embeddings = pooled_embedding(
    routes,
    hold_embeddings=hold_embeddings_matrix,
    pool_method='sum',
)

print(route_embeddings.shape, route_embeddings[0], sep='\n')

(3, 100)
[ 1.28848541  0.35205925  0.00325325 -0.44081968 -0.0649928  -0.29524222
  0.07570986  1.29055512 -0.13511458  0.91415298 -0.35435033  0.16365846
  1.56292486 -0.10316731 -0.76731026 -0.06566321 -0.87731504  0.49665141
 -0.40812397 -0.0421181  -0.68452764 -0.18686479  0.65110493  0.83946806
 -0.78368866  0.16981852  0.41131487  0.02656969  0.85354674  1.20514202
 -1.80740023 -2.17207766  1.06497598 -0.53464538 -0.11757659  0.91206634
 -0.57793039 -0.28792626 -0.22604352 -0.1194578  -1.65836346 -0.76433361
 -0.79142976  0.38719985 -1.16124892  0.04701172 -0.7533291  -0.12577282
 -1.47159231 -0.28462678 -0.48715758  0.52026629  1.17372966  0.30864048
  0.51751536  0.28357559  0.19283022  2.14354801 -0.38840729 -1.23482358
 -0.14887254 -0.43746623 -0.1364405  -0.27949503 -1.05869043 -1.45229745
  1.34293246 -0.12972063  1.39936733 -0.18954262 -0.55479473  0.22814123
  0.77550471 -0.40342754  0.09737054 -0.9273628  -0.79654878  1.48932922
  0.3816784   0.5098871  -0.47492087 -1.26

### Computing the Weighted-Sum Hold2Vec Vector of a Route

In [13]:
# Same 'sum' pooling, but with the weights returned by tf_idf_weights passed in.
# NOTE: `routes` is materialised as a plain list here, and `route_embeddings`
# is rebound, replacing the unweighted result computed earlier.
routes = list(df['Moves'][12:15])
weights = tf_idf_weights(routes)

route_embeddings = pooled_embedding(
    routes,
    hold_embeddings=hold_embeddings_matrix,
    pool_method='sum',
    weights=weights,
)

print(route_embeddings.shape, route_embeddings[0], sep='\n')

(3, 100)
[ 0.5102635   0.14187382  0.02307319 -0.19852196  0.01730587 -0.14515498
  0.05442658  0.50615537  0.01141474  0.23490089 -0.05153489  0.1018476
  0.70687056 -0.08423312 -0.34505028 -0.00874433 -0.31772619  0.2214025
 -0.22297934 -0.12303804 -0.14567128 -0.00594816  0.25227505  0.38162321
 -0.19701359  0.21778418  0.11761901 -0.03167892  0.37615895  0.4026252
 -0.5987044  -0.65376145  0.45291042 -0.12370284 -0.04808051  0.49866909
 -0.13407132 -0.09584083 -0.14218763  0.01284635 -0.66608465 -0.33066419
 -0.39651319  0.13608684 -0.34661844  0.00946787 -0.18665701  0.01899281
 -0.59521222 -0.14680043 -0.12292999  0.22436808  0.37371099  0.06205052
  0.20346068  0.05248569  0.07698496  0.8750726  -0.20763259 -0.62393206
 -0.01815033 -0.08508579 -0.09026996 -0.18649697 -0.37901756 -0.48621491
  0.41830641  0.00728297  0.37467    -0.09820405 -0.11261874  0.02385725
  0.29217744 -0.19278809 -0.0629252  -0.27298668 -0.29585093  0.47991115
  0.13266225  0.08554277 -0.22758533 -0.65218