In [1]:
import pandas as pd

results_file_name = 'pcs_youth_results.csv'

# Load the results file, keep only rows with Year >= 2013, and add a `points`
# column: each row's `Pnt` divided by the largest `Pnt` remaining after the
# filter, so the highest score maps to 1.0 (given a positive maximum).
#
# `Series.max()` replaces the builtin `max()`: it is vectorised and skips
# missing values, whereas the builtin walks the column element by element and
# returns an order-dependent result as soon as a NaN is present.
# Building the column with `.assign` inside the chain keeps the cell
# idempotent: re-running it always starts again from the file on disk.
results = (
    pd.read_csv(results_file_name)
    .query('Year >= 2013')
    .assign(points=lambda frame: frame['Pnt'] / frame['Pnt'].max())
)

results

Unnamed: 0,Rnk,Rider,Age,Team,UCI,Pnt,Time,Race,Year,points
0,1,KOOIJ Olav,20.0,Jumbo-Visma,20,30,3:27:45,zlm-tour,2022,0.200000
1,2,SALBY Alexander,24.0,Riwal Cycling Team,10,18,",,0:00",zlm-tour,2022,0.120000
2,3,WELSFORD Sam,26.0,Team DSM,5,12,",,0:00",zlm-tour,2022,0.080000
3,4,DUPONT Timothy,34.0,Bingoal Pauwels Sauces WB,0,7,",,0:00",zlm-tour,2022,0.046667
4,5,MARECZKO Jakub,28.0,Alpecin-Fenix,0,4,",,0:00",zlm-tour,2022,0.026667
...,...,...,...,...,...,...,...,...,...,...
64187,DNF,INGELAERE Nicolas,17.0,,0,0,-,bernaudeau-junior,2013,0.000000
64188,DNF,ARDOUIN Sebastien,16.0,,0,0,-,bernaudeau-junior,2013,0.000000
64189,DNF,ALLAIRE Corentin,16.0,,0,0,-,bernaudeau-junior,2013,0.000000
64190,DNF,GAREL Adrien,17.0,,0,0,-,bernaudeau-junior,2013,0.000000


In [2]:
from tensorflow.keras.layers import StringLookup, Embedding, Flatten, ReLU, Dot
from tensorflow.keras import Input, Model
from tensorflow.keras.utils import set_random_seed

# Seed the random generators so the embedding initialisation is repeatable.
set_random_seed(42)

# Size of the embedding vector shared by the rider and race towers.
K = 5

# --- Rider tower: name -> integer id -> K-dim embedding -> non-negative vector
rider_vocabulary = results['Rider'].unique()
riders = Input(shape=(1,), dtype='string', name='rider')
rider_name_to_int = StringLookup(
    vocabulary=rider_vocabulary,
    name='rider_name_to_int',
)
rider_ints = rider_name_to_int(riders)
rider_encoder = Embedding(
    input_dim=rider_name_to_int.vocabulary_size(),
    output_dim=K,
    name='rider_encoder',
)
rider_vector = rider_encoder(rider_ints)
# Drop the length-1 axis left over from the (None, 1) input shape.
rider_vector_flat = Flatten(name='rider_vector')(rider_vector)
rider_vector_nonneg = ReLU(name='rider_vector_nonneg')(rider_vector_flat)

# --- Race tower: same pipeline, built from the race names
race_vocabulary = results['Race'].unique()
races = Input(shape=(1,), dtype='string', name='race')
race_name_to_int = StringLookup(
    vocabulary=race_vocabulary,
    name='race_name_to_int',
)
race_ints = race_name_to_int(races)
race_encoder = Embedding(
    input_dim=race_name_to_int.vocabulary_size(),
    output_dim=K,
    name='race_encoder',
)
race_vector = race_encoder(race_ints)
race_vector_flat = Flatten(name='race_vector')(race_vector)
race_vector_nonneg = ReLU(name='race_vector_nonneg')(race_vector_flat)

# Predicted score = dot product of the two non-negative vectors. That product
# is already >= 0; the trailing ReLU keeps the output explicitly clamped at 0.
dot_product = Dot(axes=(1, 1), name='dot_product')(
    [rider_vector_nonneg, race_vector_nonneg]
)
outputs = ReLU()(dot_product)

model = Model(inputs=[riders, races], outputs=outputs)
model.summary()

2023-02-23 20:22:53.554918: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 rider (InputLayer)             [(None, 1)]          0           []                               
                                                                                                  
 race (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 rider_name_to_int (StringLooku  (None, 1)           0           ['rider[0][0]']                  
 p)                                                                                               
                                                                                                  
 race_name_to_int (StringLookup  (None, 1)           0           ['race[0][0]']               

In [3]:
# Optimise with Adam against the mean squared error on the `points` target.
model.compile(optimizer='adam', loss='mean_squared_error')

# Inputs follow the order of the model's input list: rider names, then races.
training_inputs = [results['Rider'], results['Race']]
training_targets = results['points']
history = model.fit(
    x=training_inputs,
    y=training_targets,
    epochs=20,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [4]:
# Persist the trained model; the target is written as a directory
# (an `assets` sub-folder is created inside it).
model_output_path = 'model_direct_embeddings'
model.save(model_output_path)

INFO:tensorflow:Assets written to: model_direct_embeddings/assets
