In [8]:
import pandas as pd

results_file_name = 'pcs_results.csv'

# Load the raw results, then derive two columns in a single chain:
#   * points  - pcs_points divided by the largest pcs_points seen for the same
#               race_url, so the top scorer of each race gets 1.0.
#   * one_day - True when race_url contains exactly two '/' characters
#               (e.g. 'race/milano-sanremo/2016'); presumably stage results
#               carry an extra path segment - TODO confirm against the scraper.
# Only the rows flagged as one-day races are kept.
results = (
    pd.read_csv(results_file_name, index_col=0)
    .assign(
        points=lambda frame: frame.groupby('race_url')['pcs_points'].transform(
            lambda pts: pts / pts.max()
        ),
        one_day=lambda frame: frame['race_url'].str.count('/') == 2,
    )
    .query('one_day')
)

races = pd.read_csv('world_tour_races.csv')
riders = pd.read_csv('pcs_riders.csv', index_col=0)

# No `on=` is given, so pandas joins on whatever column names the two frames
# share (inner join). Rows without a match in `races` / `riders` are dropped.
results = results.merge(races).merge(riders)
results

Unnamed: 0,rider_url,team_url,rank,status,age,time,pcs_points,uci_points,race_url,points,one_day,race_name,year,stage_race,rider_name,nationality
0,arnaud-demare,fdj-2016,1.0,DF,24,6:54:45,275,500.0,race/milano-sanremo/2016,1.000000,True,milano-sanremo,2016,False,DÉMARE Arnaud,FR
1,arnaud-demare,fdj-2016,5.0,DF,24,5:55:23,80,225.0,race/gent-wevelgem/2016,0.355556,True,gent-wevelgem,2016,False,DÉMARE Arnaud,FR
2,arnaud-demare,fdj-2016,,DNF,24,,0,0.0,race/ronde-van-vlaanderen/2016,0.000000,True,ronde-van-vlaanderen,2016,False,DÉMARE Arnaud,FR
3,arnaud-demare,fdj-2016,34.0,DF,24,4:54:45,5,8.0,race/cyclassics-hamburg/2016,0.022222,True,cyclassics-hamburg,2016,False,DÉMARE Arnaud,FR
4,arnaud-demare,fdj-2016,,DNF,25,,0,0.0,race/bretagne-classic/2016,0.000000,True,bretagne-classic,2016,False,DÉMARE Arnaud,FR
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20153,santiago-umba,drone-hopper-androni-giocattoli-2022,88.0,DF,19,6:39:45,5,0.0,race/il-lombardia/2022,0.018182,True,il-lombardia,2022,False,UMBA Santiago,CO
20154,natnael-tesfatsion,drone-hopper-androni-giocattoli-2022,,DNF,23,,0,0.0,race/il-lombardia/2022,0.000000,True,il-lombardia,2022,False,TESFATSION Natnael,ER
20155,sean-quinn,ef-education-easypost-2022,,DNF,22,,0,0.0,race/il-lombardia/2022,0.000000,True,il-lombardia,2022,False,QUINN Sean,US
20156,sergio-martin,caja-rural-seguros-rga-2022,,DNF,25,,0,0.0,race/il-lombardia/2022,0.000000,True,il-lombardia,2022,False,MARTÍN Sergio Roman,ES


In [12]:
from tensorflow.keras.layers import StringLookup, Embedding, Flatten, ReLU, Dot
from tensorflow.keras import Input, Model
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)
K = 10  # dimensionality of the rider and race embedding vectors


def _embedding_branch(entity, vocabulary, dim):
    """Build one string-input -> flat embedding branch of the model.

    Args:
        entity: Prefix used for the layer names ('rider' or 'race').
        vocabulary: Array of the distinct strings the lookup layer should know.
        dim: Size of the embedding vector.

    Returns:
        A tuple (input, lookup_layer, integer_ids, embedded, flat_embedding).
    """
    source = Input(shape=(1,), dtype='string', name=entity)
    lookup = StringLookup(vocabulary=vocabulary, name=f'{entity}_name_to_int')
    ids = lookup(source)
    # vocabulary_size() is used as the embedding's input dimension so that every
    # index the lookup layer can emit has a row in the table.
    encoder = Embedding(lookup.vocabulary_size(), dim, name=f'{entity}_encoder')
    embedded = encoder(ids)
    flat = Flatten(name=f'{entity}_vector')(embedded)
    return source, lookup, ids, embedded, flat


# NOTE: `riders` and `races` are rebound here from the DataFrames loaded in the
# data-preparation cell to Keras input tensors.
(riders, rider_name_to_int, rider_ints,
 rider_vector, rider_vector_flat) = _embedding_branch(
    'rider', results['rider_name'].unique(), K)

(races, race_name_to_int, race_ints,
 race_vector, race_vector_flat) = _embedding_branch(
    'race', results['race_name'].unique(), K)

# The prediction is the dot product of the two embeddings, passed through a ReLU.
dot_product = Dot(axes=(1, 1), name='dot_product')(
    [rider_vector_flat, race_vector_flat]
)
outputs = ReLU()(dot_product)

model = Model(inputs=[riders, races], outputs=outputs)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 rider (InputLayer)             [(None, 1)]          0           []                               
                                                                                                  
 race (InputLayer)              [(None, 1)]          0           []                               
                                                                                                  
 rider_name_to_int (StringLooku  (None, 1)           0           ['rider[0][0]']                  
 p)                                                                                               
                                                                                                  
 race_name_to_int (StringLookup  (None, 1)           0           ['race[0][0]']             

In [13]:
# Fit the embeddings with Adam, minimising the squared error between the
# model output and the per-race normalised `points` column.
model.compile(loss="mean_squared_error", optimizer="adam")

# The feature list is positional: rider names first, race names second,
# matching the order of the inputs the model was built with.
history = model.fit(
    x=[results['rider_name'], results['race_name']],
    y=results['points'],
    epochs=20,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [14]:
# Persist the trained model; the log below shows the assets being written
# under the 'wt_oneday_direct_embeddings' directory.
model.save(filepath='wt_oneday_direct_embeddings')

INFO:tensorflow:Assets written to: wt_oneday_direct_embeddings/assets
