In [15]:
import mlflow
import cloudpickle
import os

# Define the path to save the model
model_path = "./model/model.pkl"

# Create the directory if it does not exist
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# Set the MLflow tracking URI
mlflow.set_tracking_uri("http://seito.lavbic.net:5000")

# Load the model from MLflow registry
model_uri = "models:/Race prediction@production"
loaded_model = mlflow.pyfunc.load_model(model_uri)

# Create a simple wrapper class that can be pickled
class ModelWrapper:
	def __init__(self, mlflow_model):
		self.mlflow_model = mlflow_model
		
	def predict(self, X):
		return self.mlflow_model.predict(X)

# Create wrapper instance
wrapper = ModelWrapper(loaded_model)

# Save the wrapper using cloudpickle which handles more complex objects
print(f"Saving model wrapper to '{model_path}'...")
with open(model_path, 'wb') as f:
	cloudpickle.dump(wrapper, f)
print("Model wrapper saved successfully.")

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

Saving model wrapper to './model/model.pkl'...
Model wrapper saved successfully.


In [10]:
import requests
import json
import numpy as np

# URL of the Flask server (adjust the port if necessary)
url = "http://seito.lavbic.net:15000/predict"

# Index of the race data you want to predict
payload = {
    "index": 0  # Replace with the actual index you want to use
}

# Send the POST request
try:
    response = requests.post(url, headers={"Content-Type": "application/json"}, data=json.dumps(payload), timeout=60)
    
    # Check if the response is successful
    if response.status_code == 200:
        result = response.json()
        print("Prediction result:", result['prediction'])
    else:
        print("Error:", response.text)
except requests.exceptions.RequestException as e:
    print("Request failed:", e)


(153, 207)
(153, 207, 227)
Prediction result: [{'image_url': 'http://seito.lavbic.net:15000/images/Pepijn  Reinderink.jpg', 'name': 'Pepijn  Reinderink', 'prediction': 0.6224539279937744}, {'image_url': 'http://seito.lavbic.net:15000/images/Anders  Foldager.jpg', 'name': 'Anders  Foldager', 'prediction': 0.5724022388458252}, {'image_url': 'http://seito.lavbic.net:15000/images/Antoine  Huby.jpg', 'name': 'Antoine  Huby', 'prediction': 0.5442665219306946}, {'image_url': 'http://seito.lavbic.net:15000/images/Lars  Craps.jpg', 'name': 'Lars  Craps', 'prediction': 0.5098865032196045}, {'image_url': 'http://seito.lavbic.net:15000/images/Brandon  McNulty.jpg', 'name': 'Brandon  McNulty', 'prediction': 0.5013190507888794}, {'image_url': 'http://seito.lavbic.net:15000/images/Dylan  Vandenstorme.jpg', 'name': 'Dylan  Vandenstorme', 'prediction': 0.49420881271362305}, {'image_url': 'http://seito.lavbic.net:15000/images/Gil  Gelders.jpg', 'name': 'Gil  Gelders', 'prediction': 0.464415580034256}, {

In [39]:
import numpy as np
import cloudpickle
# Load riders names data
riders_names = np.load("rider_names_test.npy", allow_pickle=True)

x_test = np.load("X_test.npy", allow_pickle=True).astype(np.float32)

model_path = "./model/model.pkl"

# Load local model and predict
with open(model_path, 'rb') as f:
    model = cloudpickle.load(f)

index = 2

# print all the races in which pogacar is present
for i, rider in enumerate(riders_names):
    if "Tadej  Pogačar" in rider[0]:
        print(i)

predictions = model.predict(x_test[index])
riders_names = riders_names[index].reshape(-1, 1)
# Concatenate the predictions with the rider names
concatanated = np.concatenate((riders_names, predictions.reshape(-1, 1)), axis=1)

print("Predictions:", concatanated)


21
22
26
27
33
34
36
39
47
62
72
73
77
79
80
82
127
128
131
132
Predictions: [['Jasper  Philipsen' 0.30338358879089355]
 ['Tim  Merlier' 0.4506204128265381]
 ['Danny van Poppel' 0.1785109043121338]
 ['Jason  Tesson' 0.22505831718444824]
 ['Simone  Consonni' 0.2476041316986084]
 ['Stian  Fredheim' 0.26171278953552246]
 ['Juan Sebastián  Molano' 0.17126822471618652]
 ['Phil  Bauhaus' 0.3018968105316162]
 ['Emilien  Jeannière' 0.21578001976013184]
 ['Luca  Mozzato' 0.15530943870544434]
 ['Max  Kanter' 0.1947934627532959]
 ['Bram  Welten' 0.03877139091491699]
 ['Milan  Menten' 0.2554280757904053]
 ['Fernando  Gaviria' 0.22877955436706543]
 ['Laurence  Pithie' 0.21078085899353027]
 ['Gerben  Thijssen' 0.2526834011077881]
 ['Luke  Lamperti' 0.7631039023399353]
 ['Edward  Theuns' 0.10984015464782715]
 ['Piet  Allegaert' 0.22406363487243652]
 ['Owain  Doull' 0.2155168056488037]
 ['Stefan  Bissegger' 0.22149443626403809]
 ['Erlend  Blikra' -0.04027283191680908]
 ['Mikkel  Bjerg' 0.0705583095550

In [None]:
import pandas as pd
df = pd.read_csv('../common/final_data.csv')

# Filter for races in 2024
df_2024 = df[df['year'] == 2024]

# Group by race name and extract rider names for each race
riders_per_race = df_2024.groupby('name')['rider_name'].apply(list)

# Convert the result to a dictionary of arrays (one array per race)
riders_dict = riders_per_race.to_dict()

# Print arrays for each race
for race, riders in riders_dict.items():
    print(f"Race: {race}")


Race: amstel-gold-race one_day
Race: bretagne-classic one_day
Race: classic-brugge-de-panne one_day
Race: cyclassics-hamburg one_day
Race: dauphine stage-1
Race: dauphine stage-2
Race: dauphine stage-3
Race: dauphine stage-4
Race: dauphine stage-5
Race: dauphine stage-6
Race: dauphine stage-7
Race: dauphine stage-8
Race: dwars-door-vlaanderen one_day
Race: e3-harelbeke one_day
Race: eschborn-frankfurt one_day
Race: gent-wevelgem one_day
Race: giro-d-italia stage-1
Race: giro-d-italia stage-10
Race: giro-d-italia stage-12
Race: giro-d-italia stage-13
Race: giro-d-italia stage-14
Race: giro-d-italia stage-15
Race: giro-d-italia stage-16
Race: giro-d-italia stage-17
Race: giro-d-italia stage-18
Race: giro-d-italia stage-19
Race: giro-d-italia stage-2
Race: giro-d-italia stage-20
Race: giro-d-italia stage-21
Race: giro-d-italia stage-3
Race: giro-d-italia stage-4
Race: giro-d-italia stage-5
Race: giro-d-italia stage-6
Race: giro-d-italia stage-7
Race: giro-d-italia stage-8
Race: giro-d-ita

In [38]:
import pandas as pd

data = pd.read_csv('../common/race_names.csv')

# sort data by name then by stage
data = data.sort_values(by=['name', 'stage'])

pd.set_option('display.max_rows', None)
print(data)

                        name     stage  index
47          amstel-gold-race   one_day     47
139         bretagne-classic   one_day    139
37   classic-brugge-de-panne   one_day     37
143       cyclassics-hamburg   one_day    143
77                  dauphine   stage-1     77
78                  dauphine   stage-2     78
79                  dauphine   stage-3     79
80                  dauphine   stage-4     80
81                  dauphine   stage-5     81
82                  dauphine   stage-6     82
83                  dauphine   stage-7     83
84                  dauphine   stage-8     84
40     dwars-door-vlaanderen   one_day     40
38              e3-harelbeke   one_day     38
56        eschborn-frankfurt   one_day     56
39             gent-wevelgem   one_day     39
57             giro-d-italia   stage-1     57
66             giro-d-italia  stage-10     66
67             giro-d-italia  stage-12     67
68             giro-d-italia  stage-13     68
69             giro-d-italia  stag