In [1]:
import duckdb
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import numpy as np
import pickle
import mlflow
import mlflow.sklearn
import pandas as pd
import torch
import xgboost as xgb


from sentence_transformers import SentenceTransformer
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

class MovieRatingPredictor:
	def __init__(self, df, model_type_s="", n_jobs=1):
		"""
		Initializes the MovieRatingPredictor with the dataframe.
		"""

		mlflow_track_server=os.getenv("MLFLOW_TRACK_SERVER")
		mlflow.set_tracking_uri(mlflow_track_server)    
		mlflow.set_tag("model_version", "1.0")

		self.df = df
		self.encoder = OneHotEncoder(handle_unknown='ignore')
		self.mlb = None  # Store the MultiLabelBinarizer
		self.model_type = model_type_s

		# Load a more advanced BERT model for richer embeddings
		self.bert_model = SentenceTransformer('all-MPNet-base-v2')  # Richer BERT model with 768 dimensions

		print(f"Model Type: {self.model_type}")

		# Initialize the model based on the user's choice
		if self.model_type == 'decision_tree':
			self.model = DecisionTreeRegressor(max_depth=10)
		elif self.model_type == 'random_forest':
			self.model = RandomForestRegressor(
				max_features=0.4,
				n_estimators=1000,
				max_depth=50,
				min_samples_split=10,
			)
		elif self.model_type == 'svr':
			self.model = SVR(kernel='sigmoid', C=1.0, epsilon=0.1)
		elif self.model_type == 'mlp':
			# The basic MLP setup (will be tuned later)
			self.model = MLPRegressor(
				hidden_layer_sizes=(256, 128, 64),
				activation='relu',
				solver='adam',
				alpha=0.001,
				learning_rate_init=0.001,
				early_stopping=True,
				max_iter=1000
			)
		elif self.model_type == 'xgboost':
			self.model = xgb.XGBRegressor(
				objective='reg:squarederror',
				n_estimators=100,
				max_depth=6,
				learning_rate=0.1,
				n_jobs=n_jobs
			)
		else:  # Default to Linear Regression
			self.model = LinearRegression(n_jobs=n_jobs)

	def tune_mlp_hyperparameters(self, X_train, y_train):
		"""
		Perform hyperparameter tuning for the MLP model using GridSearchCV.
		"""
		param_grid = {
			'hidden_layer_sizes': [(512, 256, 128), (256, 128, 64), (128, 64, 32)],
			'alpha': [0.0001, 0.001, 0.01],  # L2 regularization
			'learning_rate_init': [0.001, 0.01],
			'solver': ['adam', 'sgd'],
			'early_stopping': [True]
		}

		grid_search = GridSearchCV(MLPRegressor(max_iter=1000), param_grid, cv=3)
		grid_search.fit(X_train, y_train)

		# Update the model with the best parameters
		self.model = grid_search.best_estimator_

		print("Best hyperparameters found: ", grid_search.best_params_)

	def preprocess_text(self, text):
		"""
		Uses BERT to generate embeddings from the text data.
		"""
		return self.bert_model.encode(text, convert_to_tensor=False)

	def prepare_features(self, text_field_name, classification_field):
		"""
		Prepares the features (BERT embeddings and encoded 'genre_ids') for the model.
		"""
		print("Prepare features")
		# Generate BERT embeddings for the text field
		self.df[f'{text_field_name}_embedding'] = self.df[text_field_name].apply(lambda x: self.preprocess_text(x))

		# Multi-hot encoding for 'genre_ids'
		self.mlb = MultiLabelBinarizer()
		genre_ids_encoded = self.mlb.fit_transform(self.df['genre_ids'])

		# Combine embeddings and encoded genre ids
		X = np.hstack((self.df[f'{text_field_name}_embedding'].tolist(), genre_ids_encoded))
		y = self.df[classification_field]
		return X, y

	def train_and_evaluate(self, text_field, classification_field):
		"""
		Trains the regression model and evaluates its performance.
		"""
		with mlflow.start_run(nested=True, run_name=f"train_and_evaluate_{self.model.__class__.__name__}"):
			print("Train and evaluate")
			X, y = self.prepare_features(text_field, classification_field)
			X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

			# Normalize the input features (Scaling)
			scaler = StandardScaler()
			X_train_scaled = scaler.fit_transform(X_train)
			X_test_scaled = scaler.transform(X_test)

			# Hyperparameter tuning for MLP
			if self.model_type == 'mlp':
				self.tune_mlp_hyperparameters(X_train_scaled, y_train)

			# Train the model
			self.model.fit(X_train_scaled, y_train)

			# Make predictions
			y_pred = self.model.predict(X_test_scaled)

			# Evaluate the model
			mse = mean_squared_error(y_test, y_pred)
			r2 = r2_score(y_test, y_pred)
			mae = mean_absolute_error(y_test, y_pred)
			mape = mean_absolute_percentage_error(y_test, y_pred)

			print(f'Mean Squared Error: {mse}')
			print(f'R-squared: {r2}')
			print(f'Mean Absolute Error: {mae}')
			print(f'Mean Absolute Percentage Error (MAPE): {mape * 100:.2f}%')

			# Log model parameters, metrics, and timestamp
			mlflow.log_params(self.model.get_params())
			mlflow.log_metric("mse", mse)
			mlflow.log_metric("r2_score", r2)
			mlflow.log_metric("mae", mae)
			mlflow.log_metric("mape", mape)

			# Save the trained model
			timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
			mlflow.sklearn.log_model(self.model, f"model_{self.model_type}_{timestamp}")
			mlflow.set_tag("timestamp", timestamp)

	def predict(self, text, genre_ids):
		# Preprocess the input data (generate BERT embeddings)
		text_embedding = self.preprocess_text(text)
		genre_ids_encoded = self.mlb.transform([genre_ids])
		X = np.hstack((text_embedding.reshape(1, -1), genre_ids_encoded))

		# Normalize the features before prediction
		scaler = StandardScaler()
		X_scaled = scaler.transform(X)

		# Make prediction
		return self.model.predict(X_scaled)[0]

	def save_to_mlflow(self, model_name="movie_rating_predictor"):
		def predict_using_instance(model_input):
			text, genre_ids = model_input
			return self.predict(text, genre_ids)

		# Provide an input example using DataFrame
		input_example = pd.DataFrame({
			"overview": ["A thrilling action movie with breathtaking stunts."],
			"genre_ids": [[28, 12]]
		})

		# Save MultiLabelBinarizer to a file
		mlb_path = "mlb.pkl"
		with open(mlb_path, 'wb') as f:
			pickle.dump(self.mlb, f)

		artifacts = {
			'mlb': mlb_path,
		}

		mlflow.pyfunc.log_model(
			artifact_path="model",
			python_model=predict_using_instance,
			artifacts=artifacts,
			registered_model_name=model_name,
			input_example=input_example
		)



  from tqdm.autonotebook import tqdm, trange
2024-09-29 10:41:52.929567: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-29 10:41:54.169558: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-29 10:41:54.558116: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-29 10:41:57.166489: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:

def test():


    with mlflow.start_run(run_name="test_predictions"):
        model_uri = "models:/movie_rating_predictor/latest" 
        loaded_model = mlflow.pyfunc.load_model(model_uri)


        new_movie_overview = "A small team of scientists must race against time to stop what seems to be a cascade of global disasters signaling the possible apocalypse and end of days."
        new_movie_genre_ids = [878, 27] 
        predicted_rating = loaded_model.predict((new_movie_overview, new_movie_genre_ids))
        print(f"Predicted rating for the new movie (60): {predicted_rating}. Distance: {predicted_rating - 6.00}")


        new_movie_overview = "Heather bumps into Carla, having not spoken to her in years, and presents her with a very unexpected proposition that could change both of their lives forever."
        new_movie_genre_ids = [878, 27] 
        predicted_rating = loaded_model.predict((new_movie_overview, new_movie_genre_ids))
        print(f"Predicted rating for the new movie (58): {predicted_rating}. Distance: {predicted_rating - 5.80}")

        new_movie_overview = "In a futuristic dystopia with enforced beauty standards, a teen awaiting mandatory cosmetic surgery embarks on a journey to find her missing friend."
        new_movie_genre_ids = [878, 12] 
        predicted_rating = loaded_model.predict((new_movie_overview, new_movie_genre_ids))
        print(f"Predicted rating for the new movie (60): {predicted_rating}. Distance: {predicted_rating - 6.00}")


        new_movie_overview = "A talented martial artist who can't walk past a person in need unites with a probation officer to fight and prevent crime as a martial arts officer."
        new_movie_genre_ids = [28, 35, 80] 
        predicted_rating = loaded_model.predict((new_movie_overview, new_movie_genre_ids))
        print(f"Predicted rating for the new movie (79): {predicted_rating}. Distance: {predicted_rating - 7.90}")

        new_movie_overview = "A talented martial artist who can't walk past a person in need unites with a probation officer to fight and prevent crime as a martial arts officer."
        new_movie_genre_ids = [28, 35, 80] 
        predicted_rating = loaded_model.predict((new_movie_overview, new_movie_genre_ids))
        print(f"Predicted rating for the new movie (79): {predicted_rating}. Distance: {predicted_rating - 7.90}")

        new_movie_overview = "A detective begins to investigate a series of mysterious murders that are connected to a demonic book that brings dolls to life. As the body count begins to rise, the detective soon learns the curse of the demonic Friday and must find a way to stop it before any others disappear."
        new_movie_genre_ids = [27] 
        predicted_rating = loaded_model.predict((new_movie_overview, new_movie_genre_ids))
        print(f"Predicted rating for the new movie (32): {predicted_rating}. Distance: {predicted_rating - 3.20}")


        new_movie_overview = "When a group of ex-military members is hired to retrieve a lost bag of stolen money, their mission becomes more difficult after a lone hunter finds the bag first."
        new_movie_genre_ids = [28, 53, 10770] 
        predicted_rating = loaded_model.predict((new_movie_overview, new_movie_genre_ids))
        print(f"Predicted rating for the new movie (45): {predicted_rating}. Distance: {predicted_rating - 4.50}")

        new_movie_overview = "A young teenager named Mikey Walsh finds an old treasure map in his father's attic. Hoping to save their homes from demolition, Mikey and his friends Data Wang, Chunk Cohen, and Mouth Devereaux run off on a big quest to find the secret stash of Pirate One-Eyed Willie."
        new_movie_genre_ids = [12, 35, 10751] 
        predicted_rating = loaded_model.predict((new_movie_overview, new_movie_genre_ids))
        print(f"Predicted rating for the new movie (75): {predicted_rating}. Distance: {predicted_rating - 7.50}")


        new_movie_overview = "Imprisoned in the 1940s for the double murder of his wife and her lover, upstanding banker Andy Dufresne begins a new life at the Shawshank prison, where he puts his accounting skills to work for an amoral warden. During his long stretch in prison, Dufresne comes to be admired by the other inmates -- including an older prisoner named Red -- for his integrity and unquenchable sense of hope."
        new_movie_genre_ids = [18, 80] 
        predicted_rating = loaded_model.predict((new_movie_overview, new_movie_genre_ids))
        print(f"Predicted rating for the new movie (87): {predicted_rating}. Distance: {predicted_rating - 8.70}")

In [3]:
def train(df, model_type):
	mlflow.end_run()
	try:
		with mlflow.start_run(run_name=f"train_{model_type}"):
			predictor = MovieRatingPredictor(
				df,
				model_type_s=model_type,
				n_jobs=10,
			)

			# Train and evaluate the main model
			predictor.train_and_evaluate("overview", "vote_average")

			# Save the entire predictor instance to MLflow
			predictor.save_to_mlflow()

	except Exception as e:
		print(f"Error during training {model_type}: {e}")
		raise  # To propagate the error further if needed

In [4]:
#tmdb_token = os.getenv("TMDB_TOKEN")
#output_movies_folder = os.getenv("OUTPUT_MOVIES_FOLDER")
#mlflow_track_server = os.getenv("MLFLOW_TRACK_SERVER")



output_movies_folder="/mnt/projects/fiap-proj-int-03/app/get_movies/output_files/*.json"
tmdb_token = "eyJhbGciOiJIUzI1NiJ9.eyJhdWQiOiI1NWFlZjk4ZWVkNTM1NWRmYmUyNGVjNmZiOWU2ZGZjOCIsIm5iZiI6MTcyNjk0NTM5Mi4zMzgyOCwic3ViIjoiNjZlZjE3NzI2YzNiN2E4ZDY0OGQzYWM1Iiwic2NvcGVzIjpbImFwaV9yZWFkIl0sInZlcnNpb24iOjF9.Y45bt_CPC9FnKCAPe4T2-nYnA3u48ZE6Hmm912zxzLs"
#output_movies_folder="C:/projects/fiap-proj-int-03/app/get_movies/output_files/*.json"


def get_data():
    conn = duckdb.connect(config = {'threads': 5})

    conn.execute(f"CREATE TABLE movies AS (SELECT * FROM read_json('{output_movies_folder}'));")

    #df = conn.execute(f"SELECT genre_ids, title, vote_average FROM read_json('{output_movies_folder}/*.json')").fetchdf()

    df = conn.execute(f"""
        SELECT 
            --*
            id
            , genre_ids
            , title
            , vote_average
            , overview
        FROM movies
        WHERE overview is not null 
        AND trim(overview) <> ''
                    
    """).fetchdf()

    print(f"number of lines: {df.count()}")

    return df

In [5]:
#mlflow_track_server="http://user:pswd@10.96.132.192"
mlflow_track_server="http://localhost:5000"
mlflow.set_tracking_uri(mlflow_track_server)    
mlflow.set_tag("model_version", "1.0")


df = get_data()
model_types = [ 'mlp', 'xgboost', 'random_forest']
for model_type in model_types:
	train(df, model_type)
#train(df, 'svr')
#train(df, 'decision_tree')
#train(df, 'random_forest')
#train(df, 'mlp')

2024/09/29 10:42:15 INFO mlflow.tracking._tracking_service.client: üèÉ View run hilarious-panda-446 at: http://localhost:5000/#/experiments/0/runs/0fce4442d4ad4e3ab045a67cc70e9978.
2024/09/29 10:42:15 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:5000/#/experiments/0.


number of lines: id              6616
genre_ids       6616
title           6616
vote_average    6616
overview        6616
dtype: int64


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Model Type: mlp
Train and evaluate
Prepare features
Best hyperparameters found:  {'alpha': 0.01, 'early_stopping': True, 'hidden_layer_sizes': (512, 256, 128), 'learning_rate_init': 0.001, 'solver': 'adam'}
Mean Squared Error: 2.8401308990019802
R-squared: 0.27031028118452916
Mean Absolute Error: 1.3503642142965915
Mean Absolute Percentage Error (MAPE): 33.04%


2024/09/29 11:09:58 INFO mlflow.tracking._tracking_service.client: üèÉ View run train_and_evaluate_MLPRegressor at: http://localhost:5000/#/experiments/0/runs/a3e9fee9d6774ea99718882d9b4c1f62.
2024/09/29 11:09:58 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:5000/#/experiments/0.


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

NotFittedError("This StandardScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.")Traceback (most recent call last):


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/mlflow/utils/_capture_modules.py", line 165, in load_model_and_predict
    model.predict(input_example, params=params)


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/mlflow/pyfunc/model.py", line 637, in predict
    return self.python_model.predict(


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/mlflow/pyfunc/model.py", line 173, in predict
    return self.func(model_input)


  File "/tmp/ipykernel_7281/1080127857.py", line 180, in predict_using_instance


  File "/tmp/ipykernel_7281/1080127857.py", line 172, in predict


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/sklearn/utils/_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)


  File "/home/fa

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

  "dataframe_split": {
    "columns": [
      "overview",
      "genre_ids"
    ],
    "data": [
      [
        "A thrilling action movie with breathtaking stunts.",
        [
          28,
          12
        ]
      ]
    ]
  }
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: This StandardScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
2024/09/29 11:10:26 INFO mlflow.tracking._tracking_service.client: üèÉ View run train_mlp at: http://localhost:5000/#/experiments/0/runs/729022d5fcf24507b1a49771057fcd9f.
2024/09/29 11:10:26 INFO mlflow.tracking._tracking_service.client

Model Type: xgboost
Train and evaluate
Prepare features
Mean Squared Error: 2.9118562482526995
R-squared: 0.251882521412932
Mean Absolute Error: 1.390168991613244
Mean Absolute Percentage Error (MAPE): 34.24%


2024/09/29 11:28:04 INFO mlflow.tracking._tracking_service.client: üèÉ View run train_and_evaluate_XGBRegressor at: http://localhost:5000/#/experiments/0/runs/a7382b1e2b2a4e30988bc1d3b69cae77.
2024/09/29 11:28:04 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:5000/#/experiments/0.


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

NotFittedError("This StandardScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.")Traceback (most recent call last):


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/mlflow/utils/_capture_modules.py", line 165, in load_model_and_predict
    model.predict(input_example, params=params)


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/mlflow/pyfunc/model.py", line 637, in predict
    return self.python_model.predict(


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/mlflow/pyfunc/model.py", line 173, in predict
    return self.func(model_input)


  File "/tmp/ipykernel_7281/1080127857.py", line 180, in predict_using_instance


  File "/tmp/ipykernel_7281/1080127857.py", line 172, in predict


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/sklearn/utils/_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)


  File "/home/fa

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

  "dataframe_split": {
    "columns": [
      "overview",
      "genre_ids"
    ],
    "data": [
      [
        "A thrilling action movie with breathtaking stunts.",
        [
          28,
          12
        ]
      ]
    ]
  }
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: This StandardScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
2024/09/29 11:28:37 INFO mlflow.tracking._tracking_service.client: üèÉ View run train_xgboost at: http://localhost:5000/#/experiments/0/runs/65fb8e31794c4630a4e4b097db28122a.
2024/09/29 11:28:37 INFO mlflow.tracking._tracking_service.cl

Model Type: random_forest
Train and evaluate
Prepare features
Mean Squared Error: 2.9833696077225147
R-squared: 0.233509226301282
Mean Absolute Error: 1.4429038889303802
Mean Absolute Percentage Error (MAPE): 35.98%


2024/09/29 12:07:50 INFO mlflow.tracking._tracking_service.client: üèÉ View run train_and_evaluate_RandomForestRegressor at: http://localhost:5000/#/experiments/0/runs/19d0a7d3d1cb4c7b98c9ff2fb75a8a06.
2024/09/29 12:07:50 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://localhost:5000/#/experiments/0.


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

NotFittedError("This StandardScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.")Traceback (most recent call last):


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/mlflow/utils/_capture_modules.py", line 165, in load_model_and_predict
    model.predict(input_example, params=params)


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/mlflow/pyfunc/model.py", line 637, in predict
    return self.python_model.predict(


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/mlflow/pyfunc/model.py", line 173, in predict
    return self.func(model_input)


  File "/tmp/ipykernel_7281/1080127857.py", line 180, in predict_using_instance


  File "/tmp/ipykernel_7281/1080127857.py", line 172, in predict


  File "/home/falcao/venv/fiap-tech-003/lib/python3.10/site-packages/sklearn/utils/_set_output.py", line 316, in wrapped
    data_to_wrap = f(self, X, *args, **kwargs)


  File "/home/fa

Downloading artifacts:   0%|          | 0/8 [00:00<?, ?it/s]

  "dataframe_split": {
    "columns": [
      "overview",
      "genre_ids"
    ],
    "data": [
      [
        "A thrilling action movie with breathtaking stunts.",
        [
          28,
          12
        ]
      ]
    ]
  }
}. Alternatively, you can avoid passing input example and pass model signature instead when logging the model. To ensure the input example is valid prior to serving, please try calling `mlflow.models.validate_serving_input` on the model uri and serving input example. A serving input example can be generated from model input example using `mlflow.models.convert_input_example_to_serving_input` function.
Got error: This StandardScaler instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
2024/09/29 12:08:25 INFO mlflow.tracking._tracking_service.client: üèÉ View run train_random_forest at: http://localhost:5000/#/experiments/0/runs/3bcca7a9ab744acea822fec3b80aaa50.
2024/09/29 12:08:25 INFO mlflow.tracking._tracking_serv

In [6]:
#test()

In [7]:
import mlflow
mlflow_track_server="http://user:pswd@10.96.132.192"
mlflow.set_tracking_uri(mlflow_track_server)

# Create an MLflow client (adjust tracking URI if needed)
client = mlflow.tracking.MlflowClient()

# List all registered models 
registered_models = client.search_registered_models()

# Extract model URIs
model_uris = [model.latest_versions[0].source for model in registered_models]

print(model_uris) 

MlflowException: API request to http://user:pswd@10.96.132.192/api/2.0/mlflow/registered-models/search failed with timeout exception HTTPConnectionPool(host='10.96.132.192', port=80): Max retries exceeded with url: /api/2.0/mlflow/registered-models/search?max_results=100 (Caused by ConnectTimeoutError(<urllib3.connection.HTTPConnection object at 0x7c5ec7d7c190>, 'Connection to 10.96.132.192 timed out. (connect timeout=120)')). To increase the timeout, set the environment variable MLFLOW_HTTP_REQUEST_TIMEOUT (default: 120, type: int) to a larger value.