In [46]:
%run ../course_helpers/init_working_environment.ipynb

Loaded 1565 rows from /workspaces/mlops-course/data/comments.csv
First ten rows of the dataframe `df`:


Unnamed: 0,content,is_spam
0,Best Music Ever!!!,0
1,please look up DHG SONGS this is my playlist with a bunch of amazing songs,1
2,just :( superr!!!,0
3,Check out this playlist on YouTube:,1
4,subscribed :) btw you have a good style keep it up brother :)),1
5,she is horrible at acting. cringe-worhty.,0
6,https://m.facebook.com/story.php?story_fbid=764484966942313&amp;id=754989901225153&amp;ref=stream gf,1
7,WOw,0
8,I loved this song when I was in my teenage years!,0
9,Where did she find all that make up in a freakin jungle?!,0


In [47]:
my_experiment = mlflow_connect()

OK - mlflow server is up and running. Setting Tracking URI to http://localhost:5000. Setting Experiment to 'Spam Detection'


In [49]:
import mlflow

mlflow_client = mlflow.tracking.MlflowClient()
runs = mlflow_client.search_runs(experiment_ids=my_experiment.experiment_id, filter_string="tags.mlflow.runName = 'Basic Spam Classification Model'")
if len(runs) == 0:
    raise Exception(f"No runs found for run name 'Basic Spam Classification Model' in experiment id {my_experiment.experiment_id}")
my_run = runs[0]
my_run

<Run: data=<RunData: metrics={'training_accuracy_score': 1.0,
 'training_f1_score': 1.0,
 'training_log_loss': 0.04988526434653382,
 'training_precision_score': 1.0,
 'training_recall_score': 1.0,
 'training_roc_auc': 1.0,
 'training_score': 1.0}, params={'CountVectorizer': 'CountVectorizer()',
 'CountVectorizer__analyzer': 'word',
 'CountVectorizer__binary': 'False',
 'CountVectorizer__decode_error': 'strict',
 'CountVectorizer__dtype': "<class 'numpy.int64'>",
 'CountVectorizer__encoding': 'utf-8',
 'CountVectorizer__input': 'content',
 'CountVectorizer__lowercase': 'True',
 'CountVectorizer__max_df': '1.0',
 'CountVectorizer__max_features': 'None',
 'CountVectorizer__min_df': '1',
 'CountVectorizer__ngram_range': '(1, 1)',
 'CountVectorizer__preprocessor': 'None',
 'CountVectorizer__stop_words': 'None',
 'CountVectorizer__strip_accents': 'None',
 'CountVectorizer__token_pattern': '(?u)\\b\\w\\w+\\b',
 'CountVectorizer__tokenizer': 'None',
 'CountVectorizer__vocabulary': 'None',
 'Ra

In [53]:
messages = ["Click this link and subscribe to my channel!", "This is the best ever video about MLflow!"]

artifact_path = f"runs:/{my_run.info.run_id}/spam_pipeline_model"
print(f"Loading model from {artifact_path}")
print()

reloaded_model = mlflow.pyfunc.load_model(artifact_path)

predictions = reloaded_model.predict(messages)

for message, prediction in zip(messages, predictions):
    print(f"Comment: {message} \t| Spam? (prediction): {prediction}")


Loading model from runs:/48da05d957e240188fa5bb36ed5a7c6e/spam_pipeline_model

Comment: Click this link and subscribe to my channel! 	| Spam? (prediction): 1
Comment: This is the best ever video about MLflow! 	| Spam? (prediction): 0


In [67]:
import time

MODEL_NAME_IN_REGISTRY="Spam Classifier"

try:
    mlflow_client.delete_registered_model(name=MODEL_NAME_IN_REGISTRY)
except Exception:
    pass

mlflow.register_model(model_uri=artifact_path, name=MODEL_NAME_IN_REGISTRY)
time.sleep(2)

model_info = mlflow_client.get_model_version(name=MODEL_NAME_IN_REGISTRY, version=1)
print(f"{model_info.name} version {model_info.version} is in stage '{model_info.current_stage}'")

Successfully registered model 'Spam Classifier'.
2023/10/23 09:10:49 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: Spam Classifier, version 1
Created version '1' of model 'Spam Classifier'.


Spam Classifier version 1 is in stage 'None'


In [68]:
updated_model_info = mlflow_client.transition_model_version_stage(name=MODEL_NAME_IN_REGISTRY, version=1, stage="Production")
print(f"{updated_model_info.name} version {updated_model_info.version} is now in stage '{updated_model_info.current_stage}'")

Spam Classifier version 1 is now in stage 'Production'


In [77]:
for model_version in mlflow_client.search_model_versions(f"name='{MODEL_NAME_IN_REGISTRY}'"):
    print(f"Model '{model_version.name}' version {model_version.version} is in stage '{model_version.current_stage}'. Artifact path: {model_version.source}")

Model 'Spam Classifier' version 1 is in stage 'Production'. Artifact path: /workspaces/mlops-course/.mlflow_data/artifacts/1/48da05d957e240188fa5bb36ed5a7c6e/artifacts/spam_pipeline_model


In [39]:
import datetime

shutil.rmtree(os.path.join(PROJECT_ROOT_DIR, "model", "spam_pipeline_model"), ignore_errors=True)

model_dir = os.path.join(PROJECT_ROOT_DIR, "model", "spam_pipeline_model")
pip_requirements = [
    "numpy==1.24.4",
    "pandas==1.5.3",
    "scikit-learn==1.2.2",
    "mlflow==2.5",
]


class ModelWithPreprocess(mlflow.pyfunc.PythonModel):
    def __init__(self, pipe_model):
        self.model = pipe_model

    def preprocess_input(self, payload):
        if (
            not isinstance(payload, dict)
            or "data" not in payload
            or not isinstance(payload["data"], str)
        ):
            raise TypeError(
                "Request payload must be a dict in " + '{"data": "message"} format',
            )
        return payload["data"]

    def predict(self, context, model_input):
        processed_model_input = self.preprocess_input(model_input.copy())
        prediction = self.model.predict([processed_model_input])[0]
        return prediction


model_w_preprocess = ModelWithPreprocess(pipe)

mlflow.pyfunc.save_model(
    path=model_dir,
    python_model=model_w_preprocess,
    code_path=[os.path.join(PROJECT_ROOT_DIR, "train_model", "model_code")],
    pip_requirements=pip_requirements,
    metadata={
        "model_type": "spam_classifier",
        "model_name": "spam_model",
        "model_version": "v1",
        "model_description": "Spam classifier trained on YouTube comments",
        "trained_at": datetime.datetime.utcnow().isoformat(),
       #"accuracy": accuracy,
    },
)

m2 = mlflow.pyfunc.load_model(model_dir)
print("Prediction: " + str(m2.predict({"data": "Come subscribe to my channel"})))
print(m2.metadata.metadata)


 - numpy (current: 1.26.0, required: numpy==1.24.4)
 - pandas (current: 2.1.1, required: pandas==1.5.3)
 - scikit-learn (current: 1.3.1, required: scikit-learn==1.2.2)
 - mlflow (current: 2.7.1, required: mlflow==2.5)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.




Prediction: 1
{'model_description': 'Spam classifier trained on YouTube comments', 'model_name': 'spam_model', 'model_type': 'spam_classifier', 'model_version': 'v1', 'trained_at': '2023-10-23T06:53:01.914158'}
