In [6]:
import pandas  as pd
import matplotlib.pyplot as plt 
import numpy as np 
import seaborn as sns 
import mlflow 
import mlflow.sklearn
from sklearn import metrics
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier

In [7]:
# data processing
# Load the churn dataset, keep the modelling features and the 'Exited' target,
# and create a stratified 80/20 train/test split with a fixed seed.
# (train_test_split is already imported in the imports cell, so it is not
# re-imported here.)
df = pd.read_csv(r"Bank_churn_modelling.csv")
x = df[['CreditScore', 'Geography', 'Gender', 'Age', 'Balance', 'NumOfProducts', 'IsActiveMember']]
y = df['Exited']
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, stratify=y, random_state=5)

# One-hot encode the columns at positions 1 and 2 of `x` ('Geography' and
# 'Gender'), dropping the first level of each; all other columns pass through.
transformer = ColumnTransformer([('ohe', OneHotEncoder(drop="first"), [1, 2]),], remainder='passthrough')
# The transformer is deliberately left unfitted here: it is fitted as the first
# step of the Pipeline in the training cells.

In [5]:
# Select "W1D2" as the active MLflow experiment for the runs started below.
mlflow.set_experiment(experiment_name="W1D2")

Traceback (most recent call last):
  File "C:\Users\anshu\AppData\Roaming\Python\Python310\site-packages\mlflow\store\tracking\file_store.py", line 290, in search_experiments
    exp = self._get_experiment(exp_id, view_type)
  File "C:\Users\anshu\AppData\Roaming\Python\Python310\site-packages\mlflow\store\tracking\file_store.py", line 383, in _get_experiment
    meta = FileStore._read_yaml(experiment_dir, FileStore.META_DATA_FILE_NAME)
  File "C:\Users\anshu\AppData\Roaming\Python\Python310\site-packages\mlflow\store\tracking\file_store.py", line 1096, in _read_yaml
    return _read_helper(root, file_name, attempts_remaining=retries)
  File "C:\Users\anshu\AppData\Roaming\Python\Python310\site-packages\mlflow\store\tracking\file_store.py", line 1089, in _read_helper
    result = read_yaml(root, file_name)
  File "C:\Users\anshu\AppData\Roaming\Python\Python310\site-packages\mlflow\utils\file_utils.py", line 215, in read_yaml
    raise MissingConfigException("Yaml file '%s' does not ex

<Experiment: artifact_location='file:///d:/AI/MLOps/EYMAY23/MLOps-with-Azure/mlruns/292143052935320181', creation_time=1683603960438, experiment_id='292143052935320181', last_update_time=1683603960438, lifecycle_stage='active', name='W1D2', tags={}>

In [9]:
# Build the evaluation frame (features + 'label' target column) as a NEW
# DataFrame. Writing the column through a plain alias of xtest would also add
# 'label' to xtest itself, leaking the target into the feature frame and making
# this cell non-idempotent.
evaldata = xtest.assign(label=ytest)

In [11]:
from mlflow.models import make_metric

In [14]:

# Turn on sklearn autologging, then train and evaluate inside a single run.
mlflow.sklearn.autolog()
with mlflow.start_run():
    # Shared column transformer followed by a class-balanced decision tree.
    model_pipeline = Pipeline(steps=[
        ("transformer", transformer),
        ("model", DecisionTreeClassifier(
            criterion='gini',
            min_samples_leaf=20,
            max_depth=8,
            class_weight='balanced',
            random_state=5,
        )),
    ])
    model_pipeline.fit(xtrain, ytrain)

    # URI of the "model" artifact of the active run (assumes autologging stored
    # the fitted pipeline under that path -- TODO confirm).
    model_uri = mlflow.get_artifact_uri("model")

    def custom_metric_anshu(evaldata,_builtin_metric):
        """Placeholder custom metric: ignores both arguments and returns 0.5."""
        return 0.5

    # model evaluation
    result = mlflow.evaluate(
        model=model_uri,
        data=evaldata,
        targets='label',
        model_type='classifier',
        evaluators=['default'],
        evaluator_config={"default": {"metric_prefix": "test_"}},
        custom_metrics=[
            make_metric(eval_fn=custom_metric_anshu, greater_is_better=True),
        ],
    )

2023/05/09 09:29:54 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2023/05/09 09:29:54 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.


In [15]:
model_uri

'file:///d:/AI/MLOps/EYMAY23/MLOps-with-Azure/mlruns/292143052935320181/e9ad5e069a3c4e538c05babdbd969c9b/artifacts/model'

In [16]:
# Train the same pipeline again, this time inside a run named "Run1".
mlflow.sklearn.autolog()
with mlflow.start_run(run_name="Run1"):
    # Shared column transformer followed by a class-balanced decision tree.
    model_pipeline = Pipeline(steps=[
        ("transformer", transformer),
        ("model", DecisionTreeClassifier(
            criterion='gini',
            min_samples_leaf=20,
            max_depth=8,
            class_weight='balanced',
            random_state=5,
        )),
    ])
    model_pipeline.fit(xtrain, ytrain)



# Preparing model for deployment

In [17]:
from mlflow.models.signature import infer_signature,ModelSignature
from mlflow.types.schema import Schema, ColSpec

In [20]:

# Train, evaluate, and log a deployment-ready copy of the pipeline (with an
# explicit signature and an input example) inside a single MLflow run.
mlflow.sklearn.autolog()
with mlflow.start_run():
    model_pipeline = Pipeline([("transformer",transformer),
                                ("model",DecisionTreeClassifier(criterion='gini',min_samples_leaf=20,max_depth=8,
                               class_weight='balanced',random_state=5))])
    model_pipeline.fit(xtrain,ytrain)

    def custom_metric_anshu(evaldata,_builtin_metric):
        """Placeholder custom metric: ignores both arguments and returns 0.5."""
        return 0.5

    # URI of the "model" artifact of the active run (assumes autologging stored
    # the fitted pipeline under that path -- TODO confirm).
    model_uri = mlflow.get_artifact_uri("model")
    #model evaluation

    result = mlflow.evaluate(model=model_uri,data=evaldata,
    targets='label',model_type='classifier',
    evaluators=['default'],
    evaluator_config={"default":{"metric_prefix":"test_"}},
    custom_metrics=[make_metric(eval_fn=custom_metric_anshu,greater_is_better=True)]
    )

    # NOTE(review): this environment spec is currently unused (the conda_env
    # argument of log_model below is commented out). If it is re-enabled,
    # confirm that the pinned Python version matches the training environment.
    conda_env = {"channels":['conda-forge'],
                "dependencies":['python=3.8.8','pip'],
                "name":'mlflow-env'}

    # infer signature automatically (printed for comparison with the manual one)
    signature1 = infer_signature(xtrain,model_pipeline.predict(xtrain))
    print(signature1)

    # specify signature manually
    # The numeric feature columns are declared as "double" -- presumably so that
    # missing values can be represented at inference time; verify that callers
    # send these columns as floats.
    # 'IsActiveMember' and the output are declared as "long" (64-bit) rather
    # than "integer" (32-bit): the training data and the model predictions are
    # int64 (see the inferred signature), and a "long" column also accepts
    # int32 input, whereas "integer" rejects int64 DataFrame columns.
    input_schema = Schema([
        ColSpec("double",'CreditScore'),
        ColSpec("string",'Geography'),
        ColSpec("string",'Gender'),
        ColSpec("double",'Age'),
        ColSpec("double",'Balance'),
        ColSpec("double",'NumOfProducts'),
        ColSpec("long",'IsActiveMember'),
    ])
    output_schema = Schema([ColSpec("long",'Exited'),])
    signature2 = ModelSignature(inputs=input_schema,outputs=output_schema)

    # Single-row input example whose value types follow the manual schema.
    example = {"CreditScore":652.0,"Geography":'France',"Gender":"Male","Age":45.0,
                "Balance":152432.0,"NumOfProducts":2.0,"IsActiveMember":0}
    # Log the pipeline under the "churn_model" artifact path of this run.
    mlflow.sklearn.log_model(model_pipeline,"churn_model",
                                # conda_env = conda_env,
                                signature = signature2,
                                input_example=example
                                )

2023/05/09 10:42:11 INFO mlflow.models.evaluation.base: Evaluating the model with the default evaluator.
2023/05/09 10:42:11 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
  inputs = _infer_schema(model_input)


inputs: 
  ['CreditScore': long, 'Geography': string, 'Gender': string, 'Age': long, 'Balance': double, 'NumOfProducts': long, 'IsActiveMember': long]
outputs: 
  [Tensor('int64', (-1,))]



### Register Model

In [22]:
# Look up the id of the most recent MLflow run of this session.
last_run = mlflow.last_active_run()
if last_run is None:
    # last_active_run() returns None when no run has been started yet; fail
    # with a clear message instead of an AttributeError on `.info`.
    raise RuntimeError("No MLflow run found in this session; run the training cell first.")
runid = last_run.info.run_id
runid

'6a8a6f76dcf0482a877913af15707b21'

In [None]:
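# Optional: to register a model from an earlier run instead, select its
# experiment and set the run id explicitly.
# mlflow.set_experiment("W2D1")
# runid = "192e755fd3e5471eba2d0002f0155914"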

In [23]:
# model registration
# Register the "churn_model" artifact of run `runid` in the model registry.
model_reg_name = "churn_prediction_name"
model_uri = "runs:/{}/churn_model".format(runid)
mlflow.register_model(model_uri=model_uri, name=model_reg_name)

Successfully registered model 'churn_prediction_name'.
2023/05/09 11:38:25 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: churn_prediction_name, version 1
Created version '1' of model 'churn_prediction_name'.


<ModelVersion: aliases=[], creation_timestamp=1683612505617, current_stage='None', description=None, last_updated_timestamp=1683612505617, name='churn_prediction_name', run_id='6a8a6f76dcf0482a877913af15707b21', run_link=None, source='file:///d:/AI/MLOps/EYMAY23/MLOps-with-Azure/mlruns/292143052935320181/6a8a6f76dcf0482a877913af15707b21/artifacts/churn_model', status='READY', status_message=None, tags={}, user_id=None, version=1>