# Develop ML model with MLflow

This is a notebook that summarises some of the steps from the tutorial [Develop ML model with MLflow and deploy to Kubernetes](https://mlflow.org/docs/latest/deployment/deploy-model-to-kubernetes/tutorial.html#develop-ml-model-with-mlflow-and-deploy-to-kubernetes), using the model from [Heart Disease Prediction using Logistic Regression](https://github.com/eduai-repo/ML-Demo/blob/main/2%20Classification/2.%20One%20with%20Heart%20Disease%20Prediction.ipynb).

## Out of Scope
* Deploying the Model to Kubernetes

## Requirements

* pyenv already installed (required to run mlserver). Please check the [links section](#links)
* A python virtual environment
* mlflow installed in the Python virtual environment
* ipykernel

First things first: we will run the MLflow UI server on port 5000.

Before starting it, we check whether a process is already listening on port 5000.

WARNING: If you experience any issues, check whether another process is using port 5000 by executing ```lsof -i :5000```

In [None]:
import subprocess, socket


# Port the MLflow UI is expected to listen on.
MLFLOW_UI_PORT = 5000

# Probe the port first so that re-running this cell does not spawn a second server.
# connect_ex returns 0 when the TCP connection succeeds, i.e. something is
# already listening on the port.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
    sock.settimeout(5)
    result = sock.connect_ex(('localhost', MLFLOW_UI_PORT))

if result == 0:
    print('Server already running')
else:
    # Pass the command as an argument list (no shell in between) so that
    # process.pid is the PID of the mlflow process itself.
    command = ["mlflow", "ui", "--port", str(MLFLOW_UI_PORT)]

    process = subprocess.Popen(command)

    print(f"Started background process with PID: {process.pid}")


## Training the Model

In [None]:
import mlflow

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from mlflow.models import infer_signature
from mlflow.client import MlflowClient
from mlflow.models import infer_signature


### Loading and transforming the data

In [None]:
# Path of the CSV file to load (relative to the current working directory).
dataset_source_path = 'data/heart.csv'

# Read the CSV into a DataFrame.
raw_data = pd.read_csv(filepath_or_buffer=dataset_source_path)

# Last expression of the cell: displays the first rows.
raw_data.head()

#### Converting categorical variables into numbers

In [None]:
# Encode the 'famhist' column numerically: 'Absent' -> 0, 'Present' -> 1.
history_mapping = dict(Absent=0, Present=1)
famhist_encoded = raw_data["famhist"].map(history_mapping)
raw_data["famhist"] = famhist_encoded

# Last expression of the cell: displays the first rows after the conversion.
raw_data.head()

In [None]:

def eval_metrics(pred, actual):
    """Score predictions against the true values.

    Args:
        pred: Predicted values.
        actual: True values, passed as the first argument to each
            ``sklearn.metrics`` scoring function.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error and the R2 score.
    """
    mean_sq_err = metrics.mean_squared_error(actual, pred)
    return (
        np.sqrt(mean_sq_err),
        metrics.mean_absolute_error(actual, pred),
        metrics.r2_score(actual, pred),
    )


# Columns used as model features; 'chd' is the target column.
feature_columns = ['tobacco', 'ldl', 'adiposity', 'famhist', 'typea', 'obesity', 'alcohol', 'age']
X = raw_data[feature_columns]
# The double brackets keep the target as a single-column DataFrame.
y = raw_data[['chd']]

# Hold out 30% of the rows as the test set; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)


### Defining an MLflow experiment

In [None]:

EXPERIMENT_NAME = "heart-disease"
DEFAULT_RUN_NAME = f"{EXPERIMENT_NAME}-default-params"

# Set the experiment name
mlflow.set_experiment(EXPERIMENT_NAME)

# Enable auto-logging to MLflow
mlflow.sklearn.autolog()

# Start a run and train a model
with mlflow.start_run(run_name=DEFAULT_RUN_NAME) as run:
    lr = LogisticRegression(C=100, penalty='l2', solver='liblinear')

    # Fit
    lr.fit(X_train, y_train)

    # Predict on the held-out test set
    y_pred = lr.predict(X_test)

    # Unpack into dedicated names: binding the tuple to `metrics` would shadow
    # the `sklearn.metrics` module that eval_metrics() relies on, so running
    # this cell a second time would fail.
    rmse, mae, r2 = eval_metrics(y_pred, y_test)

    # Record the test scores on the run instead of discarding them.
    mlflow.log_metrics({"test_rmse": rmse, "test_mae": mae, "test_r2": r2})

# Retrieve the run information
logged_run = mlflow.get_run(run.info.run_id)

# Retrieve the Dataset object (guarded: the list is empty when no dataset
# was recorded for the run)
dataset_inputs = logged_run.inputs.dataset_inputs
if dataset_inputs:
    logged_dataset = dataset_inputs[0].dataset

    # View some of the recorded Dataset information
    print(f"Dataset name: {logged_dataset.name}")
    print(f"Dataset digest: {logged_dataset.digest}")
    print(f"Dataset profile: {logged_dataset.profile}")
    print(f"Dataset schema: {logged_dataset.schema}")
else:
    print("No dataset was recorded for this run")



### Logging the model

In [None]:
REGISTERED_MODEL_NAME = "heart-disease"
ARTIFACT_PATH = f"{REGISTERED_MODEL_NAME}-model"
MODEL = "LogisticRegression"


# Start an MLflow run
with mlflow.start_run(run_name="logging-model") as run:

    lr = LogisticRegression(C=100, penalty='l2', solver='liblinear')

    # Fit
    lr.fit(X_train, y_train)

    # Set a tag that we can use to remind ourselves what this run was for
    mlflow.set_tag(REGISTERED_MODEL_NAME, f"Basic {MODEL} model for {REGISTERED_MODEL_NAME}")

    # Infer the model signature
    # https://mlflow.org/docs/latest/model/signatures.html
    signature = infer_signature(X_train, lr.predict(X_train))

    # Log and register the model. Only a few rows are stored as the input
    # example: a small sample is enough to document the expected input, whereas
    # passing X_train would save the whole training set as an artifact.
    logged_model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path=ARTIFACT_PATH,
        signature=signature,
        input_example=X_train.head(5),
        registered_model_name=REGISTERED_MODEL_NAME,
    )

    # Set extra tags on the model version that was just registered.
    client = MlflowClient(mlflow.get_tracking_uri())

    # Pick the highest version number explicitly instead of relying on the
    # ordering of the deprecated get_latest_versions() result.
    registered_versions = client.search_model_versions(f"name='{REGISTERED_MODEL_NAME}'")
    model_info = max(registered_versions, key=lambda mv: int(mv.version))

    client.set_model_version_tag(
        name=REGISTERED_MODEL_NAME,
        version=model_info.version,
        key='model',
        value=MODEL
    )

    print(f'Model Info: {model_info}')


### Testing Model Serving Locally

To test the model we need to run mlserver locally according to [step 6](https://mlflow.org/docs/latest/deployment/deploy-model-to-kubernetes/tutorial.html#step-6-testing-model-serving-locally) 

To get an mlserver instance up and running, run the next cell: it prints the command below with all the required values filled in ```mlflow models serve -m runs:/{run.info.run_id}/{ARTIFACT_PATH} -p {port} --enable-mlserver```

In [None]:
import subprocess, socket

# Port on which the local model server is expected to listen
port = 1234

# Try a TCP connection to the port; connect_ex yields 0 when it succeeds
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
    probe.settimeout(5)
    result = probe.connect_ex(('localhost', port))

if result != 0:
    # The connection failed: build the serve command from the `run` object
    # currently in scope and show it so it can be executed manually
    command = f"mlflow models serve -m runs:/{run.info.run_id}/{ARTIFACT_PATH} -p {port} --enable-mlserver"

    print(f' Please execute this command bellow:\n {command}')
else:
    print('Server already running')

Then we will query the endpoint using the next Python cell.

In [None]:
import requests

# Scoring endpoint of the locally served model
url = f'http://localhost:{port}/invocations'

headers = {'Content-Type': 'application/json'}

# A single record using the same feature names the model was trained on
data = {
    "inputs": [
        {
            'tobacco': 12,
            'ldl': 5.73,
            'adiposity': 23.11,
            'famhist': 1,
            'typea': 49,
            'obesity': 25.3,
            'alcohol': 97.2,
            'age': 52,
        }
    ]
}

try:
    # Without a timeout the cell would block indefinitely if the server
    # accepts the connection but never answers.
    response = requests.post(url, headers=headers, json=data, timeout=10)
    print(f'Status Code: {response.status_code}, Result: {response.json()}')
except (requests.RequestException, ValueError) as e:
    # RequestException covers connection/timeout failures; ValueError covers a
    # response body that is not valid JSON.
    print(f'ERROR!!: An unexpected error happened: {e}')