# MLOpsPreprocessing

This notebook gives an example of how to use MLOps to deploy a preprocessing script.

## Imports

In [None]:
from mlops_codex.preprocessing import MLOpsPreprocessingClient
from mlops_codex.model import MLOpsModelClient

## MLOpsPreprocessingClient

In [None]:
# Client used by every preprocessing cell below. It is built with no arguments,
# so connection settings/credentials are presumably resolved by the library
# itself (e.g. from the environment) — confirm against the mlops_codex docs.
client = MLOpsPreprocessingClient()

## Creating a sync preprocessing

In [None]:
# Folder holding the sync sample files (app.py, requirements.txt, schema.json)
# that the next cell uploads.
PATH = './samples/syncPreprocessing/'

In [None]:
# Register a synchronous preprocessing script built from the files under PATH.
sync_preprocessing = client.create(
    # --- identity -----------------------------------------------------------
    preprocessing_name='Teste preprocessing Sync',  # preprocessing_name
    group='<group>',  # Group that owns the preprocessing (create one using the client)
    # --- runtime ------------------------------------------------------------
    operation="Sync",  # Can be Sync or Async
    python_version='3.9',  # Can be 3.8 to 3.10
    # --- uploaded files -----------------------------------------------------
    source_file=PATH+'app.py',  # Path of the source file
    preprocessing_reference='process',  # Name of the function to call (presumably defined in the source file — confirm)
    requirements_file=PATH+'requirements.txt',  # Path of the requirements file
    schema=PATH+'schema.json',  # Path of the schema file, but it could be a dict (only required for Sync)
    # env=PATH+'.env',  # File for env variables (this will be encrypted in the server)
    # extra_files=[PATH+'utils.py'],  # Extra file paths uploaded along (they will all be in the same folder)
)

In [None]:
# Attach the group token to the preprocessing object before calling run().
# Replace the placeholder with a real token; do not commit the real value.
sync_preprocessing.set_token('<group_token>')

In [None]:
# Send one record through the sync preprocessing; the bare `result` on the
# last line makes the notebook display whatever run() returned.
result = sync_preprocessing.run(data={'variable': 100})
result

## Creating an async preprocessing

In [None]:
# Folder holding the async sample files; PATH is reused by the run cell below.
PATH = './samples/asyncPreprocessing/'

# Register an asynchronous preprocessing script that takes CSV input.
async_preprocessing = client.create(
    # --- identity -----------------------------------------------------------
    preprocessing_name='Teste preprocessing Async',  # preprocessing_name
    group='<group>',  # Group that owns the preprocessing (create one using the client)
    # --- runtime ------------------------------------------------------------
    operation="Async",  # Can be Sync or Async
    python_version='3.9',  # Can be 3.8 to 3.10
    input_type='csv',
    wait_complete=True,  # presumably blocks until the server finishes the setup — confirm
    # --- uploaded files -----------------------------------------------------
    source_file=PATH+'app.py',  # Path of the source file
    preprocessing_reference='build_df',  # Name of the function to call (presumably defined in the source file — confirm)
    requirements_file=PATH+'requirements.txt',  # Path of the requirements file
    schema=PATH+'schema.csv',  # Path of the schema file
    # env=PATH+'.env',  # File for env variables (this will be encrypted in the server)
    # extra_files=[PATH+'input.csv'],  # Extra file paths uploaded along (they will all be in the same folder)
)

In [None]:
# Attach the group token to the async preprocessing object before calling run().
# Replace the placeholder with a real token; do not commit the real value.
async_preprocessing.set_token('<group_token>')

In [None]:
# Submit the sample CSV to the async preprocessing. The returned object is kept
# in `execution` so the following cells can check its status and fetch the result.
execution = async_preprocessing.run(data=PATH+'input.csv')

In [None]:
# Display the current status of the execution started above.
execution.get_status()

In [None]:
# wait_ready() is called first so the result exists before it is fetched;
# presumably it blocks until the execution finishes — confirm.
execution.wait_ready()
# Fetch the output of the finished execution.
execution.download_result()

## Access created preprocessing

In [None]:
# Search for existing preprocessing scripts; called without any filter here.
# As the last expression of the cell, the return value is displayed.
client.search_preprocessing()

In [None]:
# Load an already-created preprocessing by its hash and owning group.
preprocessing = client.get_preprocessing(
    preprocessing_id='<preprocessing_hash>',
    group='<group>',
)

## Access created executions

In [None]:
# Re-attach to a previously started execution of the async preprocessing by its id.
old_execution = async_preprocessing.get_preprocessing_execution(exec_id='<exec_id>')

# Download the result from the execution object fetched on the line above, so
# the cell works on a fresh kernel without relying on any other variable.
old_execution.download_result()

## Using preprocessing with models

In [None]:
# Client for model operations, used below to fetch the models that will
# receive the preprocessing objects created earlier in this notebook.
model_client = MLOpsModelClient()

#### Sync Model

In [None]:
# Fetch an existing sync model and attach the group token to it.
sync_model = model_client.get_model(group='<group>', model_id='<model_hash>')
sync_model.set_token('<group_token>')

# One input record for the model, grouped by feature family for readability.
data = {
    # mean_* features
    "mean_radius": 17.99,
    "mean_texture": 10.38,
    "mean_perimeter": 122.8,
    "mean_area": 1001.0,
    "mean_smoothness": 0.1184,
    "mean_compactness": 0.2776,
    "mean_concavity": 0.3001,
    "mean_concave_points": 0.1471,
    "mean_symmetry": 0.2419,
    "mean_fractal_dimension": 0.07871,
    # *_error features
    "radius_error": 1.095,
    "texture_error": 0.9053,
    "perimeter_error": 8.589,
    "area_error": 153.4,
    "smoothness_error": 0.006399,
    "compactness_error": 0.04904,
    "concavity_error": 0.05373,
    "concave_points_error": 0.01587,
    "symmetry_error": 0.03003,
    "fractal_dimension_error": 0.006193,
    # worst_* features
    "worst_radius": 25.38,
    "worst_texture": 17.33,
    "worst_perimeter": 184.6,
    "worst_area": 2019.0,
    "worst_smoothness": 0.1622,
    "worst_compactness": 0.6656,
    "worst_concavity": 0.7119,
    "worst_concave_points": 0.2654,
    "worst_symmetry": 0.4601,
    "worst_fractal_dimension": 0.1189,
}

# Predict with the sync preprocessing created earlier passed alongside the data;
# the return value is displayed as the cell output.
sync_model.predict(data=data, preprocessing=sync_preprocessing)

#### Async Model

In [None]:
# Fetch an existing async model by group and model hash.
async_model = model_client.get_model(group='<group>', model_id='<model_hash>')

# Folder holding the async model sample input; note this rebinds the shared PATH.
PATH = './samples/asyncModel/'

# set_token() takes the group *token*, the same placeholder used by every other
# set_token() call in this notebook.
async_model.set_token('<group_token>')

# Run the prediction with the async preprocessing created earlier, then call
# wait_ready() so the result can be downloaded in the next cell.
execution = async_model.predict(data=PATH+'input.csv', preprocessing=async_preprocessing)
execution.wait_ready()

In [None]:
# Fetch the output of the async model execution started in the previous cell.
execution.download_result()

-----

## New preprocessing

We're rebuilding the preprocessing module. The main new feature is the ability to send multiple datasets to the MLOps server. Check the code below.

In [None]:
# Folder holding the multi-dataset sample files; PATH is reused by the run cell below.
PATH = "./samples/asyncPreprocessingMultiple/"

# One (name, file path) pair per dataset — presumably the name identifies the
# dataset on the server side; confirm against the mlops_codex docs.
schemas = [
    ("base_cadastral", PATH+'base_cadastral.csv'),
    ("base_pagamentos", PATH+'base_pagamentos.csv'),
    ("base_info", PATH+'base_info.csv'),
]

# Register an asynchronous preprocessing script that takes several datasets.
preprocess = client.create(
    # --- identity -----------------------------------------------------------
    preprocessing_name='test_preprocessing',  # preprocessing_name
    group='<group>',  # Group that owns the preprocessing (create one using the client)
    # --- runtime ------------------------------------------------------------
    operation="Async",  # Can be Sync or Async
    python_version='3.9',  # Can be 3.8 to 3.10
    wait_complete=True,  # presumably blocks until the server finishes the setup — confirm
    # --- uploaded files -----------------------------------------------------
    source_file=PATH+'app.py',  # Path of the source file
    preprocessing_reference='build_df',  # Name of the function to call (presumably defined in the source file — confirm)
    requirements_file=PATH+'requirements.txt',  # Path of the requirements file
    schema=schemas,  # List of (name, schema file path) pairs defined above
    # env=PATH+'.env',  # File for env variables (this will be encrypted in the server)
    # extra_files=[PATH+'utils.py'],  # Extra file paths uploaded along (they will all be in the same folder)
)

In [None]:
# Input datasets for the run, as (name, file path) pairs; the names match the
# ones used in the schema list of the previous cell.
inputs = [
    ("base_cadastral", PATH+'base_cadastral.csv'),
    ("base_pagamentos", PATH+'base_pagamentos.csv'),
    ("base_info", PATH+'base_info.csv'),
]

# Start the run with all datasets at once; the returned object is kept in `run`
# for the download cell that follows.
run = preprocess.run(data=inputs, wait_complete=True)

In [None]:
# Fetch the output of the run started in the previous cell.
# NOTE(review): earlier cells call download_result() on execution objects, while
# this object is used with download() — confirm the method name for this API.
run.download()