In [None]:
!pip install azure-ai-ml
!pip install azure-identity
!pip install mlflow==2.3.2
!pip install scikit-learn==1.1
!pip install pandas
!pip install mlxtend
!pip install azure-ai-ml mlflow

In [None]:
import os

from azure.ai.ml import MLClient
from azure.ai.ml.entities import Workspace
from azure.identity import DefaultAzureCredential


# Authenticate with whatever credential source DefaultAzureCredential resolves.
credential = DefaultAzureCredential()

# Placeholders: replace each value (including the angle brackets) with your own.
subscription_id = '<YourSubscriptionId>'
resource_group = '<YourResourceGroup>'
workspace_name = '<YourWorkspace>'


# Get a handle to the workspace; every later cell talks to Azure ML through it.
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

# Names shared by the training, registration and deployment cells.
model_name = 'Apriori-ItemIds'
compute_name = "cpu-cluster"

# The training script is written to src/train.py by a later %%writefile cell,
# which does not create missing parent directories; make sure src/ exists.
os.makedirs("src", exist_ok=True)

In [None]:
%%writefile src/train.py
import os
import argparse
import pandas as pd
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential
from azure.ai.ml.entities import Data
from mlxtend.frequent_patterns import apriori, association_rules
import mlflow
import mlflow.pyfunc

class AprioriModelWrapper(mlflow.pyfunc.PythonModel):
    """MLflow pyfunc wrapper around a mined set of association rules.

    Attributes:
        frequent_itemsets: DataFrame returned by ``apriori``.
        rules: DataFrame returned by ``association_rules``; ``predict`` reads
            its ``antecedents``, ``consequents`` and ``support`` columns.
    """

    def __init__(self, frequent_itemsets, rules):
        self.frequent_itemsets = frequent_itemsets
        self.rules = rules

    @staticmethod
    def _as_item_list(current_order):
        """Flatten the supported payload types into a plain list of item ids."""
        if isinstance(current_order, str):
            # A bare string is one item id, not an iterable of characters.
            return [current_order]
        if isinstance(current_order, pd.DataFrame):
            return current_order.to_numpy().ravel().tolist()
        if hasattr(current_order, "tolist"):
            # pandas Series / numpy arrays
            flattened = current_order.tolist()
            return flattened if isinstance(flattened, list) else [flattened]
        return list(current_order)

    def predict(self, current_order, model_input=None, params=None):
        """Rank candidate next items for the given order.

        The method can be called directly as ``predict(order)``. When it is
        invoked through MLflow's pyfunc wrapper as ``predict(context,
        model_input)``, the first positional argument is the model context and
        the payload arrives as ``model_input``; that payload then takes
        precedence.

        Args:
            current_order: Items currently in the order (or the MLflow context
                when ``model_input`` is supplied).
            model_input: Payload passed by MLflow, if any.
            params: Optional inference parameters passed by newer MLflow
                versions; unused.

        Returns:
            DataFrame indexed by ``consequents`` with the mean ``support`` of
            every rule whose antecedents share at least one item with the
            order, sorted by support in descending order.
        """
        if model_input is not None:
            current_order = model_input
        order_items = self._as_item_list(current_order)

        # Build an explicit boolean Series so the selection is still a row
        # mask when the rule table is empty.
        is_relevant = pd.Series(
            [
                any(product in list(antecedents) for product in order_items)
                for antecedents in self.rules['antecedents']
            ],
            index=self.rules.index,
            dtype=bool,
        )
        relevant_rules = self.rules[is_relevant]

        # Rank consequent items by support
        next_items = (
            relevant_rules.groupby('consequents')
            .agg({'support': 'mean'})
            .sort_values(by='support', ascending=False)
        )

        return next_items

def main():
    """Train an Apriori association-rule model and log it to MLflow.

    Command-line arguments:
        --data: Path to the input CSV; it must contain ``salesid`` and
            ``itemid`` columns.
        --model_name: Artifact path under which the CSV exports and the
            pyfunc model are logged.
    """
    # input and output arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str, required=True, help="Path to the input file")
    parser.add_argument(
        "--model_name",
        type=str,
        required=True,
        help="Artifact path (model name) under which the model is logged",
    )
    args = parser.parse_args()

    print(" ".join(f"{k}={v}" for k, v in vars(args).items()))
    print("input data:", args.data)

    # The context manager closes the run even when training raises, so a
    # failed job is not left with a dangling active run.
    with mlflow.start_run():
        # Enable autologging
        mlflow.autolog()

        # transform data: one row per sale, one boolean column per item
        df = pd.read_csv(args.data)
        basket = df.pivot_table(index='salesid', columns='itemid', aggfunc='size', fill_value=0)
        basket_bool = basket > 0

        # Train the Apriori model
        frequent_itemsets = apriori(basket_bool, min_support=0.01, use_colnames=True)
        rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

        # Log the frequent itemsets and rules as artifacts next to the model
        frequent_itemsets.to_csv('frequent_itemsets.csv', index=False)
        rules.to_csv('association_rules.csv', index=False)
        mlflow.log_artifact("frequent_itemsets.csv", artifact_path=args.model_name)
        mlflow.log_artifact("association_rules.csv", artifact_path=args.model_name)

        # Log the model itself
        mlflow.pyfunc.log_model(
            artifact_path=args.model_name,
            python_model=AprioriModelWrapper(frequent_itemsets, rules)
        )


if __name__ == "__main__":
    main()


In [None]:
%%writefile pytorch-env.yml
# Conda environment specification written to pytorch-env.yml and used as the
# conda_file of the training Environment.
# NOTE(review): the name says "pytorch", but no PyTorch package is listed below.
name: pytorch-env
channels:
  - conda-forge
dependencies:
  - python=3.8
  # NOTE(review): the notebook's install cell pins scikit-learn==1.1 while this
  # file pins 1.2.2 - confirm which version is intended.
  - scikit-learn=1.2.2
  - pandas
  # mlxtend provides apriori / association_rules imported by src/train.py.
  - mlxtend
  - azure-ai-ml
  - pip
  - pip:
      # NOTE(review): mlflow is unpinned here but pinned to 2.3.2 in the
      # notebook's install cell - confirm the versions are meant to differ.
      - mlflow
      - azureml-mlflow
      - mltable


In [None]:
from azure.ai.ml import MLClient, command, Input
from azure.ai.ml.entities import Environment

# Describe the training environment: base Docker image plus the conda spec
# written to pytorch-env.yml.
env = Environment(
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="pytorch-env.yml",
    name="pytorch-env"
)

# Register the environment and keep the returned entity: the local object has
# no version, whereas the registered one carries the version assigned by the
# workspace, which the job submission needs for its "name:version" reference.
env = ml_client.environments.create_or_update(env)
print(f"Registered environment {env.name}:{env.version}")

In [None]:
from azure.ai.ml.entities import AmlCompute

# Cluster definition: STANDARD_D2_V2 nodes, between 0 and 4 instances.
compute_cluster = AmlCompute(name=compute_name, size="STANDARD_D2_V2", min_instances=0, max_instances=4)

# Start provisioning and block until the operation reports its result.
provisioning = ml_client.compute.begin_create_or_update(compute_cluster)
provisioning.result()

In [None]:
from azure.ai.ml import Input, command

# Resolve the newest version of the "salesline" data asset through the public
# API instead of the private _get_latest_version helper.
salesline_asset = ml_client.data.get(name="salesline", label="latest")

# A locally constructed Environment has no version; fall back to the "@latest"
# label so the reference never becomes "<name>:None".
environment_ref = f"{env.name}:{env.version}" if env.version else f"{env.name}@latest"

command_job = command(
    inputs=dict(
        data=Input(
            type="uri_file",
            path=salesline_asset.path,
        ),
        model_name=model_name,
    ),
    code="./src",
    command="python train.py --data ${{inputs.data}} --model_name ${{inputs.model_name}}",
    environment=environment_ref,
    compute=compute_name,
    display_name="apriori-training-job",
    experiment_name="apriori-experiment"
)


# Submit the job and stream its logs until it finishes
returned_job = ml_client.jobs.create_or_update(command_job)
ml_client.jobs.stream(returned_job.name)

In [None]:
from azure.ai.ml.entities import Model, Asset
from azure.ai.ml.constants import ModelType

# Register the artifact folder the training job logged under `model_name`
# (the MLflow model together with the exported CSV files) as a model asset.
model = Model(
    path=f'runs:/{returned_job.name}/{model_name}/',
    name=f"{model_name}",
    description=f"{model_name} model for F&O data",
    type="mlflow_model"
)
registered_model = ml_client.models.create_or_update(model)
print(f"Registered Model ID: {registered_model.id}")

In [None]:
%%writefile conda.yml
# Conda environment specification for the scoring web service.
# It is written to conda.yml; whatever consumes it must reference that exact
# file name.
channels:
  - defaults
  - conda-forge
dependencies:
  - python=3.8
  - pip
  - pip:
      # score.py imports azureml.core.model - presumably provided through
      # azureml-defaults; confirm.
      - azureml-defaults
      - azureml-contrib-services
      # numpy and pandas are imported directly by score.py.
      - numpy
      - pandas
      - scikit-learn
      # score.py uses inference_schema's schema decorators and parameter types.
      - inference-schema[numpy,pandas]


In [None]:
from azure.ai.ml import MLClient, command, Input
from azure.ai.ml.entities import Environment

# Define the scoring environment. The conda spec is written to "conda.yml" by
# the preceding %%writefile cell, so the file name here must match it exactly.
webservice_env = Environment(
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    conda_file="conda.yml",
    name="apriori_webservice_env",
)

# Keep the registered entity so the deployment references the version that
# the workspace assigned.
webservice_env = ml_client.environments.create_or_update(webservice_env)

In [None]:
%%writefile score.py
import json
import logging
import os
import pandas as pd
import numpy as np

from azureml.core.model import Model
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType

# Logging setup for the scoring script.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('azureml.automl.core.scoring_script_v2')

# Example record describing one entry of the "predictions" list in the response.
sample_prediction = dict(
    antecedents=["ItemId1", "ItemId2"],
    consequents=["RecommendedItemId1", "RecommendedItemId2"],
    support=0.123456789,
    confidence=0.987654321,
    lift=12.3456789012,
)

# Schema samples consumed by the input/output schema decorators on run().
sample_input = StandardPythonParameterType(["ItemId1", "ItemId2"])
sample_output = StandardPythonParameterType({"predictions": [sample_prediction]})

def predict_next_items(current_basket, rules, top_n=5):
    """
    Predicts the next items in the basket based on the current order using association rules.

    A rule applies when every item of its antecedents is present in the basket.
    Applicable rules are ranked by confidence, then lift (both descending).

    :param current_basket: list of items currently in the basket
    :param rules: DataFrame containing the association rules; must provide the
        columns antecedents, consequents, support, confidence and lift
    :param top_n: Number of top predictions to return
    :return: new DataFrame containing the top predictions with their respective
        metrics; antecedents and consequents are plain lists. The input
        ``rules`` frame is never modified.
    """
    output_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

    logger.info(f"Current basket: {current_basket}")
    logger.info(f"Total number of rules: {len(rules)}")

    # Filter rules where all items in the antecedents are in the current basket.
    # astype(bool) keeps this a row mask even when `rules` has no rows.
    is_applicable = rules['antecedents'].apply(
        lambda antecedents: all(item in current_basket for item in antecedents)
    ).astype(bool)
    applicable_rules = rules[is_applicable]

    logger.info(f"Number of applicable rules: {len(applicable_rules)}")

    if applicable_rules.empty:
        return pd.DataFrame(columns=output_columns)

    # Sort rules by confidence and lift, then keep the top N
    top_rules = applicable_rules.sort_values(by=['confidence', 'lift'], ascending=False).head(top_n)

    # Build a fresh frame via assign() instead of writing into a slice of
    # `rules`; frozensets become lists so the result is JSON serializable.
    predictions = top_rules[output_columns].assign(
        antecedents=top_rules['antecedents'].apply(list),
        consequents=top_rules['consequents'].apply(list),
    )

    logger.info(f"Top predictions: {predictions}")

    return predictions

def _parse_itemset(text):
    """Convert CSV text such as "frozenset({'A', 'B'})" into ['A', 'B'].

    The set body is parsed as a Python literal, so item ids containing spaces
    or commas survive intact. Items are returned as strings in their textual
    order. Text that cannot be parsed this way falls back to the previous
    character-stripping logic.
    """
    import ast
    import re

    match = re.fullmatch(r"\s*frozenset\(\s*(?:\{(.*)\})?\s*\)\s*", text, re.DOTALL)
    if match:
        try:
            items = ast.literal_eval("[" + (match.group(1) or "") + "]")
            return [str(item) for item in items]
        except (ValueError, SyntaxError):
            pass
    return text.strip("frozenset({})").replace("'", "").replace(" ", "").split(",")


def init():
    """Load the association rules exported by training into the global ``rules``.

    Reads ``Apriori-ItemIds/association_rules.csv`` below the directory named
    by the ``AZUREML_MODEL_DIR`` environment variable and converts the
    ``antecedents`` / ``consequents`` columns from their frozenset text form
    into lists of item ids.
    """
    global rules
    # Load the model
    model_path = os.path.join(os.getenv("AZUREML_MODEL_DIR"), "Apriori-ItemIds")
    model_path += "/association_rules.csv"
    rules = pd.read_csv(model_path)
    logger.info(f"Loaded rules from {model_path}")
    logger.info(f"Rules sample: {rules.head()}")
    # Parse every textual cell. Checking the value type (rather than the
    # column dtype) keeps this working when pandas infers a string dtype.
    for column in ('antecedents', 'consequents'):
        rules[column] = rules[column].apply(
            lambda value: _parse_itemset(value) if isinstance(value, str) else value
        )
    logger.info(f"Processed rules sample: {rules.head()}")

@input_schema('Inputs', sample_input)
@output_schema(sample_output)
def run(Inputs):
    """Score one request: return the top 5 rule-based predictions for a basket.

    :param Inputs: list of item ids currently in the basket
    :return: ``{"predictions": [...]}`` with one record per selected rule, or
        the exception message as a string if scoring fails
    """
    try:
        # Log the raw request payload for debugging
        logger.info(f"Received input: {Inputs}")

        top_predictions = predict_next_items(Inputs, rules, top_n=5)
        logger.info(f"Number of predictions found: {len(top_predictions)}")

        # Round-trip through JSON so the records hold plain Python values
        records = json.loads(top_predictions.to_json(orient="records"))
        return {"predictions": records}
    except Exception as e:
        # Record the failure and hand the message back to the caller
        logger.error(f"Exception occurred: {str(e)}")
        return str(e)

In [None]:
from azure.ai.ml.entities import CodeConfiguration

# Inference configuration: score.py is the entry script; the code path is left
# as an empty string.
inference_config = CodeConfiguration(code="", scoring_script="score.py")


In [None]:
from azure.ai.ml import MLClient
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment
from azure.identity import DefaultAzureCredential

# Define the endpoint (key-based authentication, lowercase name)
endpoint = ManagedOnlineEndpoint(
    name=f"{model_name}-endpoint".lower(),
    description=f"Endpoint for {model_name} model",
    auth_mode="key"
)

# begin_create_or_update only starts a long-running operation; wait for it so
# the deployment cell does not run against an endpoint that does not exist yet.
ml_client.online_endpoints.begin_create_or_update(endpoint).result()


In [None]:
from azure.ai.ml.entities import Model
from azure.ai.ml.constants import AssetTypes

#Get Model Name and version from previous job
#models = ml_client.models.list(name=registered_model.name)
#highest_model_version = registered_model.version

#alternative get name and highest version
models = ml_client.models.list(name=model_name)
# Versions are strings; compare them numerically so "10" ranks above "9".
# Non-numeric versions sort below every numeric one.
highest_model_version = max(
    models,
    key=lambda model: int(model.version) if str(model.version).isdigit() else -1,
)

# Define the deployment. The name is lowercased to match the endpoint naming
# and the lowercase deployment name used in the request header of the test cell.
deployment = ManagedOnlineDeployment(
    name=f"{model_name}-v{highest_model_version.version}-deployment".lower(),
    endpoint_name=endpoint.name,
    model=highest_model_version,
    environment=webservice_env,
    code_configuration=inference_config,
    instance_type="Standard_DS3_v2",
    instance_count=1
)

# Start the deployment and wait for it to finish: traffic can only be routed
# to a deployment that already exists.
webservice_deployment = ml_client.online_deployments.begin_create_or_update(deployment)
webservice_deployment.result()

In [None]:
# Update the endpoint to route all traffic to the deployment, and wait for the
# long-running update to complete before the endpoint is invoked.
endpoint.traffic = {deployment.name: 100}
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

In [None]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    """Optionally disable server-certificate verification for HTTPS clients.

    Nothing changes when ``allowed`` is falsy, when the PYTHONHTTPSVERIFY
    environment variable is set to a non-empty value, or when the ssl module
    has no ``_create_unverified_context``. Otherwise the process-wide default
    HTTPS context factory is replaced by the unverified one.
    """
    if not allowed:
        return
    if os.environ.get('PYTHONHTTPSVERIFY', ''):
        return
    unverified_factory = getattr(ssl, '_create_unverified_context', None)
    if unverified_factory:
        # bypass the server certificate verification on client side
        ssl._create_default_https_context = unverified_factory

# NOTE(review): this disables TLS certificate verification for the whole
# process. It is only needed if the scoring service uses a self-signed
# certificate; remove it otherwise.
allowSelfSignedHttps(True)

# Request data goes here
# The example below assumes JSON formatting which may be updated
# depending on the format your endpoint expects.
# More information can be found here:
# https://docs.microsoft.com/azure/machine-learning/how-to-deploy-advanced-entry-script
data = {"Inputs": ['D0001',"L0001"]}

body = str.encode(json.dumps(data))

url = '<your-Endpoint>/score'
# Replace this with the primary/secondary key, AMLToken, or Microsoft Entra ID token for the endpoint
api_key = '<your Api Key>'
if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")

# The azureml-model-deployment header will force the request to go to a specific deployment.
# Remove this header to have the request observe the endpoint traffic rules
headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key), 'azureml-model-deployment': 'apriori-itemids-v1-deployment' }

req = urllib.request.Request(url, body, headers)

# The Authorization header is deliberately not printed: notebook outputs are
# saved with the file and would leak the key.

try:
    response = urllib.request.urlopen(req)

    result = response.read()
    print(result)
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))

    # Print the headers - they include the request ID and the timestamp, which are useful for debugging the failure
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))

In [None]:
import json

import requests

swagger_url = "<your-Endpoint>/swagger.json"
api_key = '<your Api Key>'
# Headers for the request
headers = {
    "Authorization": f"Bearer {api_key}"
}

# Make the GET request to the Swagger URL. Without a timeout, requests waits
# indefinitely on an unresponsive endpoint and the cell never returns.
response = requests.get(swagger_url, headers=headers, timeout=30)

# Check if the request was successful
if response.status_code == 200:
    swagger_json = response.json()

    # Save the Swagger JSON to a file
    with open('swagger.json', 'w') as json_file:
        json.dump(swagger_json, json_file, indent=4)
    print("Swagger documentation downloaded successfully.")
else:
    print(f"Failed to download Swagger documentation. Status code: {response.status_code}")
    print(f"Response: {response.text}")