## Simple notebook to test the common functions

### Imports

In [1]:
import os
import pickle
import sys

%load_ext autoreload
%autoreload 1

module_path = os.path.abspath(os.path.join('..'))

if module_path not in sys.path:
    sys.path.append(module_path)


import matplotlib.pyplot as plt
import sklearn
from azureml.core import Datastore, Workspace
from sklearn.calibration import CalibratedClassifierCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from spacy.lang.fr.stop_words import STOP_WORDS

from baseproject.common.commonfunctions import CommonFunctions
from baseproject.common.azuremlutils import AzureMlUtils

In [2]:
# Instantiate the project helpers used throughout the notebook:
# `ia_functions` provides the stop-word and plotting utilities,
# `azuremlutils` wraps the Azure ML calls.
ia_functions = CommonFunctions()
azuremlutils = AzureMlUtils()

This function already returns a pandas DataFrame, so no further conversion is needed:
```Python
def read_tabular_dataset_from_azure(self, dataset_name: str, dataset_version:int = 1):
        """Fetch a registered dataset by name and version and return it as a pandas DataFrame.

        Args:
            dataset_name: Name of the dataset to look up in ``self.ws``.
            dataset_version: Version of the dataset to load (defaults to 1).

        Returns:
            The result of ``to_pandas_dataframe()``. If any exception is raised
            it is logged and swallowed, and the function implicitly returns None.
        """
        try:
            dataset = Dataset.get_by_name(self.ws, name=dataset_name, version=dataset_version)
            return dataset.to_pandas_dataframe()
        except Exception as e:
            # The error is only logged; callers must handle a None result.
            logging.exception('Exception read_tabular_dataset_from_azure:')
```

In [None]:
# Load version 2 of the "dataset-hackathon" dataset from Azure as a pandas DataFrame.
# The helper logs and swallows exceptions, so `dataset` is None if the read failed.
dataset = azuremlutils.read_tabular_dataset_from_azure("dataset-hackathon", 2)

In [None]:
# Preview the first rows of the loaded DataFrame.
dataset.head()

In [None]:
# Copy spaCy's French stop words into a new set; later removals affect this copy only.
stop_words = set(STOP_WORDS)
# NOTE(review): `remove_words` is not used by any later visible cell; only
# 'ailleurs' is removed below. Presumably these are candidate words to drop — confirm.
remove_words = ['ah', 'ailleurs', 'allaient']

In [None]:
# Number of stop words before any removal.
len(stop_words)

```Python
def nlp_function_remove_no_stop_words(self, word_to_check: str, stop_words_list: list = set(STOP_WORDS)):
    """Remove ``word_to_check`` from ``stop_words_list`` in place and print the outcome.

    Args:
        word_to_check: Word to remove from the collection.
        stop_words_list: Collection to modify in place.

    NOTE: the default argument is a set built once, when the function is
    defined, so calls that rely on the default all mutate the same set.
    The ``list`` annotation does not match that default, which is a set.
    """
    if word_to_check in stop_words_list:
        stop_words_list.remove(word_to_check)
        print(word_to_check + " was removed")
    else:
        print(word_to_check + " doesn't exist on dictionary")
```

In [None]:
# `stop_words` is a set and is passed by reference, so the method removes the word from it in place.
ia_functions.nlp_function_remove_no_stop_words('ailleurs', stop_words)

In [None]:
# Check if the word was removed: the size is one less than before if 'ailleurs' was present.
len(stop_words)

```Python
def plot_confusion_matrix(self,
                            cf_matrix: list = [[23,  5],[ 3, 30]], 
                            annot_value: bool = True , 
                            cmap_value: str = 'Blues', 
                            fmt_value: str = '.1f',
                            fig_size: tuple = (11.7,8.27),
                            title: str = "Confusion matrix") -> None:
    """Print a confusion matrix and draw it as a seaborn heatmap.

    Args:
        cf_matrix: Matrix to plot (a sample 2x2 matrix by default).
        annot_value: Forwarded to ``sns.heatmap`` as ``annot``.
        cmap_value: Forwarded to ``sns.heatmap`` as ``cmap``.
        fmt_value: Forwarded to ``sns.heatmap`` as ``fmt``.
        fig_size: Applied through ``sns.set`` as the ``figure.figsize`` rc value.
        title: Title set on the axes.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """

    print(cf_matrix)
    sns.set(rc={'figure.figsize':fig_size})
    ax = plt.axes()
    sns.heatmap(cf_matrix, annot=annot_value, cmap=cmap_value, fmt=fmt_value)
    ax.set_title(title)
    plt.show()
```

In [None]:
# Plot a sample 2x2 confusion matrix, leaving every styling argument at the helper's defaults.
array = [[50,  5],[ 3, 30]]
ia_functions.plot_confusion_matrix(array)

```Python
def create_experiment(self, experiment_name: str):
    """Build an ``Experiment`` for ``experiment_name`` in the workspace ``self.ws``.

    Returns:
        The ``Experiment`` object. If any exception is raised it is logged
        and swallowed, and the function implicitly returns None.
    """
    try:
        experiment = Experiment(self.ws, experiment_name)
        return experiment
    except Exception as e:
        logging.exception(e)
```

In [None]:
# Create an experiment named 'test'; the helper returns the Experiment,
# or None if it logged an error.
azuremlutils.create_experiment('test')

```Python
def get_experiment(self, experiment_name: str):
    """Look up ``experiment_name`` in ``self.ws.experiments`` and return the entry.

    Returns:
        The entry stored under ``experiment_name``. If the lookup raises
        (for example the name is missing), the exception is logged and
        swallowed, and the function implicitly returns None.
    """
    try:
        return self.ws.experiments[experiment_name]
    except Exception as e:
        logging.exception('Exception get_experiment: ')
```

In [None]:
# Fetch the existing experiment named 'test' from the workspace
# (the helper returns None if the lookup fails).
azuremlutils.get_experiment('test')

### Saving dataframe in Azure


To save a pandas dataframe in Azure, you need to execute the line below.

To check that the file was saved, you can open the web version and follow these steps:

![Azure Machine learning Dashboard](../assets/azuremldash.png "Azure Machine learning dashboard")

![Azure Machine learning datasets list](../assets/datasets.png "Azure Machine learning datasets list")

In [None]:
# Save the pandas DataFrame `dataset` to Azure through the 'workspaceblobstore' datastore.
# presumably the second argument is the target name/path of the saved data — verify against the helper
azuremlutils.save_pandas_dataframe_in_azure_datastore('workspaceblobstore', 'test_save_dataframe_azure', dataset)

In [None]:
# Connect to the workspace and get its default datastore.
ws = Workspace.from_config()
default_datastore = ws.get_default_datastore()

# Read a file from the default datastore's container at a hard-coded blob path.
# The helper is awaited at top level, which relies on the notebook kernel's
# support for top-level `await`.
data = await azuremlutils.read_data_from_blob(default_datastore.container_name, "UI/10-29-2021_034203_UTC/assurance_data_processed_rnn.csv")

## Create an experiment to register and deploy a machine learning model

In [None]:
# Set up the "test" experiment through the MLflow helper; the returned object is
# used below to start runs and to log artifacts, models and metrics.
mlflow_experiment = azuremlutils.create_experiment_with_mlflow("test")

In [None]:
# Train a calibrated linear SVM on TF-IDF features and track the whole run:
# fitted preprocessing objects, the model, per-class metrics and the confusion matrix.
# NOTE(review): `df_data` is not defined in any visible cell (the frames loaded
# above are `dataset` and `data`) — bind the intended frame before running.
with mlflow_experiment.start_run() as run:
    try:
        # Samples and labels, selected by column position.
        X = df_data.iloc[:, 7]
        y = df_data.iloc[:, 8]

        # Encode the labels as integers.
        label_encoder = LabelEncoder()
        y = label_encoder.fit_transform(y)

        # NOTE(review): the vectorizer is fitted on all rows before the split,
        # so test documents influence the vocabulary/IDF weights — confirm this is intended.
        vectorizer = TfidfVectorizer(min_df=2)
        X = vectorizer.fit_transform(X)

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
        print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

        SVM_ = CalibratedClassifierCV(LinearSVC())
        SVM_.fit(X_train, y_train)

        # Persist the fitted preprocessing objects. The context managers close
        # (and flush) the files before they are uploaded as artifacts.
        with open('vectorizer.pkl', "wb") as vectorizer_file:
            pickle.dump(vectorizer, vectorizer_file)
        with open('label_encoder.pkl', "wb") as label_encoder_file:
            pickle.dump(label_encoder, label_encoder_file)

        # saving label encoder
        mlflow_experiment.log_artifact("label_encoder.pkl", artifact_path="outputs/label_encoder/label_encoder.pkl")
        # saving vectorizer
        mlflow_experiment.log_artifact("vectorizer.pkl", artifact_path="outputs/vectorizer/vectorizer.pkl")
        # saving model
        mlflow_experiment.sklearn.log_model(SVM_, artifact_path="outputs")

        y_pred = SVM_.predict(X_test)

        cf_matrix = confusion_matrix(y_test, y_pred)

        metrics = classification_report(y_test, y_pred, target_names=label_encoder.classes_, output_dict=True)

        mlflow_experiment.log_metric("accuracy", metrics["accuracy"])
        # Log the per-class scores under class-prefixed keys. Logging every class
        # under the same bare keys ("precision", "recall", ...) makes the classes
        # overwrite each other, and hard-coded class names break on other label sets.
        for class_label in label_encoder.classes_:
            mlflow_experiment.log_metrics(
                {
                    f"{class_label}_{metric_name}": float(value)
                    for metric_name, value in metrics[class_label].items()
                }
            )

        # The helper draws on the current figure, so create it first and save it afterwards.
        fig = plt.figure(1)
        ia_functions.plot_confusion_matrix(cf_matrix)
        fig.savefig("confusion_matrix.png")
        mlflow_experiment.log_artifact("confusion_matrix.png")
    except Exception as e:
        print('Experiment Failed:' + str(e))
        mlflow_experiment.end_run()
        # Re-raise so the failure is visible and later cells do not run
        # against a model that was never trained or logged.
        raise

### Registering a model from the experiment

In [None]:
# Register the model produced by the run.
# NOTE(review): `rund_id`, `folder_path` and `model_name` are not defined in any
# visible cell (`rund_id` looks like a typo for a run id) — define them before running this cell.
result_extraction = mlflow_experiment.register_model(rund_id, folder_path, model_name)

## Get the registered model

In [None]:
from azureml.core.model import Model
# Load version 6 of the registered model from the workspace `ws`.
# NOTE(review): `model_name` is not defined in any visible cell, and the version is hard-coded.
model = Model(ws, model_name, version=6)

## Create an Environment

In [None]:
from azureml.core.environment import Environment

# Environment used for the local deployment, with the pip packages the scoring
# script needs. scikit-learn is pinned to the version installed in this kernel.
environment = Environment("LocalDeploy")
pip_requirements = (
    "inference-schema[numpy-support]",
    "joblib",
    f"scikit-learn=={sklearn.__version__}",
)
for requirement in pip_requirements:
    environment.python.conda_dependencies.add_pip_package(requirement)

## Inference code

In [None]:
# Inference configuration: pairs the scoring entry script `predict.py`
# with the `environment` object built in the environment cell.

from azureml.core.model import InferenceConfig

inference_config = InferenceConfig(entry_script="predict.py",
                                   environment=environment)

## Deploy 

In [None]:
## Deploy the registered model as a local web service
from azureml.core.webservice import LocalWebservice
from azureml.core.model import Model

# The port is optional; if not provided Docker will choose a random unused port.
deployment_config = LocalWebservice.deploy_configuration(port=6789)

# Pass the local deployment configuration explicitly. Without it the
# configuration built above (including the port) is never used and
# Model.deploy falls back to its default deployment configuration.
local_service = Model.deploy(
    ws,
    "nlp-hackathon-model-2",
    [model],
    inference_config,
    deployment_config,
)

# Block until the service is up (or the deployment fails).
local_service.wait_for_deployment()