#  Automated ML with azureml

The dependencies are imported

In [1]:
import os
import pandas as pd
from azureml.core import Dataset, Datastore, Workspace, Experiment
# from azureml.train.automl import AutoMLConfig
from azureml.widgets import RunDetails

## Dataset

### Overview

We will try to predict the rating of modified version of the **Kaggle Trip advisor dataset**.

The Dataset contains a Trip Advisor hotel review text column as well as a Rating column with Ratings from 0 - 5 stars. 

> The Tripadvisor Hotel Review Dataset file, is derived from the publication: 
>
>_Alam, M. H., Ryu, W.-J., Lee, S., 2016. Joint multi-grain topic senti- ment: modeling semantic aspects for online >reviews. Information Sciences 339, 206–223._ 
>
> You can download the Dataset with the link:
> [trip-advisor-hotel-reviews](https://www.kaggle.com/andrewmvd/trip-advisor-hotel-reviews)

In the original Dataset the target **Rating** column contains the values 0* - 5*.

In a modified version of the dataset we will try to predict the **norm_rating** column based on the **Review** text column as a **classification task** with:

* class 0 - Negative reviews (1* & 2* rating)
* class 1 - Neutral reviews (3* rating)
* class 2 - Positive reviews (4* & 5* rating)


## Initialize the Workspace and create an Experiment

In [2]:
ws = Workspace.from_config()

# choose a name for experiment
experiment_name = 'automl_review_classifier'

experiment=Experiment(ws, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
automl_review_classifier,quick-starts-ws-134076,Link to Azure Machine Learning studio,Link to Documentation


In [3]:
print(f"subscription key {ws.subscription_id}")
print(f"resource group {ws.resource_group}")
print(f"workspace name {ws.name}")

subscription key f9d5a085-54dc-4215-9ba6-dad5d86e60a0
resource group aml-quickstarts-134076
workspace name quick-starts-ws-134076


## Load the Dataset and perform a train test split

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

filepath_2_dataset = r"hotel_reviews_featurized_roberta.csv"
# Read the Dataset as a pandas dataframe
hotel_review_dataset = pd.read_csv(filepath_2_dataset)

In [5]:
print(f"Dataset Shape: {hotel_review_dataset.shape}")
hotel_review_dataset.describe()

Dataset Shape: (20491, 808)


Unnamed: 0,topic_0,topic_1,topic_2,topic_3,topic_4,topic_5,topic_6,topic_7,topic_8,topic_9,...,dim_758,dim_759,dim_760,dim_761,dim_762,dim_763,dim_764,dim_765,dim_766,dim_767
count,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0,...,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0,20491.0
mean,0.001939,0.004714,0.011617,0.001034,0.001708,0.010981,0.00334,0.033029,0.00134,0.019453,...,-0.027524,0.003075,-0.156707,0.102556,-0.103847,-0.022158,0.124463,-0.32934,0.020014,0.032116
std,0.004887,0.007468,0.006196,0.003457,0.004591,0.006425,0.006819,0.008321,0.003971,0.00533,...,0.060321,0.047068,0.076493,0.05061,0.063625,0.048991,0.128676,0.105728,0.040554,0.058754
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0148,0.0,0.0,...,-0.283,-0.3079,-0.484,-0.2524,-0.3863,-0.2547,-0.3674,-1.0168,-0.1846,-0.2166
25%,0.0,0.0,0.0106,0.0,0.0,0.0103,0.0,0.0281,0.0,0.0163,...,-0.0669,-0.0278,-0.2077,0.071,-0.14475,-0.0555,0.03585,-0.3995,-0.0061,-0.0065
50%,0.0,0.0,0.0115,0.0,0.0,0.0112,0.0,0.0303,0.0,0.0176,...,-0.0277,0.0025,-0.1551,0.1039,-0.1031,-0.0239,0.1142,-0.3331,0.0198,0.0303
75%,0.0,0.0104,0.0141,0.0,0.0,0.014,0.0,0.036,0.0,0.0222,...,0.0116,0.0338,-0.1044,0.13565,-0.0622,0.0084,0.2037,-0.2644,0.0462,0.06855
max,0.0568,0.073,0.0484,0.042,0.0559,0.0808,0.0563,0.1455,0.0333,0.0829,...,0.2625,0.2319,0.1587,0.3221,0.1717,0.2433,0.9088,0.1528,0.2006,0.3707


### First the same train test split is performed for the Dataset to make it available to both AutoML and Hyperdrive

In [7]:
# Get hotel review text and normalized rating
X = hotel_review_dataset.drop(columns=['norm_rating'])
y = list(hotel_review_dataset.norm_rating)
X_train, X_test, y_train, y_test = train_test_split(hotel_review_dataset, y, test_size=0.2, random_state=42)
print(f"X_train: {X_train.shape}\nX_test: {X_test.shape}\ny_train: {len(y_train)}\ny_test: {len(y_test)}")

X_train: (16392, 808)
X_test: (4099, 808)
y_train: 16392
y_test: 4099


### The training set and test sets will be registered separately to ensure strict separation

In [8]:
X_train['norm_rating'] = y_train
X_test['norm_rating'] = y_test 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [9]:
print(X_train.shape)
print(X_test.shape)

(16392, 808)
(4099, 808)


### The AutoML train/testsets should contain just the text column and norm rating column (no feature engineering)

#### Upload the different train/test sets

In [12]:
X_train_automl = X_train.loc[:, ['text', 'norm_rating']]
X_test_automl = X_test.loc[:, ['text', 'norm_rating']]

os.makedirs("data", exist_ok=True)
# Upload the training/test data in the default datastore
train_dataset_path_automl = "data/train_set_automl.csv"
X_train_automl.to_csv(train_dataset_path_automl)
test_dataset_path_automl = "data/test_set_automl.csv"
X_test_automl.to_csv(test_dataset_path_automl)

X_train_hyper = X_train.drop(columns =["text"])
X_test_hyper = X_test.drop(columns = ["text"])

train_dataset_path = "data/train_set_hyper.csv"
X_train_hyper.to_csv(train_dataset_path)
test_dataset_path = "data/test_set_hyper.csv"
X_test_hyper.to_csv(test_dataset_path)

datastore = ws.get_default_datastore()
datastore.upload(src_dir="data", target_path="data")

Uploading an estimated of 4 files
Uploading data/test_set_automl.csv
Uploaded data/test_set_automl.csv, 1 files out of an estimated total of 4
Uploading data/train_set_automl.csv
Uploaded data/train_set_automl.csv, 2 files out of an estimated total of 4
Uploading data/test_set_hyper.csv
Uploaded data/test_set_hyper.csv, 3 files out of an estimated total of 4
Uploading data/train_set_hyper.csv
Uploaded data/train_set_hyper.csv, 4 files out of an estimated total of 4
Uploaded 4 files


$AZUREML_DATAREFERENCE_c3faa4c3234246f59fe09397b7fd5fbf

### Load the training and test Datasets and register them

In [15]:
dataset_training = Dataset.Tabular.from_delimited_files(path = [(datastore, ("data/train_set_automl.csv"))])
dataset_training = dataset_training.register(workspace=ws, name="auto-ml-training-data", description="Hotel Review AutoML Training Data")

dataset_test =  Dataset.Tabular.from_delimited_files(path = [(datastore, ("data/test_set_automl.csv"))])
dataset_test = dataset_training.register(workspace=ws, name="auto-ml-test-data", description="Hotel Review AutoML Test Data")

## Define a Compute Target for AutoML

In [16]:
## Define a Compute Target for AutoML
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cpu_cluster_name = "cpu-cluster-1"
try:
    compute_target = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print("Found existing Compute Target")
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size = "Standard_D2_V2", max_nodes=4)
    compute_target = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)

Found existing Compute Target
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## AutoML Configuration

* _experiment_timeout_minutes_: was set to prevent the experiment from running for long timer periods with high cost
* _max_concurrent_iterations_: was set to 4 since only 4 compute target nodes are available for paralle child runs
* _primary_metric_: was set to AUC_weighted since this includes a balance between false positive and true positive rate
* _n_cross_validations_: 5 crossvalidations were selected, since this results in a more robust mean/std estimation for each model

* _enable_early_stopping_: to prevent unproductive runs which lead to no improvement and costs
* _compute_target_: needs to be define to perform the AutoML computations
* _task_: needs to be classification since the label column is defining separate classes
* _training_data_: corresponds to the training set
* _label_column_: corresponds to the target/label column defining the separate classes
* _debug_log_: defined to enable detailed logging of automl errors

In [17]:
from azureml.train.automl.automlconfig import AutoMLConfig

In [22]:
## Define key AutoML Settings
automl_settings = {
    "experiment_timeout_minutes": 20,
    "max_concurrent_iterations": 4,
    "primary_metric": "f1_score_weighted",
    "n_cross_validations": 5
}

## Setup an AutoMLConfig object
automl_config = AutoMLConfig(
    compute_target=compute_target,
    task="classification",
    training_data=dataset_training,
    label_column_name="norm_rating",
    enable_early_stopping=True,
    debug_log="automl_errors.log",
    **automl_settings
)

In [23]:
# The Experiment needs to be submitted in order to execute the AutoML run
automl_run = experiment.submit(automl_config)

Running on remote.


## Run Details

Write about the different models trained and their performance. Why do you think some models did better than others?

In [24]:
from azureml.widgets import RunDetails
RunDetails(automl_run ).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [25]:
automl_run.wait_for_completion(show_output=True)


Current status: FeaturesGeneration. Generating features for the dataset.
Current status: DatasetFeaturization. Beginning to fit featurizers and featurize the dataset.
Current status: DatasetBalancing. Performing class balancing sweeping
Current status: DatasetCrossValidationSplit. Generating individually featurized CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Class balancing detection
STATUS:       ALERTED
DESCRIPTION:  To decrease model bias, please cancel the current run and fix balancing problem.
              Learn more about imbalanced data: https://aka.ms/AutomatedMLImbalancedData
DETAILS:      Imbalanced data can lead to a falsely perceived positive effect of a model's accuracy because the input data has bias towards one class.
+---------------------------------+---------------------------------+-------------------------

{'runId': 'AutoML_4f1c1746-a2b1-43c7-b465-6cbe3ef014af',
 'target': 'cpu-cluster-1',
 'status': 'Completed',
 'startTimeUtc': '2021-01-09T13:06:47.850464Z',
 'endTimeUtc': '2021-01-09T13:40:40.939213Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': '5',
  'target': 'cpu-cluster-1',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"7053849c-a6f9-416c-ac1d-2b34fd270a0c\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetDatastoreFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"datastores\\\\\\": [{\\\\\\"datastoreName\\\\\\": \\\\\\"workspaceblobstore\\\\\\", \\\\\\"path\\\\\\": \\\\\\"data/train_set_automl.csv\\\\\\", \\\\\\"resourceGroup\\\\\\": \\\\\\"aml-quickstarts-134076\\\\\\", \\\\\\"subscription\\\\\\": \\\\\\"f9d5a085-54dc-4215-9ba6-dad5d86e60a0\\\\\\", \

## Performance metrics and Best Model

TODO: In the cell below, get the best model from the automl experiments and display all the properties of the model.


### Get the best model and the best run

In [39]:
best_child = automl_run.get_best_child()
print(best_child.get_file_names())
best_model = best_child.register_model(model_name="best-automl-model", model_path="outputs/model.pkl")

['accuracy_table', 'automl_driver.py', 'azureml-logs/55_azureml-execution-tvmps_eb47bd6a9b1ab2010f5df288203402455c5bfa974ec862472bb033d8b6747185_d.txt', 'azureml-logs/65_job_prep-tvmps_eb47bd6a9b1ab2010f5df288203402455c5bfa974ec862472bb033d8b6747185_d.txt', 'azureml-logs/70_driver_log.txt', 'azureml-logs/75_job_post-tvmps_eb47bd6a9b1ab2010f5df288203402455c5bfa974ec862472bb033d8b6747185_d.txt', 'azureml-logs/process_info.json', 'azureml-logs/process_status.json', 'confusion_matrix', 'logs/azureml/103_azureml.log', 'logs/azureml/azureml_automl.log', 'logs/azureml/dataprep/python_span_30f0a700-ebc8-4a6e-afc9-ada4d4d1e68b.jsonl', 'logs/azureml/dataprep/python_span_9eb95a04-8aeb-4a2d-bf3e-3369f5ea3a58.jsonl', 'logs/azureml/job_prep_azureml.log', 'logs/azureml/job_release_azureml.log', 'outputs/conda_env_v_1_0_0.yml', 'outputs/env_dependencies.json', 'outputs/model.pkl', 'outputs/pipeline_graph.json', 'outputs/scoring_file_v_1_0_0.py']


## Model Deployment

In the cell below, register the model, create an inference config and deploy the model as a web service.

In [44]:
from azureml.core.environment import Environment
from azureml.core.model import Model
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice

# Create the environment
myenv = Environment(name="myenv")
conda_dep = CondaDependencies()

# Define the packages needed by the model and scripts
conda_dep.add_conda_package("pandas")
conda_dep.add_conda_package("numpy")
conda_dep.add_conda_package("scikit-learn")
conda_dep.add_conda_package("xgboost")
# You must list azureml-defaults as a pip dependency
conda_dep.add_pip_package("azureml-defaults")

# Adds dependencies to PythonSection of myenv
myenv.python.conda_dependencies=conda_dep

inference_config = InferenceConfig(entry_script="score.py",
                                   environment=myenv)

In [45]:
service_name = 'automl-review-classification'
aci_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[best_model],
                       inference_config=inference_config,
                       deployment_config=aci_config,
                       overwrite=True)
service.wait_for_deployment(show_output=True)
print("scoring URI: " + service.scoring_uri)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...........................................................................
Failed


Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 11c00b57-4b4a-4322-8ec5-dc75580aadd6
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: automl-review-classification. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\nYou can also try to run image 1482e13b39f24b188bfd175723d72779.azurecr.io/azureml/azureml_50ad9199f6abe9333ff46bc0245c9350 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more inf

WebserviceException: WebserviceException:
	Message: Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 11c00b57-4b4a-4322-8ec5-dc75580aadd6
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "message": "Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: automl-review-classification. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\nYou can also try to run image 1482e13b39f24b188bfd175723d72779.azurecr.io/azureml/azureml_50ad9199f6abe9333ff46bc0245c9350 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Your container application crashed. This may be caused by errors in your scoring file's init() function.\nPlease check the logs for your container instance: automl-review-classification. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\nYou can also try to run image 1482e13b39f24b188bfd175723d72779.azurecr.io/azureml/azureml_50ad9199f6abe9333ff46bc0245c9350 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information."
    },
    {
      "code": "AciDeploymentFailed",
      "message": "Your container application crashed. Please follow the steps to debug:\n1. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\n3. You can also interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\n4. View the diagnostic events to check status of container, it may help you to debug the issue. {\"restartCount\":5,\"currentState\":{\"state\":\"Waiting\",\"startTime\":null,\"exitCode\":null,\"finishTime\":null,\"detailStatus\":\"CrashLoopBackOff: Back-off restarting failed\"},\"previousState\":{\"state\":\"Terminated\",\"startTime\":\"2021-01-09T14:50:16.476Z\",\"exitCode\":111,\"finishTime\":\"2021-01-09T14:50:20.127Z\",\"detailStatus\":\"Error\"},\"events\":null}"
    }
  ]
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service deployment polling reached non-successful terminal state, current service state: Failed\nOperation ID: 11c00b57-4b4a-4322-8ec5-dc75580aadd6\nMore information can be found using '.get_logs()'\nError:\n{\n  \"code\": \"AciDeploymentFailed\",\n  \"message\": \"Aci Deployment failed with exception: Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: automl-review-classification. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\\nYou can also try to run image 1482e13b39f24b188bfd175723d72779.azurecr.io/azureml/azureml_50ad9199f6abe9333ff46bc0245c9350 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\",\n  \"details\": [\n    {\n      \"code\": \"CrashLoopBackOff\",\n      \"message\": \"Your container application crashed. This may be caused by errors in your scoring file's init() function.\\nPlease check the logs for your container instance: automl-review-classification. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. \\nYou can interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\\nYou can also try to run image 1482e13b39f24b188bfd175723d72779.azurecr.io/azureml/azureml_50ad9199f6abe9333ff46bc0245c9350 locally. Please refer to https://aka.ms/debugimage#service-launch-fails for more information.\"\n    },\n    {\n      \"code\": \"AciDeploymentFailed\",\n      \"message\": \"Your container application crashed. Please follow the steps to debug:\\n1. From the AML SDK, you can run print(service.get_logs()) if you have service object to fetch the logs. Please refer to https://aka.ms/debugimage#dockerlog for more information.\\n2. If your container application crashed. This may be caused by errors in your scoring file's init() function. You can try debugging locally first. Please refer to https://aka.ms/debugimage#debug-locally for more information.\\n3. You can also interactively debug your scoring file locally. Please refer to https://docs.microsoft.com/azure/machine-learning/how-to-debug-visual-studio-code#debug-and-troubleshoot-deployments for more information.\\n4. View the diagnostic events to check status of container, it may help you to debug the issue. {\\\"restartCount\\\":5,\\\"currentState\\\":{\\\"state\\\":\\\"Waiting\\\",\\\"startTime\\\":null,\\\"exitCode\\\":null,\\\"finishTime\\\":null,\\\"detailStatus\\\":\\\"CrashLoopBackOff: Back-off restarting failed\\\"},\\\"previousState\\\":{\\\"state\\\":\\\"Terminated\\\",\\\"startTime\\\":\\\"2021-01-09T14:50:16.476Z\\\",\\\"exitCode\\\":111,\\\"finishTime\\\":\\\"2021-01-09T14:50:20.127Z\\\",\\\"detailStatus\\\":\\\"Error\\\"},\\\"events\\\":null}\"\n    }\n  ]\n}"
    }
}

In [None]:
service.get_logs()

TODO: In the cell below, send a request to the web service you deployed to test it.

In [None]:
import requests
import json
from azureml.core.authentication import InteractiveLoginAuthentication

# Get a token to authenticate to the compute instance from remote
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()

# Create and submit a request using the auth header
headers = auth_header
# Add content type header
headers.update({'Content-Type':'application/json'})

# Sample data to send to the service
test_sample = json.dumps({'data': [
    [1,2,3,4,5,6,7,8,9,10],
    [10,9,8,7,6,5,4,3,2,1]
]})
test_sample = bytes(test_sample, encoding = 'utf8')

# Replace with the URL for your compute instance, as determined from the previous section
service_url = service.endpoint
# for a compute instance, the url would be https://vm-name-6789.northcentralus.instances.azureml.net/score
response = requests.post(service_url, test_sample, headers=headers)
print("prediction:", response.text)

TODO: In the cell below, print the logs of the web service and delete the service

In [None]:
print(local_service.get_logs())

In [None]:
mport os
import pickle
import json
import time
from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from gensim.models.word2vec import Word2Vec

# SENTIMENT
POSITIVE = "POSITIVE"
NEGATIVE = "NEGATIVE"
NEUTRAL = "NEUTRAL"
SENTIMENT_THRESHOLDS = (0.4, 0.7)
SEQUENCE_LENGTH = 300

# Called when the deployed service starts
def init():
    global model
    global tokenizer
    global encoder
    global w2v_model

    # Get the path where the deployed model can be found.
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), './models')
    # load models
    model = load_model(model_path + '/model.h5')
    w2v_model = Word2Vec.load(model_path + '/model.w2v')

    with open(model_path + '/tokenizer.pkl','rb') as handle:
        tokenizer = pickle.load(handle)

    with open(model_path + '/encoder.pkl','rb') as handle:
        encoder = pickle.load(handle)

# Handle requests to the service
def run(data):
    try:
        # Pick out the text property of the JSON request.
        # This expects a request in the form of {"text": "some text to score for sentiment"}
        data = json.loads(data)
        prediction = predict(data['text'])
        #Return prediction
        return prediction
    except Exception as e:
        error = str(e)
        return error

# Determine sentiment from score
def decode_sentiment(score, include_neutral=True):
    if include_neutral:
        label = NEUTRAL
        if score <= SENTIMENT_THRESHOLDS[0]:
            label = NEGATIVE
        elif score >= SENTIMENT_THRESHOLDS[1]:
            label = POSITIVE
        return label
    else:
        return NEGATIVE if score < 0.5 else POSITIVE

# Predict sentiment using the model
def predict(text, include_neutral=True):
    start_at = time.time()
    # Tokenize text
    x_test = pad_sequences(tokenizer.texts_to_sequences([text]), maxlen=SEQUENCE_LENGTH)
    # Predict
    score = model.predict([x_test])[0]
    # Decode sentiment
    label = decode_sentiment(score, include_neutral=include_neutral)

    return {"label": label, "score": float(score),
       "elapsed_time": time.time()-start_at}  