# Deploy & Run Online Inference

Now that we've trained the model and are (presumably) happy with the training results, we can deploy it to a Vertex AI endpoint and use online prediction to test it on a few sample data points.

In [1]:
# Google Cloud project passed as `project` to every helper below.
PROJECT_NAME = "mwpmltr"
# Region passed as `location` to every helper below.
LOCATION = "us-central1"
# Base name of the model; the endpoint display name is derived from it.
MODEL_NAME = "beatles_automl_file_out_2200_tags"
# Label column; it appears as the last column of the inference sample.
TARGET_COLUMN = "Like_The_Beatles"

In [2]:
from google.cloud import aiplatform

In [3]:
def create_endpoint(
    project: str,
    display_name: str,
    location: str,
):
    """Create a Vertex AI endpoint in the given project and location.

    Args:
        project: Project ID or number in which the endpoint is created.
        display_name: Human-readable name given to the new endpoint.
        location: Region in which the endpoint is created, e.g. 'us-central1'.

    Returns:
        The endpoint object returned by ``aiplatform.Endpoint.create``.
    """
    aiplatform.init(project=project, location=location)

    new_endpoint = aiplatform.Endpoint.create(
        display_name=display_name, project=project, location=location
    )

    # Echo both identifiers so the resource name can be copied into later cells.
    for identifier in (new_endpoint.display_name, new_endpoint.resource_name):
        print(identifier)

    return new_endpoint

In [4]:
# Note that you don't have to create an endpoint every time you run this notebook
# create_endpoint(PROJECT_NAME, f'{MODEL_NAME}_endpoint', LOCATION)

In [5]:
def deploy_model(
    project: str,
    location: str,
    model_name: str,
    endpoint_name: str,
    machine_type: str = "e2-standard-4",
):
    """Deploy an existing Vertex AI model to an existing endpoint.

    Args:
        project: Project ID or project number.
        location: Region where the model and endpoint live, e.g. 'us-central1'.
        model_name: A fully-qualified model resource name or model ID.
        endpoint_name: A fully-qualified endpoint resource name or endpoint ID.
        machine_type: Machine type used to serve the model. Defaults to
            "e2-standard-4", the value that was previously hard-coded.

    Returns:
        The ``aiplatform.Model`` object that was deployed.
    """
    aiplatform.init(project=project, location=location)

    model = aiplatform.Model(model_name=model_name)
    endpoint = aiplatform.Endpoint(endpoint_name=endpoint_name)

    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
    )

    # Wait on the model resource before reading its attributes below.
    model.wait()

    print(model.display_name)
    print(model.resource_name)
    return model

In [6]:
# deploy_model(
#     PROJECT_NAME,
#     LOCATION,
#     "projects/55590906972/locations/us-central1/models/8993391587719380992",
#     "projects/55590906972/locations/us-central1/endpoints/3602110043756953600"
# )

Deploying model to Endpoint : projects/55590906972/locations/us-central1/endpoints/3602110043756953600
Deploy Endpoint model backing LRO: projects/55590906972/locations/us-central1/endpoints/3602110043756953600/operations/4494827517445668864


KeyboardInterrupt: 

Now that the model is deployed to the prediction endpoint, we will take our test data points and call the Vertex AI online inference service to predict, for each user, whether they would like the Beatles.

In [7]:
from typing import List, Dict

def predict_tabular_classification(
    project: str,
    location: str,
    endpoint_name: str,
    instances: List[Dict],
) -> List:
    """Request online predictions from a Vertex AI endpoint, print and return them.

    Args:
        project: Your project ID or project number.
        location: Region where Endpoint is located. For example, 'us-central1'.
        endpoint_name: A fully qualified endpoint name or endpoint ID. Example:
            "projects/123/locations/us-central1/endpoints/456" or "456" when
            project and location are initialized or passed.
        instances: A list of one or more instances (examples) to return a
            prediction for.

    Returns:
        The ``predictions`` attribute of the endpoint response, one entry per
        instance. Previously nothing was returned, so callers that stored the
        result only ever received ``None``.
    """
    aiplatform.init(project=project, location=location)

    endpoint = aiplatform.Endpoint(endpoint_name)

    response = endpoint.predict(instances=instances)
    predictions = response.predictions

    # Keep printing each prediction so the notebook output is unchanged.
    for prediction_ in predictions:
        print(prediction_)

    return predictions

In [8]:
import pandas as pd

# Load the sample rows (stored as a Feather file) that are sent to the endpoint below.
inference_sample = pd.read_feather('test_data/inference_sample.feather')

In [9]:
import json

In [10]:
# Display the sample frame; as the last expression in the cell it is rendered as a table.
inference_sample

Unnamed: 0,user_name,30_Seconds_to_Mars,65daysofstatic,A_Perfect_Circle,A_Tribe_Called_Quest,ABBA,ACDC,Adele,Aerosmith,Air,...,tag_shoegazer,tag_hair_metal,tag_rapcore,tag_underground_hip_hop,tag_symphonic_black_metal,tag_darkwave,tag_world,tag_latin,tag_spanish,Like_The_Beatles
0,thegiant,1.0,,,,,,11.0,1.0,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
1,nezter,,,,,,,,,3.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,False
2,augustohp,,52.0,502.0,,1.0,452.0,1.0,215.0,14.0,...,0.0,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,True
3,stalphonzo,,,,,,6.0,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
4,davenall,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
5,Andy_Greenwell,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,True
6,lilyean,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
7,absentbebnim,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
8,adherr,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,False
9,auserzz,,,,,,,25.0,,,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,False


In [12]:
# Fully-qualified resource name of the endpoint queried for predictions.
ENDPOINT_RESOURCE_NAME = 'projects/55590906972/locations/us-central1/endpoints/3602110043756953600'

# Collect whatever each call returns, one entry per row, instead of
# overwriting a single variable on every iteration.
inference_results = []
for _index, row in inference_sample.iterrows():
    # Stringify every value, then round-trip through JSON to get a plain dict.
    instance = json.loads(row.astype(str).to_json())
    inference_results.append(
        predict_tabular_classification(PROJECT_NAME, LOCATION, ENDPOINT_RESOURCE_NAME, [instance])
    )

{'scores': [0.3613875508308411, 0.6386125087738037], 'classes': ['True', 'False']}
{'classes': ['True', 'False'], 'scores': [0.4743289351463318, 0.5256710648536682]}
{'scores': [0.9762881994247437, 0.02371174097061157], 'classes': ['True', 'False']}
{'scores': [0.5234130620956421, 0.4765869975090027], 'classes': ['True', 'False']}
{'classes': ['True', 'False'], 'scores': [0.03440927714109421, 0.9655907154083252]}
{'classes': ['True', 'False'], 'scores': [0.0653722807765007, 0.9346277117729187]}
{'scores': [0.04001647979021072, 0.9599835276603699], 'classes': ['True', 'False']}
{'scores': [0.03490455448627472, 0.9650955200195312], 'classes': ['True', 'False']}
{'classes': ['True', 'False'], 'scores': [0.4869522750377655, 0.5130476951599121]}
{'scores': [0.0671602338552475, 0.9328398108482361], 'classes': ['True', 'False']}


In [13]:
import json

# Save the first sample row, with every value stringified, as a JSON file
# (presumably for reuse as a standalone request payload).
with open('sample_request.json', 'w') as request_file:
    first_row_as_strings = inference_sample.iloc[0].astype(str)
    request_file.write(first_row_as_strings.to_json())


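### Undeploy Model from Vertex AI Endpoint

When you have finished testing, undeploy the model from the endpoint (for example with `aiplatform.Endpoint(endpoint_name).undeploy_all()`) so that it stops incurring hosting charges.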

### Pricing Notes
Which resources incur costs? You pay for three main activities:
- Training the model
    - The price per node hour for tabular classification training is $21.252, so that is what I am charged for each node hour spent training the AutoML Beatles model
- Deploying the model to an endpoint (models must be deployed before they can make either online predictions or online evaluations)
    - You pay for each model deployed to an endpoint, even if no prediction is made
    - Must undeploy your model to stop incurring further charges
    - Models that are not deployed or have failed to deploy are not charged
- Using the model to make predictions; this is for both batch and online predictions (which I think is BS, since we're also paying to host the model at an endpoint, but whatever)