In [21]:
# Example of simple model load and evaluate

# ===============LICENSE_START=======================================================
# Apache-2.0
# ===================================================================================
# Copyright (C) 2019 AT&T Intellectual Property  All rights reserved.
# ===================================================================================
# This software file is distributed by AT&T
# under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This file is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ===============LICENSE_END=========================================================


In [25]:
import numpy as np
import pandas as pd
import os,sys,shutil  # file checks
import dill as pickle   # serialize functions and data as compressed binary 
import gzip  # compression 
import yaml   # configuration file
import time  # time tracking

from acumos.modeling import Model, List, Dict, create_namedtuple, create_dataframe
from acumos.wrapped import load_model, _pack_pb_msg
from google.protobuf.json_format import MessageToJson, MessageToDict

# load our configutaion
config_path = 'config.yaml'
if not os.path.isfile(config_path):
    print("Sorry, can't find the configuration file {}, aborting.".format(config_path))
    sys.exit(-1)
config = yaml.safe_load(open(config_path))

In [7]:
## PART 1 - load and start a local model runner 
# https://pypi.org/project/acumos/#using-dataframes-with-scikit-learn

# read our larger datasets as binary files
with gzip.open(config["path"]["etl"], 'rb') as f:
    df = pickle.load(f)
print(df["X_test"].columns)
print(df["X_test"].sample(4))

Index(['helpful', 'reviewText', 'summary', 'unixReviewTime', 'categories',
       'description'],
      dtype='object')
      helpful                                         reviewText  \
5069   [0, 0]  I have been using a remote mouse for Easy Wors...   
427    [1, 1]  The Scotch Gift Wrap Tape lives up to its clai...   
15943  [0, 1]  I liked these labels and couldn't wait to desi...   
2950   [0, 0]  For the sake of my review, I must compare the ...   

                                      summary  unixReviewTime  \
5069                I enjoy the precision ...      1301616000   
427                               Just Great!      1288051200   
15943  Labels are good, Avery templates so-so      1324512000   
2950               PREFERRED OVER COMPETITION      1393891200   

                                              categories  \
5069   [office products, office & school supplies, de...   
427    [office products, office & school supplies, ta...   
15943  [office products, office &

In [36]:
# load model from disk, see that it is a nicely "wrapped" model
wrapped_model = load_model(config["publish"]["name_model2"])
print(wrapped_model)

<acumos.wrapped.WrappedModel object at 0x10f294f98>


In [49]:
def score_local_model(wrapped_model, item_values=[], verbose=False):
    if verbose:
        print(wrapped_model.classify)
    TransIn = wrapped_model.classify._input_type
    TransOut = wrapped_model.classify._output_type

    # start the clock for runtime ellapsed
    time_start = time.time()

    res_list = []
    for i in range(len(item_values)):
        x = item_values[i]
        trans_in = TransIn(*x)
        # trans_out = TransOut(*out)
        print(trans_in)

        # pack into protobuf message
        trans_in_pb = _pack_pb_msg(trans_in, model.classify._module)
        # trans_out_pb = _pack_pb_msg(trans_out, wrapped_model.transform._module)

        # save to protobuf bytes? if you wanted to send via direct HTTP request?
        trans_in_pb_bytes = trans_in_pb.SerializeToString()
        # trans_out_pb_bytes = trans_out_pb.SerializeToString()

        trans_in_dict = MessageToDict(trans_in_pb)
        # trans_out_dict = MessageToDict(trans_out_pb)
        if verbose and i==0:
            print(trans_in_dict)

        trans_in_json = MessageToJson(trans_in_pb, indent=0)
        # trans_out_json = MessageToJson(trans_out_pb, indent=0)
        if verbose and i==0:
            print(trans_in_dict)
        
        print(wrapped_model.classify.from_pb_bytes)
        resp = wrapped_model.classify.from_pb_bytes(trans_in_pb_bytes)
        print(resp)

    print("Evaluation time for {} items, {:0.3f} sec".format(len(item_values), time.time()-time_start))
    if verbose:
        print("---- Help and Input Type Description ----")
        print(help(TransIn))

# actual evaluation code...

# test 1 - take an example of our own sample (where we can see format from the model)
X = [[[0,0], "I've had better", "not much", int(time.time()), ["office products"], "rubbery item"]]
score_local_model(wrapped_model, X, True)



<acumos.wrapped.WrappedFunction object at 0x10f294588>
ReviewDataFrame(helpful=[0, 0], reviewText="I've had better", summary='not much', unixReviewTime=1553837166, categories=['office products'], description='rubbery item')
{'helpful': ['0', '0'], 'reviewText': "I've had better", 'summary': 'not much', 'unixReviewTime': '1553837166', 'categories': ['office products'], 'description': 'rubbery item'}
{'helpful': ['0', '0'], 'reviewText': "I've had better", 'summary': 'not much', 'unixReviewTime': '1553837166', 'categories': ['office products'], 'description': 'rubbery item'}
<bound method WrappedFunction.from_pb_bytes of <acumos.wrapped.WrappedFunction object at 0x10f294588>>
Preprocessing text column 'reviewText'...


TypeError: tuple indices must be integers or slices, not str

In [50]:
# test 2 - evaluate our test data

# read our larger datasets as binary files
with gzip.open(config["path"]["etl"], 'rb') as f:
    df = pickle.load(f)
results = score_local_model(wrapped_model, df["X_test"].values, False)

ReviewDataFrame(helpful=[1, 1], reviewText="There's nothing more dispiriting than spending hours in highlighting and tabbing the important parts of a document with tape flags and then finding, after filing the document away, that some of the flags have fallen off or are folded into the document, no longer drawing attention to the sections carefully identified.  Using these Post-It durable tabs will prevent this from happening, and also permit writing on the tab flagging the page with a marker.  Another benefit is that these tabs adhere well enough to allow a document which has already been reviewed and tabbed to be run through a photocopy machine's feeder if a copy is needed without the tabs coming off in the feeder mechanism.", summary='These tabs perform as promised', unixReviewTime=1293840000, categories=['office products', 'office & school supplies', 'labels, indexes & stamps', 'index tabs & tab inserts', 'tab inserts'], description='Post-it(R) Tabs in 1 inch size create necessary 

TypeError: tuple indices must be integers or slices, not str