# Simulation for inference

In [18]:
import json
import time
import boto3
import requests
import pandas as pd
from spacy.lang.en import English

In [19]:
# Location of the test CSV; it is read with `pd.read_csv` and replayed row by row.
strTestPath = "s3://sagemaker-ap-northeast-2-419974056037/preprocessing/output/test_data/test.csv"
# HTTP endpoint that receives every simulated event as a JSON POST body.
strAPI = "https://m07tmppemg.execute-api.ap-northeast-2.amazonaws.com/dev/dat-api"

In [20]:
class sim_preprocess():
    """Turn a review CSV into tokenized strings ready to be sent for inference.

    For every row, ``review_headline`` and ``review_body`` are tokenized,
    re-joined with single spaces, truncated, and concatenated (headline first)
    into one string.
    """

    def __init__(self, strDataPath):
        """Store the data location and build the tokenizer.

        Args:
            strDataPath: Path or URL of the CSV, passed as-is to ``pd.read_csv``.
        """
        self.strDataPath = strDataPath
        self.index_to_label = {0: 'NotHelpful', 1: 'Helpful'}

        # Only the tokenizer of the blank English pipeline is used.
        nlp = English()
        self.tokenizer = nlp.tokenizer

    def _labelize_df(self, df):
        """Return a Series of ``'__label__<name>'`` strings built from ``df['is_helpful']``.

        Raises:
            KeyError: if ``is_helpful`` holds a value that is not a key of
                ``self.index_to_label``.
        """
        return '__label__' + df['is_helpful'].apply(lambda is_helpful: self.index_to_label[is_helpful])

    def _tokenize_sent(self, sent, max_length=1000):
        """Tokenize one text and return its tokens joined by single spaces.

        Args:
            sent: Text to tokenize. Missing values (``None``, NaN, ``pd.NA``) are
                treated as an empty string; any other non-string scalar is
                converted with ``str`` first.
            max_length: Maximum number of characters kept from the joined string.

        Returns:
            The space-joined tokens, cut to at most ``max_length`` characters.
        """
        if not isinstance(sent, str):
            # Empty CSV cells come back from pandas as NaN (a float), which the
            # tokenizer cannot handle; map them to '' instead of crashing.
            sent = '' if pd.isna(sent) else str(sent)
        return ' '.join(token.text for token in self.tokenizer(sent))[:max_length]

    def _tokenize_df(self, df):
        """Return a Series with ``'<tokenized headline> <tokenized body>'`` per row."""
        headline = df['review_headline'].apply(self._tokenize_sent)
        body = df['review_body'].apply(self._tokenize_sent)
        return headline + ' ' + body

    def execution(self, ):
        """Read the CSV at ``self.strDataPath`` and return the tokenized reviews as a list of str."""
        pdData = pd.read_csv(self.strDataPath)
        listInput = self._tokenize_df(pdData).to_list()

        return listInput
        
class simulator():
    """Replay a review CSV against an HTTP API as simulated events.

    Every event is a JSON document sent with POST; its ``"mode"`` key is one of
    ``"inference"``, ``"invocation"`` or ``"conversion"``. A failed request is
    printed and skipped so that one bad record does not stop the replay.
    """

    # Seconds to wait for the API before giving up on a request. Without a
    # timeout `requests` waits indefinitely and one stalled call hangs the run.
    REQUEST_TIMEOUT = 30

    def __init__(self, ):

        pass

    def _post(self, strAPI, data):
        """POST ``data`` as a JSON string to ``strAPI`` and return the reply's ``"body"`` entry.

        Raises:
            requests.RequestException: on connection errors, timeouts or an
                HTTP error status.
            ValueError: if the reply is not valid JSON.
            KeyError: if the decoded reply has no ``"body"`` key.
        """
        response = requests.post(strAPI, data=json.dumps(data), timeout=self.REQUEST_TIMEOUT)
        response.raise_for_status()
        return json.loads(response.content)["body"]

    @staticmethod
    def _pause(fInterval):
        """Sleep ``fInterval`` seconds between requests; falsy or non-positive values do nothing."""
        if fInterval and fInterval > 0:
            time.sleep(fInterval)

    def inference(self, strDataPath, strAPI, fInterval):
        """Send one ``"inference"`` event per row of the CSV.

        Args:
            strDataPath: Path or URL of the CSV; it needs the columns
                ``review_headline``, ``review_body`` and ``product_id``.
            strAPI: URL that receives the POST requests.
            fInterval: Seconds to sleep after each successful request.
        """
        print ("Preparing simulator...")
        eval_prep = sim_preprocess(strDataPath)
        listInferenceInput = eval_prep.execution()

        # Read the ids from the same file the reviews were tokenized from, so
        # that the i-th review is paired with the i-th product.
        pdData = pd.read_csv(strDataPath)
        pdData['review_id'] = [f'r-{n}' for n in range(pdData.shape[0])]

        records = zip(listInferenceInput, pdData.review_id, pdData.product_id)
        for idx, (strReview, strReviewID, strProductID) in enumerate(records):

            data = {
                "mode": "inference",
                "generator_idx": idx,
                "review_id" :strReviewID,
                "product_id": strProductID,
                "review": strReview
            }

            try:
                self._post(strAPI, data)

                # Progress is reported on every 100th successfully sent record.
                if idx % 100 == 0:
                    print (idx)
                    print (data)
                self._pause(fInterval)
            except Exception as e:
                print ("ERROR", e, strReviewID)

    def invocation_conversion_simulator(self, strDataPath, strAPI, fInterval):
        """Send an ``"invocation"`` event per row, plus a ``"conversion"`` event for helpful rows.

        A synthetic ``user_id`` (``u-<row>``) and ``review_id`` (``r-<row>``) are
        generated for every row. The conversion event is sent when the row's
        ``is_helpful`` value is truthy, whether or not the invocation request
        succeeded.

        Args:
            strDataPath: Path or URL of the CSV; it needs the columns
                ``product_id`` and ``is_helpful``.
            strAPI: URL that receives the POST requests.
            fInterval: Seconds to sleep after each successful request.
        """
        pdData = pd.read_csv(strDataPath)
        pdData['review_id'] = [f'r-{n}' for n in range(pdData.shape[0])]
        pdData['user_id'] = [f'u-{n}' for n in range(pdData.shape[0])]

        for idx, tupleRecords in enumerate(pdData.itertuples()):

            strUserID, strProductID = tupleRecords.user_id, tupleRecords.product_id
            bIsHelpful, strReviewID = tupleRecords.is_helpful, tupleRecords.review_id

            data = {
                "mode": "invocation",
                "user_id" :strUserID,
                "product_id": strProductID,
            }

            try:
                self._post(strAPI, data)
                if idx % 100 == 0:
                    print (idx)
                    print (data)
                self._pause(fInterval)
            except Exception as e:
                print ("err", e)

            if bIsHelpful:

                data = {
                    "mode": "conversion",
                    "user_id" :strUserID,
                    "product_id": strProductID,
                    "review_id": strReviewID
                }
                try:
                    self._post(strAPI, data)
                    self._pause(fInterval)
                except Exception as e:
                    print ("err", e)

In [21]:
%%time
# Replay the whole test set against the API as "inference" events and time the run.
sim = simulator()
sim.inference(strTestPath, strAPI, fInterval=0.00001)

Preparing simulator...
0
{'mode': 'inference', 'generator_idx': 0, 'review_id': 'r-0', 'product_id': 'B000EPLP3C', 'review': 'Excellent Item This is the perfect media device for anyone who enjoys having music , videos and pictures at your fingertips . Would definettley recommend it over the Ipod Video'}
100
{'mode': 'inference', 'generator_idx': 100, 'review_id': 'r-100', 'product_id': 'B00AF56QA8', 'review': "Very good for the price range ! I bought this sound bar because it seems like the perfect fit for my TV stand to hold it without sticking out of the sides and for my small living room . I also wanted a sound bar with a sub woofer on it or included . This definitely did all of that.<br /><br />It fits perfectly on the TV stand and the sound is huge on it ! The bass is almost too much . For TV I have to turn the bass down to -3 just so it does n't overwhelm the treble . Beats out the TV speakers by a huge margin , not even close . The treble is clear , highs and mids and base all a