In [5]:
import json
import time
import requests
import pandas as pd
from spacy.lang.en import English

In [6]:
# S3 URI of the test-set CSV; passed to the simulator as its data path and
# read with pd.read_csv.
# NOTE(review): bucket and endpoint are hard-coded and account-specific —
# consider loading them from environment/config before sharing this notebook.
strTestPath = "s3://sagemaker-ap-northeast-2-419974056037/preprocessing/output/test_data/test.csv"
# HTTP endpoint that receives every simulated event via requests.post.
strAPI = "https://m07tmppemg.execute-api.ap-northeast-2.amazonaws.com/dev/dat-api"

In [8]:
class sim_preprocess():
    """Turn the review CSV into the text inputs sent to the inference API.

    For every row, ``review_headline`` and ``review_body`` are tokenized,
    re-joined with single spaces, truncated, and concatenated (separated by
    one space) into a single string.
    """

    def __init__(self, strDataPath):
        """Store the data location and build the tokenizer.

        Args:
            strDataPath: Path or URI of the CSV; handed to ``pd.read_csv``
                by :meth:`execution`.
        """
        self.strDataPath = strDataPath
        # Class index -> label name, used by `_labelize_df`.
        self.index_to_label = {0: 'NotHelpful', 1: 'Helpful'}
        # Only tokenization is needed, so just the tokenizer is kept.
        self.tokenizer = English().tokenizer

    def _labelize_df(self, df):
        """Return a Series of ``'__label__<name>'`` strings built from ``df['is_helpful']``.

        Raises:
            KeyError: if a value of ``is_helpful`` is not a key of
                ``self.index_to_label``.
        """
        return '__label__' + df['is_helpful'].apply(lambda is_helpful: self.index_to_label[is_helpful])

    def _tokenize_sent(self, sent, max_length=1000):
        """Tokenize one text and return the space-joined tokens.

        Args:
            sent: Text to tokenize. Missing values (``None``/NaN, which is
                what pandas yields for empty CSV cells) are treated as an
                empty string; other non-string scalars are converted with
                ``str``.
            max_length: Maximum number of *characters* kept from the joined
                string (the cut is applied after joining, not per token).

        Returns:
            The tokens joined by single spaces, cut to ``max_length`` characters.
        """
        if not isinstance(sent, str):
            # The tokenizer only accepts str; without this guard a single
            # empty headline/body cell aborts the whole run.
            sent = '' if pd.isna(sent) else str(sent)
        return ' '.join(token.text for token in self.tokenizer(sent))[:max_length]

    def _tokenize_df(self, df):
        """Return a Series with tokenized headline + ``' '`` + tokenized body per row."""
        headline = df['review_headline'].apply(self._tokenize_sent)
        body = df['review_body'].apply(self._tokenize_sent)
        return headline + ' ' + body

    def execution(self, ):
        """Read the CSV at ``self.strDataPath`` and return the model inputs as a list of str."""
        pdData = pd.read_csv(self.strDataPath)
        return self._tokenize_df(pdData).to_list()

class simulator():
    """Replay rows of a review CSV against the HTTP API as synthetic traffic.

    Three event kinds are produced, distinguished by the ``mode`` field of
    the JSON payload: ``inference``, ``invocation`` and ``conversion``.
    """

    def __init__(self, ):
        pass

    def _post_event(self, strAPI, data):
        """POST one event as JSON and return the ``body`` field of the reply.

        Raises:
            Exception: whatever ``requests.post`` / ``json.loads`` raise, and
                ``KeyError`` when the reply has no ``body`` field. Callers
                catch and report these so one bad request does not stop the run.
        """
        response = requests.post(strAPI, json.dumps(data))
        return json.loads(response.content)["body"]

    def inference(self, strDataPath, strAPI, fInterval):
        """Send one ``inference`` event per CSV row.

        Args:
            strDataPath: Path or URI of the CSV; needs the columns used by
                ``sim_preprocess`` plus ``product_id``.
            strAPI: URL the events are POSTed to.
            fInterval: Currently unused — no pause is made between requests.
        """
        print ("Preparing simulator...")
        eval_prep = sim_preprocess(strDataPath)
        listInferenceInput = eval_prep.execution()

        # Read the path that was passed in (not a notebook-level global) so the
        # IDs line up with the texts produced from the same file above.
        pdData = pd.read_csv(strDataPath)
        # Synthetic IDs: row n becomes review 'r-n'.
        pdData['review_id'] = [f'r-{n}' for n in range(pdData.shape[0])]

        for idx, (strReview, strReviewID, strProductID) in enumerate(
                zip(listInferenceInput, pdData.review_id, pdData.product_id)):

            data = {
                "mode": "inference",
                "generator_idx": idx,
                "review_id" :strReviewID,
                "product_id": strProductID,
                "review": strReview
            }

            try:
                self._post_event(strAPI, data)
            except Exception as e:
                print ("ERROR", e, strReviewID)
            else:
                # Progress marker every 100 successful rows.
                if idx % 100 == 0: print (idx)

    def invocation_conversion_simulator(self, strDataPath, strAPI, fInterval):
        """Send an ``invocation`` event per row, plus a ``conversion`` event for helpful rows.

        Args:
            strDataPath: Path or URI of the CSV; needs the columns
                ``product_id`` and ``is_helpful``.
            strAPI: URL the events are POSTed to.
            fInterval: Currently unused — no pause is made between requests.
        """
        pdData = pd.read_csv(strDataPath)
        # Synthetic IDs: row n becomes review 'r-n' written by user 'u-n'.
        pdData['review_id'] = [f'r-{n}' for n in range(pdData.shape[0])]
        pdData['user_id'] = [f'u-{n}' for n in range(pdData.shape[0])]

        for idx, tupleRecords in enumerate(pdData.itertuples()):

            strUserID, strProductID = tupleRecords.user_id, tupleRecords.product_id
            bIsHelpful, strReviewID = tupleRecords.is_helpful, tupleRecords.review_id

            data = {
                "mode": "invocation",
                "user_id" :strUserID,
                "product_id": strProductID,
            }

            try:
                self._post_event(strAPI, data)
            except Exception as e:
                # Report instead of silently dropping the failure.
                print ("ERROR", e, strUserID)
            else:
                if idx % 100 == 0:
                    print (idx)
                    print (data)

            # Rows flagged helpful additionally generate a conversion event.
            if bIsHelpful:

                data = {
                    "mode": "conversion",
                    "user_id" :strUserID,
                    "product_id": strProductID,
                    "review_id": strReviewID
                }

                try:
                    self._post_event(strAPI, data)
                except Exception as e:
                    print ("ERROR", e, strReviewID)
                else:
                    if idx % 100 == 0:
                        print (idx)
                        print (data)

In [9]:
%%time
# Replay the test CSV as invocation/conversion traffic against the API.
# NOTE(review): fInterval is accepted but currently has no effect — the
# simulator performs no sleep between requests.
sim = simulator()
sim.invocation_conversion_simulator(strTestPath, strAPI, fInterval=0.00001)

100
{'mode': 'invocation', 'user_id': 'u-100', 'product_id': 'B00AF56QA8'}
100
{'mode': 'conversion', 'user_id': 'u-100', 'product_id': 'B00AF56QA8', 'review_id': 'r-100'}
200
{'mode': 'invocation', 'user_id': 'u-200', 'product_id': 'B000094FVE'}
200
{'mode': 'conversion', 'user_id': 'u-200', 'product_id': 'B000094FVE', 'review_id': 'r-200'}
300
{'mode': 'invocation', 'user_id': 'u-300', 'product_id': 'B0001A967W'}
400
{'mode': 'invocation', 'user_id': 'u-400', 'product_id': 'B001IAKS3U'}
400
{'mode': 'conversion', 'user_id': 'u-400', 'product_id': 'B001IAKS3U', 'review_id': 'r-400'}
500
{'mode': 'invocation', 'user_id': 'u-500', 'product_id': 'B00IPKETBS'}
600
{'mode': 'invocation', 'user_id': 'u-600', 'product_id': 'B00H2QUD9S'}
700
{'mode': 'invocation', 'user_id': 'u-700', 'product_id': 'B002Q0ZCNC'}
800
{'mode': 'invocation', 'user_id': 'u-800', 'product_id': 'B00GZC35YK'}
900
{'mode': 'invocation', 'user_id': 'u-900', 'product_id': 'B006V386UG'}
900
{'mode': 'conversion', 'user_i