#### Imports 

In [2]:
import json

import boto3
import pandas as pd

#### Essentials

In [3]:
# Name of the deployed SageMaker endpoint passed to invoke_endpoint.
# Other values previously used here, kept for reference:
#   'ENTER DEPLOYMENT ENDPOINT HERE'
#   'jumpstart-dft-news-sentiment-classification'
ENDPOINT_NAME = 'huggingface-text2text-flan-t5-xl-1682211931'

# Value sent as ContentType with every request; the payload is JSON-encoded.
CONTENT_TYPE = "application/json"

In [4]:
# Runtime client used to invoke the deployed endpoint. No region or
# credentials are passed explicitly here.
sagemaker = boto3.client(service_name='runtime.sagemaker')

#### Invoke SageMaker Endpoint for Prediction

In [26]:
def predict(context, max_length=20, num_return_sequences=1, top_k=0,
            top_p=0.7, do_sample=True):
    """Ask the deployed endpoint for the sentiment of ``context``.

    Builds a multiple-choice prompt around ``context``, sends it to
    ``ENDPOINT_NAME`` as a JSON payload and returns the first generated text.

    Args:
        context: Text (e.g. a news headline) to classify.
        max_length: Sent as ``max_length`` in the request payload.
        num_return_sequences: Sent as ``num_return_sequences``; only the
            first returned sequence is used regardless of this value.
        top_k: Sent as ``top_k`` in the request payload.
        top_p: Sent as ``top_p`` in the request payload.
        do_sample: Sent as ``do_sample``. With the default ``True`` the
            endpoint presumably samples, so repeated calls on the same
            input may disagree -- TODO confirm against the model docs.

    Returns:
        The first entry of ``generated_texts`` in the endpoint response.

    Raises:
        KeyError: If the response has no ``generated_texts`` field.
        IndexError: If ``generated_texts`` is empty.
    """
    # NOTE(review): "-neural" looks like a typo for "-neutral". It is left
    # unchanged because `label_map` in the benchmark cell uses the same
    # spelling; fix both together so predictions and labels stay comparable.
    prompt = f'{context}\nWhat is the overall sentiment and sentiment score?\nOPTIONS:\n-positive\n-negative\n-neural'

    payload = {'text_inputs': prompt,
               'max_length': max_length,
               'num_return_sequences': num_return_sequences,
               'top_k': top_k,
               'top_p': top_p,
               'do_sample': do_sample}
    # Keep the dict and its encoded form under separate names.
    body = json.dumps(payload).encode('utf-8')
    response = sagemaker.invoke_endpoint(EndpointName=ENDPOINT_NAME,
                                         ContentType=CONTENT_TYPE,
                                         Body=body)

    # The response body is a stream; read it fully before decoding the JSON.
    model_predictions = json.loads(response['Body'].read())
    return model_predictions['generated_texts'][0]

In [27]:
# Sample headline used for a one-off prediction in the next cell.
text = (
    'Apple plans to develop in stages an area of no less than 100,000 sq. meters '
    'in order to host companies working in information technologies and telecommunications'
)
text

'Apple plans to develop in stages an area of no less than 100,000 sq. meters in order to host companies working in information technologies and telecommunications'

In [28]:
# Classify the sample headline and report the text the endpoint generated.
prediction = predict(text)
print('Predicted sentiment: {}'.format(prediction))

Predicted sentiment: positive


## Benchmark

In [29]:
# pandas (`pd`) is imported in the notebook's top import cell.

# Maps the integer values of the `label` column to sentiment names
# (presumably the dataset's class ids -- verify against the data source).
# NOTE(review): "neural" looks like a typo for "neutral". It matches the
# spelling used in the prompt inside `predict`; fix both together.
label_map = {0:"negative", 1:"positive", 2:"neural"}

# Column names are supplied explicitly through `names`.
df = pd.read_csv('./data/data.csv', names=['label', 'headline'])
df.head()

Unnamed: 0,label,headline
0,2,"According to Gran , the company has no plans t..."
1,2,Technopolis plans to develop in stages an area...
2,0,The international electronic industry company ...
3,1,With the new production plant the company woul...
4,1,According to the company 's updated strategy f...


In [35]:
# Show the full, untruncated headline of the row labelled 1.
df.loc[1, 'headline']

'Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working in computer technologies and telecommunications , the statement said .'

In [31]:
# Spot-check the endpoint on the headline of the row labelled 2.
predict(df.loc[2, 'headline'])

'negative'

In [34]:
%%time

preds = []
for _, row in df.iterrows():
    label, text = row.values
    pred = predict(text)
    preds.append(pred)
df["pred"] = preds
df.head(20)

CPU times: user 12.1 s, sys: 472 ms, total: 12.6 s
Wall time: 6min 34s


Unnamed: 0,label,headline,pred
0,2,"According to Gran , the company has no plans t...",negative
1,2,Technopolis plans to develop in stages an area...,positive
2,0,The international electronic industry company ...,negative
3,1,With the new production plant the company woul...,positive
4,1,According to the company 's updated strategy f...,negative
5,1,FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...,positive
6,1,"For the last quarter of 2010 , Componenta 's n...",positive
7,1,"In the third quarter of 2010 , net sales incre...",positive
8,1,Operating profit rose to EUR 13.1 mn from EUR ...,positive
9,1,"Operating profit totalled EUR 21.1 mn , up fro...",positive
