In [None]:
import sagemaker.huggingface

In [None]:
import sagemaker

# Initial session; it is only used to look up the default bucket below and is
# replaced further down by a session bound to the chosen bucket.
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it does not exist
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

# Execution role handed to the model later on; printed below as the role ARN.
role = sagemaker.get_execution_role()
# Re-create the session so that it is explicitly tied to the selected bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the resolved configuration so the reader can verify it before deploying.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

In [None]:
import boto3
import os

# Training job whose output artifact is deployed further down.
training_job_name = "huggingface-finetune-twitter2023-03-17--2023-03-17-03-24-18-446"
# Artifact URI layout: s3://<bucket>/<training job name>/output/model.tar.gz
s3_model_location = f"s3://{sagemaker_session_bucket}/{training_job_name}/output/model.tar.gz"
s3_model_location

In [None]:
!pygmentize ./scripts/inference_nlp_bert_sm_compatible.py

In [None]:
# Instance type requested for the endpoint when the model is deployed below.
instance_type = "ml.g4dn.xlarge"

In [None]:
s3_model_location

In [None]:
from sagemaker.huggingface.model import HuggingFaceModel

# Create the Hugging Face model object that bundles the trained artifact with
# the custom inference code found in ./scripts.
huggingface_model = HuggingFaceModel(
   model_data=s3_model_location, # S3 URI of the trained model.tar.gz
   # When source_dir is given, the SDK documents entry_point as a path relative
   # to the root of source_dir, so only the file name is passed here instead of
   # repeating the "./scripts/" prefix.
   entry_point="inference_nlp_bert_sm_compatible.py",
   source_dir = './scripts',
   role=role,
   transformers_version="4.6",  # transformers version used
   pytorch_version="1.7",        # pytorch version used
   py_version='py36',            # python version used
)

In [None]:
# deploy model to SageMaker Inference
# A single instance of `instance_type` is requested; the returned predictor is
# used for the test request in the following cell.
predictor = huggingface_model.deploy(
   initial_instance_count=1,
   instance_type=instance_type
)

In [None]:
# Smoke-test the deployed model with a single sentence via the SDK predictor.
predictions = predictor.predict("I am so happy about this situation")

In [None]:
predictions

In [None]:
# Clean-up: uncomment to delete the endpoint once it is no longer needed.
#predictor.delete_endpoint()

## Inference with the Boto3 SDK

In [None]:
import boto3
import json

In [None]:
# Boto3 client used below to invoke the endpoint directly (invoke_endpoint).
runtime = boto3.client("sagemaker-runtime")

In [None]:
# Name of the endpoint that the Boto3 calls below are sent to.
# NOTE(review): this value is hard-coded; presumably it came from an earlier
# deploy run and should match `predictor.endpoint_name` after a fresh
# deployment — confirm before running.
endpoint_name = "huggingface-pytorch-inference-2023-03-17-05-59-15-741"

In [None]:
# Sample text for the endpoint. Despite the name this is a plain Python string;
# prediction_with_endpoint JSON-encodes it before sending.
input_json = "I am so happy about this situation"
#input_json = "Dont worry Indian army is on its ways to dispatch all Terrorists to Hell"

In [None]:
def prediction_with_endpoint(endpoint_name,runtime,text):
    """Invoke an endpoint with a JSON-encoded payload and decode its JSON reply.

    Args:
        endpoint_name: Name of the endpoint to call.
        runtime: Client object exposing ``invoke_endpoint`` (here the Boto3
            "sagemaker-runtime" client).
        text: JSON-serialisable payload; it is encoded with ``json.dumps``.

    Returns:
        The response body parsed with ``json.loads``.
    """
    payload = json.dumps(text)
    raw_body = runtime.invoke_endpoint(
        EndpointName=endpoint_name,
        Body=payload,
        ContentType="application/json",
    )["Body"].read()
    return json.loads(raw_body)

In [None]:
print(prediction_with_endpoint(endpoint_name, runtime, input_json))

## Evaluation on twitter dataset

In [None]:
import pandas as pd 
import numpy as np

In [None]:
df = pd.read_csv("./dataset/sem_eval_2018_task_1_test.csv")

In [None]:
# Every column other than 'ID' and 'Tweet' is treated as a label column.
label_name = [column for column in df.columns if column not in ('ID', 'Tweet')]
# Position of each label in label_name -> label name.
id2label = dict(enumerate(label_name))
id2label

In [None]:
# Select the label columns by name rather than by position so that the columns
# of the label matrix are exactly those (and in the same order) used to build
# id2label, wherever 'ID' and 'Tweet' happen to sit in the CSV.
label_index = df[label_name].astype(int)
label_index.head(5)

In [None]:
# Selecting with the list ["Tweet"] keeps a one-column frame, so every entry of
# text_tweet is a one-element list holding the tweet text, not a bare string.
text_tweet = df.loc[:,["Tweet"]].values.tolist()

In [None]:
text_tweet[2]

In [None]:
results = prediction_with_endpoint(endpoint_name, runtime, text_tweet[2])
results

In [None]:
def proba_to_label(prediction_proba,id2label,threshold):
    """Return the names of the labels whose score reaches the threshold.

    Args:
        prediction_proba: Sequence of per-label scores, indexed by label id.
        id2label: Mapping from label id (position in ``prediction_proba``) to
            label name.
        threshold: Minimum score for a label to be reported. The comparison is
            inclusive (``>=``) so that it agrees with the rule
            ``multi_label_metrics`` uses to binarise predictions.

    Returns:
        List of label names, in index order, whose score is ``>= threshold``.
        Empty when no score qualifies or when ``prediction_proba`` is empty.
    """
    return [
        id2label[index]
        for index, proba in enumerate(prediction_proba)
        if proba >= threshold
    ]

In [None]:
print(proba_to_label(results[0],id2label,0.5))

### Prediction over entire dataset

In [None]:
# Float buffer shaped like the label matrix; it is filled row by row by the
# prediction loop below.
predictions_proba = np.zeros_like(label_index,dtype=float)

#### Real-time endpoint: one prediction per HTTP request

In [None]:
# One endpoint call per tweet; the leading axis of each response is dropped
# and the remaining vector is written into the matching row of the buffer.
for index, tweet in enumerate(text_tweet):
    result = np.squeeze(
        np.array(prediction_with_endpoint(endpoint_name, runtime, tweet)),
        axis=0,
    )
    predictions_proba[index,:] = result

In [None]:
from sklearn.metrics import f1_score, precision_score, recall_score

In [None]:
def multi_label_metrics(labels, predictions_proba, threshold = 0.5):
    """Compute micro-averaged precision, recall and F1 for thresholded scores.

    Args:
        labels: Ground-truth label matrix, passed to scikit-learn as ``y_true``.
        predictions_proba: Score matrix compared element-wise with
            ``threshold``.
        threshold: Scores ``>= threshold`` are counted as positive predictions.

    Returns:
        Dict with the keys ``'precision'``, ``'recall'`` and ``'f1'``.
    """
    # Start from an all-zero matrix shaped like the labels and switch on every
    # position whose score reaches the threshold.
    y_pred = np.zeros_like(labels)
    positive_positions = np.where(predictions_proba >= threshold)
    y_pred[positive_positions] = 1

    return {
        'precision': precision_score(labels, y_pred, average='micro'),
        'recall': recall_score(labels, y_pred, average='micro'),
        'f1': f1_score(labels, y_pred, average='micro'),
    }

In [None]:
metrics = multi_label_metrics(label_index, predictions_proba, threshold = 0.3)

In [None]:
metrics