In [None]:
!pip install sagemaker --quiet --upgrade --force-reinstall
!pip install ipywidgets==7.0.0 --quiet
!pip install langchain --quiet --upgrade

## 01. Set-up

In [17]:
import sagemaker
from sagemaker.predictor_async import AsyncPredictor
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

In [13]:
# Create a single SageMaker session and reuse it for every lookup, rather
# than constructing a second Session object just to read the bucket name.
session = sagemaker.session.Session()
bucket = session.default_bucket()
# Key prefix under `bucket` used when building the S3 input path for requests.
prefix = "async-sagemaker-tests/inputs"

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /root/.config/sagemaker/config.yaml


## 02. Deploy Falcon model from JumpStart using Asynchronous Inference

In [None]:
%%time
from sagemaker.jumpstart.model import JumpStartModel, AsyncInferenceConfig
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer


model_id, model_version = "huggingface-llm-falcon-40b-instruct-bf16", "*"
my_model = JumpStartModel(model_id=model_id)
predictor = my_model.deploy(
    initial_instance_count=0,
    instance_type="ml.g5.12xlarge",
    async_inference_config=AsyncInferenceConfig()
)

## 03. Start making predictions

In [10]:
# Name of the endpoint that the predictor in the next cell attaches to.
# NOTE(review): this is a fixed, timestamped name — presumably copied from an
# earlier deployment; update it to match the endpoint you actually deployed.
endpoint_name = "hf-llm-falcon-40b-instruct-bf16-2023-10-10-22-01-55-410"

In [18]:
# Build a regular Predictor bound to the named endpoint, using the JSON
# serializer/deserializer pair, then wrap it in an AsyncPredictor.
base_predictor = Predictor(
    endpoint_name=endpoint_name,
    sagemaker_session=session,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
)
predictor = AsyncPredictor(base_predictor)

In [27]:
import json

# Request body passed to the predictor: the prompt goes under "inputs" and
# the generation options under "parameters".
payload = dict(
    inputs="Write a program to compute factorial in python:",
    parameters=dict(max_new_tokens=400),
)

In [None]:
import uuid

# S3 location handed to the predictor as `input_path` for this request.
# A random UUID in the key means repeated runs do not reuse the same object name.
input_s3_uri = "s3://{}/{}/payload-{}".format(bucket, prefix, uuid.uuid4())

response = predictor.predict(data=payload, input_path=input_s3_uri)

In [26]:
# Take the generated text from the first item of the response and print it
# after a bold "Output:" label (\033[1m turns bold on, \033[0m resets it).
generated_text = response[0]["generated_text"]
print(f"\033[1m Output:\033[0m {generated_text}")


[1m Output:[0m 
You can compute factorial in Python using the built-in function `math.factorial()`. Here's an example:

```python
import math

n = 5
factorial = math.factorial(n)
print(factorial)
```

This will output `120`, which is the factorial of 5.
