In [4]:
!pip install transformers einops accelerate bitsandbytes

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [5]:
!pip install langchain

Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com


In [6]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
import torch
import base64

In [9]:
# Hugging Face Hub ID of the checkpoint; passed to both from_pretrained calls in the next cell.
checkpoints = "MBZUAI/LaMini-Flan-T5-248M"

In [10]:
# Load the tokenizer and the seq2seq model from the same checkpoint ID.
tokenizer = AutoTokenizer.from_pretrained(checkpoints)
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    checkpoints,
    torch_dtype=torch.float32,
    device_map="auto",
)

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.50k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

In [18]:
# Now let's define LLM Pipeline
from langchain.llms import HuggingFacePipeline
def llm_pipeline(max_length=256, do_sample=True, temperature=0.3, top_p=0.95):
    """Wrap the notebook's model and tokenizer in a LangChain LLM.

    Builds a transformers ``text2text-generation`` pipeline from the
    notebook-level ``base_model`` and ``tokenizer`` and hands it to
    ``HuggingFacePipeline``. Calling the function with no arguments uses
    the same generation settings that were previously hard-coded.

    Args:
        max_length: Forwarded to ``pipeline`` as ``max_length``.
        do_sample: Forwarded to ``pipeline`` as ``do_sample``.
        temperature: Forwarded to ``pipeline`` as ``temperature``.
        top_p: Forwarded to ``pipeline`` as ``top_p``.

    Returns:
        The ``HuggingFacePipeline`` instance wrapping the new pipeline.
    """
    pipe = pipeline(
        'text2text-generation',
        model=base_model,
        tokenizer=tokenizer,
        max_length=max_length,
        do_sample=do_sample,
        temperature=temperature,
        top_p=top_p,
    )
    return HuggingFacePipeline(pipeline=pipe)

In [19]:
# Prompt sent to the local pipeline (doubled article "a an" removed).
input_prompt = "Write an article about Large Language Model"

In [20]:
# Build the LangChain wrapper and run the prompt through it.
model = llm_pipeline()
generated_text = model(input_prompt)
# Bare expression so the notebook displays the generated text.
generated_text

'Large Language Models (LLMs) are a type of machine learning model that uses large amounts of data to generate coherent and meaningful outputs. LLMs are used in a variety of applications, including speech recognition, natural language processing, and chatbots. LLMs are used to analyze large amounts of text data, such as text messages, emails, and text messages. LLMs are designed to handle large amounts of text data, such as text, images, and videos. They are designed to handle large amounts of text data, such as text, images, and videos. One of the most significant features of LLMs is their ability to handle large amounts of text data. LLMs are capable of handling large amounts of text data, such as text, images, and videos. They can handle large amounts of text data, such as text, images, and videos. LLMs are also designed to handle large amounts of text data cosmically. LLMs are used in various applications, such as chatbots, chatbots, and chatbots. They are used in various industrie

In [23]:
!pip uninstall -y sagemaker # uninstall once because deployment issue
!pip install -U sagemaker

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Found existing installation: sagemaker 2.168.0
Uninstalling sagemaker-2.168.0:
  Successfully uninstalled sagemaker-2.168.0
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Looking in indexes: https://pypi.org/simple, https://pip.repos.neuron.amazonaws.com
Collecting sagemaker
  Downloading sagemaker-2.175.0.tar.gz (857 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m857.4/857.4 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Preparing 

In [24]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the role ARN handed to HuggingFaceModel: ask the SageMaker SDK first,
# and on ValueError look the role up by name through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role_info = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_info['Role']['Arn']

# Hub model configuration (https://huggingface.co/models); given to
# HuggingFaceModel as its `env` mapping.
# NOTE(review): 'device_map' and 'torch_dtype' look like from_pretrained keyword
# arguments rather than container settings - confirm the serving image reads them.
hub = {
    'HF_MODEL_ID': 'MBZUAI/LaMini-Flan-T5-248M',
    'HF_TASK': 'text2text-generation',
    'device_map': 'auto',
    'torch_dtype': 'torch.float32',
}


# Create the Hugging Face model object around the LLM image for version 0.9.3.
llm_image_uri = get_huggingface_llm_image_uri("huggingface", version="0.9.3")
huggingface_model = HuggingFaceModel(
    role=role,
    env=hub,
    image_uri=llm_image_uri,
)

# Deploy the model to SageMaker Inference.
# The instance type was changed from ml.g5.2xlarge to ml.g4dn.2xlarge.
predictor = huggingface_model.deploy(
    instance_type="ml.g4dn.2xlarge",
    initial_instance_count=1,
    container_startup_health_check_timeout=300,
)
  
# Send a first request; the call is the cell's last expression, so its result is displayed.
predictor.predict({"inputs": "Write a short article on Metaverse and digital twin"})

----------!

[{'generated_text': 'Metaverse and digital twin are two popular espionage games that have gained popularity in recent'}]

In [25]:
# Now let's try one more prompt
prompt = "Write an article about ChatGPT"

# Hyperparameter payload: the prompt plus the generation parameters sent with it
payload = {
    "inputs": prompt,
    "parameters": {
        "do_sample": True,
        "top_p": 0.7,
        "temperature": 0.3,
        "top_k": 50,
        "max_new_tokens": 512,
        "repetition_penalty": 1.03,
    },
}

# Send the payload to the endpoint through the SDK predictor
response = predictor.predict(payload)

print(response)

[{'generated_text': 'ChatGPT is a popular messaging app that allows users to connect with friends and family through text messages. It is designed to provide a seamless and convenient way for users to communicate with each other. ChatGPT is a popular messaging app that offers a variety of features, including voice and video calls, chatbots, and chatbots. The app is designed to be user-friendly and easy to use, making it a great option for businesses and individuals looking to improve their communication skills. ChatGPT is also known for its user-friendly interface, which makes it easy to navigate and find what they need. Users can easily set up their chatbots and chatbots, and they can also customize their chatbots to suit their specific needs. ChatGPT is also known for its ability to handle large volumes of text messages, making it a popular choice for businesses and individuals looking to improve their communication skills. Overall, ChatGPT is a powerful messaging app that offers a w

In [26]:
# Pull the generated string out of the first element of the response list.
response[0]['generated_text']

'ChatGPT is a popular messaging app that allows users to connect with friends and family through text messages. It is designed to provide a seamless and convenient way for users to communicate with each other. ChatGPT is a popular messaging app that offers a variety of features, including voice and video calls, chatbots, and chatbots. The app is designed to be user-friendly and easy to use, making it a great option for businesses and individuals looking to improve their communication skills. ChatGPT is also known for its user-friendly interface, which makes it easy to navigate and find what they need. Users can easily set up their chatbots and chatbots, and they can also customize their chatbots to suit their specific needs. ChatGPT is also known for its ability to handle large volumes of text messages, making it a popular choice for businesses and individuals looking to improve their communication skills. Overall, ChatGPT is a powerful messaging app that offers a wide range of feature

In [27]:
# Now let's call the deployed endpoint directly by name.
# Take the name from the predictor rather than pasting it in: the generated name
# carries a timestamp, so a hard-coded copy goes stale on every fresh deployment.
ENDPOINT = predictor.endpoint_name

In [28]:
import boto3

In [29]:
# Low-level runtime client; used below to invoke the endpoint without the SDK predictor.
# NOTE(review): boto3 documents this service under the name "sagemaker-runtime" - confirm whether to switch.
runtime = boto3.client("runtime.sagemaker")

In [30]:
# Post the same payload, serialized as JSON, through the runtime client.
response = runtime.invoke_endpoint(
    EndpointName=ENDPOINT,
    ContentType="application/json",
    Body=json.dumps(payload),
)

In [31]:
# Show the raw invoke_endpoint response; its 'Body' entry is read and parsed in a later cell.
print(response)

{'ResponseMetadata': {'RequestId': 'c29a7bc8-c874-4aa5-9ca0-277253966096', 'HTTPStatusCode': 200, 'HTTPHeaders': {'x-amzn-requestid': 'c29a7bc8-c874-4aa5-9ca0-277253966096', 'x-amzn-invoked-production-variant': 'AllTraffic', 'date': 'Wed, 09 Aug 2023 09:02:17 GMT', 'content-type': 'application/json', 'content-length': '1384', 'connection': 'keep-alive'}, 'RetryAttempts': 0}, 'ContentType': 'application/json', 'InvokedProductionVariant': 'AllTraffic', 'Body': <botocore.response.StreamingBody object at 0x7fdd5a5868c0>}


In [32]:
# Read the response body, decode it as UTF-8 and parse the JSON it contains.
raw_body = response['Body'].read()
prediction = json.loads(raw_body.decode('utf-8'))

In [33]:
# Display the parsed prediction.
prediction

[{'generated_text': 'ChatGPT is a popular mobile application that allows users to connect with their friends and family through a mobile device. It is a powerful messaging app that allows users to send and receive messages, send messages, and send messages to their friends and family. ChatGPT is a free and open-source app that allows users to create, manage, and share their own chats, chats, and other online services. ChatGPT is a free app that allows users to create, manage, and share their own chats, chats, and other online services. The app is designed to be user-friendly and easy to use, making it a great option for businesses and individuals looking to connect with their friends and family. ChatGPT is also compatible with many different mobile devices, including smartphones, tablets, and laptops. Users can use the app to send and receive messages, send messages, and send messages. ChatGPT is also compatible with many other mobile devices, such as smartphones, tablets, and laptops.

In [34]:
# Pull the generated string out of the first element of the parsed prediction.
prediction[0]['generated_text']

'ChatGPT is a popular mobile application that allows users to connect with their friends and family through a mobile device. It is a powerful messaging app that allows users to send and receive messages, send messages, and send messages to their friends and family. ChatGPT is a free and open-source app that allows users to create, manage, and share their own chats, chats, and other online services. ChatGPT is a free app that allows users to create, manage, and share their own chats, chats, and other online services. The app is designed to be user-friendly and easy to use, making it a great option for businesses and individuals looking to connect with their friends and family. ChatGPT is also compatible with many different mobile devices, including smartphones, tablets, and laptops. Users can use the app to send and receive messages, send messages, and send messages. ChatGPT is also compatible with many other mobile devices, such as smartphones, tablets, and laptops. The app is availabl