In [1]:
!pip install transformers einops accelerate bitsandbytes



In [2]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import pipeline
import torch
import base64



In [3]:
# Hugging Face Hub id of the checkpoint that the tokenizer and model are loaded from.
checkpoint = "MBZUAI/LaMini-T5-738M"

In [4]:
# Load the tokenizer and the seq2seq model for `checkpoint`.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.float32,  # load weights as float32
    device_map="auto",          # automatic device placement
)

In [5]:
!pip install langchain



In [6]:
from langchain.llms import HuggingFacePipeline

In [7]:
def llm_pipeline():
    """Wrap the locally loaded model as a LangChain LLM.

    Builds a ``text2text-generation`` pipeline from the notebook-level
    ``base_model`` and ``tokenizer`` with sampling enabled
    (``temperature=0.3``, ``top_p=0.95``, ``max_length=256``).

    Returns:
        A ``HuggingFacePipeline`` wrapping that pipeline.
    """
    # Generation settings forwarded verbatim to the pipeline constructor.
    generation_settings = dict(
        max_length=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
    )
    text2text = pipeline(
        'text2text-generation',
        model=base_model,
        tokenizer=tokenizer,
        **generation_settings,
    )
    return HuggingFacePipeline(pipeline=text2text)

In [8]:
# Example prompt used to try out the local pipeline.
input_prompt = "How to pass an exam"

In [9]:
# Build the LangChain wrapper around the local pipeline and run the example prompt.
model = llm_pipeline()
generated_text = model(input_prompt)
# Bare last expression so the notebook displays the generated string.
generated_text

'To pass an exam, you should: 1. Practice regularly 2. Understand the material thoroughly 3. Use active learning techniques such as summarizing, quizzing yourself, and teaching the material to someone else 4. Get enough sleep, exercise, and eat well 5. Manage stress and anxiety 6. Seek help from your teacher or tutor if needed.'

In [13]:
!pip uninstall -y sagemaker

Found existing installation: sagemaker 2.168.0
Uninstalling sagemaker-2.168.0:
  Successfully uninstalled sagemaker-2.168.0


In [14]:
!pip install sagemaker

Collecting sagemaker
  Downloading sagemaker-2.202.1-py2.py3-none-any.whl.metadata (13 kB)
Downloading sagemaker-2.202.1-py2.py3-none-any.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25hInstalling collected packages: sagemaker
Successfully installed sagemaker-2.202.1


In [15]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the IAM role ARN for the deployment. If get_execution_role() raises
# ValueError, look up the role named 'sagemaker_execution_role' through IAM instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Hub model configuration passed to the container as environment variables.
# https://huggingface.co/models
# NOTE(review): 'device_map' and 'torch_dtype' look like from_pretrained keyword
# arguments rather than container settings — confirm the image actually reads them.
# 'SM_NUM_GPUS' is not set (previously commented out as json.dumps(1)).
hub = {
    'HF_MODEL_ID': 'MBZUAI/LaMini-T5-738M',
    'HF_TASK': 'text2text-generation',
    'device_map': 'auto',
    'torch_dtype': 'torch.float32',
}

# Model definition using the Hugging Face LLM image, version 1.1.0.
huggingface_model = HuggingFaceModel(
    image_uri=get_huggingface_llm_image_uri("huggingface", version="1.1.0"),
    env=hub,
    role=role,
)

# Deploy to SageMaker Inference: one ml.g4dn.xlarge instance, with a
# container startup health-check timeout of 300.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
    container_startup_health_check_timeout=300,
)

# Send a first request; the bare call is the cell's displayed result.
predictor.predict({"inputs": "Write a short artical on cow."})

----------!

[{'generated_text': "Cow, oh cow, how you love me! I'm a big fan of"}]

In [33]:
# Request with explicit generation parameters, sent through the SDK predictor.
prompt = "Write a short article on Human"

payload = dict(
    inputs=prompt,
    parameters=dict(
        do_sample=True,
        top_p=0.7,
        temperature=0.3,
        top_k=50,
        max_new_tokens=512,
        repetition_penalty=1.03,
    ),
)

response = predictor.predict(payload)
print(response)

[{'generated_text': 'Humans are the basic unit of life on Earth. They are capable of performing various tasks such as walking, running, eating, and sleeping. Humans are also capable of adapting to new environments and challenges. They are social creatures that are meant to be a source of companionship and support. Humans are also capable of learning and developing new skills through experience and education. They are constantly evolving and adapting to new technologies and ways of life. Humans are also capable of adapting to changing environments and challenges. They are constantly evolving and adapting to new experiences and challenges. Humans are also constantly evolving and adapt'}]


In [34]:
# Take the endpoint name from the live predictor instead of hard-coding a
# timestamped name (e.g. "huggingface-pytorch-tgi-inference-2023-12-23-11-21-55-717"),
# which goes stale as soon as the model is redeployed.
ENDPOINT = predictor.endpoint_name

In [23]:
import boto3

In [24]:
# Low-level client for invoking SageMaker endpoints; 'sagemaker-runtime' is the
# documented boto3 service name (the original 'runtime.sagemaker' is a legacy alias).
runtime = boto3.client('sagemaker-runtime')

In [26]:
# Invoke the endpoint through the runtime client, sending `payload` as JSON.
response = runtime.invoke_endpoint(
    EndpointName=ENDPOINT,
    ContentType='application/json',
    Body=json.dumps(payload),
)

In [27]:
# Display the raw invoke_endpoint response (metadata plus the 'Body' stream).
response

{'ResponseMetadata': {'RequestId': '9efeb44b-c46a-40c2-b88a-24439406ee13',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': '9efeb44b-c46a-40c2-b88a-24439406ee13',
   'x-amzn-invoked-production-variant': 'AllTraffic',
   'date': 'Sat, 23 Dec 2023 11:38:15 GMT',
   'content-type': 'application/json',
   'content-length': '600',
   'connection': 'keep-alive'},
  'RetryAttempts': 0},
 'ContentType': 'application/json',
 'InvokedProductionVariant': 'AllTraffic',
 'Body': <botocore.response.StreamingBody at 0x7f85dc7b88b0>}

In [30]:
# Read the response body, decode it as UTF-8 and parse the JSON document.
# NOTE(review): 'Body' is a streaming object and is presumably consumable only
# once — re-running this cell without re-invoking the endpoint likely fails; confirm.
prediction = json.loads(response['Body'].read().decode('utf-8'))

In [31]:
# Display the parsed prediction (a list of dicts, as shown in the output below).
prediction

[{'generated_text': 'Cows are domesticated animals that are raised for their meat, milk, and eggs. They are known for their distinctive horns, long necks, and distinctive horn-like structures. Cows are also known for their ability to produce milk, which is used for milking cows. Cows are important agricultural products, as they are used for milk production, meat production, and as a source of protein for humans. They are also important for the environment, as they help to regulate the growth of crops and animals. Cows are also important for the economy, as they provide a source of income for'}]

In [32]:
# Pull the generated string out of the first (only) result entry.
prediction[0]['generated_text']

'Cows are domesticated animals that are raised for their meat, milk, and eggs. They are known for their distinctive horns, long necks, and distinctive horn-like structures. Cows are also known for their ability to produce milk, which is used for milking cows. Cows are important agricultural products, as they are used for milk production, meat production, and as a source of protein for humans. They are also important for the environment, as they help to regulate the growth of crops and animals. Cows are also important for the economy, as they provide a source of income for'