# 1. Install necessary libraries

In [7]:
!pip install "sagemaker==2.163.0" --upgrade --quiet
!pip install transformers langchain --quiet

# 2. Deploy Falcon 40B model on AWS

In [2]:
import json
import boto3
import sagemaker
from sagemaker.utils import name_from_base
from sagemaker.huggingface import HuggingFaceModel
from sagemaker.huggingface import get_huggingface_llm_image_uri

# Resolve the S3 bucket, IAM role and region, then describe the model and endpoint
# configuration. Later cells read `region`, `endpoint_name` and `llm_model` from here.
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it does not exist
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # set to default bucket if a bucket name is not given
    sagemaker_session_bucket = sess.default_bucket()

try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError (presumably when running outside
    # SageMaker - TODO confirm); fall back to looking the role up by name via IAM.
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Recreate the session, this time bound explicitly to the resolved bucket.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)
region = sess.boto_region_name
print(f"sagemaker role arn: {role}")
print(f"sagemaker session region: {region}")

# retrieve the llm image uri
llm_image = get_huggingface_llm_image_uri(
  "huggingface",
  version="0.8.2"
)

# print ecr image uri
print(f"llm image uri: {llm_image}")

# Define Model and Endpoint configuration parameters
hf_model_id = "tiiuae/falcon-40b-instruct" # model id from huggingface.co/models
instance_type = "ml.g5.12xlarge" # instance type to use for deployment
number_of_gpu = 4 # number of gpus to use for inference and tensor parallelism
health_check_timeout = 300 # Increase the timeout for the health check to 5 minutes for downloading the model

# create HuggingFaceModel with the image uri
llm_model = HuggingFaceModel(
  role=role,
  image_uri=llm_image,
  env={
    'HF_MODEL_ID': hf_model_id,
    'SM_NUM_GPUS': json.dumps(number_of_gpu),
    # 'MAX_INPUT_LENGTH': json.dumps(2048),  # Max length of input text
    'MAX_TOTAL_TOKENS': json.dumps(2048),  # Max length of the generation (including input text)
  }
)

# Endpoint name: the model id without its organisation prefix ("." replaced by "-"),
# passed through name_from_base().
model_name = hf_model_id.split("/")[-1].replace(".", "-")
endpoint_name = name_from_base(model_name)
print(endpoint_name)

# NOTE(review): the deploy call below is commented out, so this cell does not create
# an endpoint. The cells that invoke `endpoint_name` only work if an endpoint with
# that name already exists - uncomment to deploy, or set `endpoint_name` to an existing one.
# llm = llm_model.deploy(
#   initial_instance_count=1,
#   instance_type=instance_type,
#   container_startup_health_check_timeout=health_check_timeout,
#   endpoint_name=endpoint_name,
# )

sagemaker role arn: arn:aws:iam::069230569860:role/service-role/AmazonSageMakerServiceCatalogProductsUseRole
sagemaker session region: eu-west-1
llm image uri: 763104351884.dkr.ecr.eu-west-1.amazonaws.com/huggingface-pytorch-tgi-inference:2.0.0-tgi0.8.2-gpu-py39-cu118-ubuntu20.04
falcon-40b-instruct


# 3. Invoke model

In [64]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

# Send a single prompt to the endpoint through the low-level runtime client
# and print the generated text.
model = "tiiuae/falcon-40b-instruct"

# Tokenizer for the model; used later in the notebook to inspect the vocabulary.
tokenizer = AutoTokenizer.from_pretrained(model)

# runtime client used to invoke the endpoint
runtime= boto3.client('runtime.sagemaker')
message = "Write a poem about Valencia"
prompt = f"{message}"
# Generation parameters sent with the request; None values are serialized as JSON null.
parameters = {
    "best_of": None, 
    "temperature": .7,
    "repetition_penalty": None,
    "top_k": None,
    "top_p": None,
    "typical_p": None,
    "do_sample": True,
    "max_new_tokens": 200,  # max number of tokens to return; keep in mind prompt + output has to be less than 2048 tokens; be frugal
    "return_full_text": False, # do not return the prompt as part of the output
    "stop": [],
    "truncate": None,
    "watermark": True,
    "details": False,
    "seed": 5,
  }
# Request payload: the prompt plus a copy of the parameters.
input_data = {
  "inputs": prompt,
  "parameters": {**parameters}
}

response = runtime.invoke_endpoint(EndpointName=endpoint_name,
                                   ContentType='application/json',
                                   Body=json.dumps(input_data).encode('utf-8'))
# The body is read as UTF-8 JSON; the text is taken from the first list item.
response_json = json.loads(response['Body'].read().decode("utf-8"))

print(response_json[0]['generated_text'])


The sun on my face and the sand between my toes
Walking the streets of Valencia, feeling free and alive
The salty ocean air carries me to new heights
As the city comes alive with the sounds of the street
The charming buildings of the old city stand tall
As the modern city skyline shines in the distance
The history and culture of Valencia are all around me
As the city embraces me with open arms
The food and the music are a delight to my senses
As I experience the Valencia lifestyle to its fullest
The colorful architecture and vibrant atmosphere
Make me feel like I never want to leave this beautiful place.


# 4. Create a LangChain ChatBot With Prompt Template

In [77]:
from langchain.llms.sagemaker_endpoint import LLMContentHandler, SagemakerEndpoint

class ContentHandler(LLMContentHandler):
    """Serialize prompts for the endpoint and parse its JSON responses.

    Requests are sent as ``{"inputs": <prompt>, "parameters": {...}}``. The
    response is expected to be a JSON list whose first item carries a
    ``"generated_text"`` field.

    The parameters of the most recent request are remembered on the instance
    so that ``transform_output`` can strip the configured stop sequences;
    a handler instance is therefore stateful between the two calls.
    """
    content_type = "application/json"
    accepts = "application/json"
    # Parameters of the latest request; replaced (never mutated) by transform_input().
    model_kwargs = {}

    def transform_input(self, prompt: str, model_kwargs=None) -> bytes:
        """Build the UTF-8 encoded JSON request body.

        Args:
            prompt: text sent under the ``"inputs"`` key.
            model_kwargs: generation parameters sent under ``"parameters"``;
                ``None`` is treated as no parameters.

        Returns:
            The request body as bytes.
        """
        # Use None rather than a mutable `{}` default, which would be shared between calls.
        if model_kwargs is None:
            model_kwargs = {}
        self.model_kwargs = model_kwargs
        input_str = json.dumps({"inputs": prompt, "parameters": {**model_kwargs}})
        return input_str.encode("utf-8")

    def transform_output(self, output) -> str:
        """Extract the generated text from an endpoint response.

        Args:
            output: readable object whose ``read()`` returns the UTF-8 encoded
                JSON response (a stream, not raw bytes).

        Returns:
            The generated text with every configured stop sequence removed and
            surrounding whitespace stripped.
        """
        response_json = json.loads(output.read().decode("utf-8"))
        text = response_json[0]["generated_text"]
        # "stop" may be missing or explicitly None; both mean "no stop sequences".
        for stop in self.model_kwargs.get("stop") or []:
            text = text.replace(stop, "")
        # Strip last so whitespace that preceded a removed stop sequence is dropped too.
        return text.strip()


# Handler that converts prompts to request bodies and responses back to text.
content_handler = ContentHandler()

# LangChain LLM wrapper around the endpoint; it reuses the generation
# `parameters` defined for the direct invocation above.
sm_llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name=region,
    model_kwargs=parameters,
    content_handler=content_handler,
)

In [78]:
sm_llm(prompt="What is the population of Greece?")

'As of 2021, the estimated population of Greece is approximately 10.7 million people.'

In [79]:
import textwrap
from io import StringIO
import sys

def print_ww(*args, **kwargs):
    """Print like ``print`` but word-wrap every output line.

    Accepts the same arguments as ``print`` plus an optional ``width`` keyword
    (default 100) giving the maximum line length.
    """
    wrap_width = kwargs.pop('width', 100)
    captured = StringIO()
    saved_stdout = sys.stdout
    # Temporarily route print() into the buffer so the text can be re-wrapped.
    sys.stdout = captured
    try:
        print(*args, **kwargs)
        rendered = captured.getvalue()
    finally:
        sys.stdout = saved_stdout
    for raw_line in rendered.splitlines():
        wrapped = textwrap.wrap(raw_line, width=wrap_width)
        print("\n".join(wrapped))


In [167]:
from langchain import PromptTemplate
# ConversationChain is used below but was not imported anywhere in the notebook,
# which raises NameError on a fresh kernel.
from langchain.chains import ConversationChain
from langchain.memory import (ConversationBufferMemory, 
                              ConversationSummaryMemory, 
                              ConversationBufferWindowMemory,
                              ConversationKGMemory)

# Generation parameters forwarded to the endpoint with every request.
params = {
    "temperature": .8,
    "top_p": .9,
    "do_sample": True,
    "max_new_tokens": 1024,  # max number of tokens to return; keep in mind prompt + output has to be less than 2048 tokens; be frugal
    "return_full_text": False, # do not return the prompt as part of the output
    # Stop when the model starts writing the next user turn or emits end-of-text.
    "stop": [ "User:", "<|endoftext|>"],
    "seed": 5,
  }

sm_llm = SagemakerEndpoint(
    endpoint_name=endpoint_name,
    region_name=region,
    model_kwargs=params,
    content_handler=content_handler,
)
# Speaker labels used by the memory when rendering the history; they match the
# names used in the prompt template below.
human_prefix = "User"
ai_prefix = "Falcon"
# turn verbose to true to see the full logs and documents
conversation = ConversationChain(
    llm=sm_llm, verbose=True, memory=ConversationBufferMemory(human_prefix = human_prefix,
                                                              ai_prefix = ai_prefix) #memory_chain
)

# langchain prompts do not always work with all the models. This prompt is tuned for Falcon
flacon_prompt = PromptTemplate.from_template("""The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:
{history}

User: {input}

Falcon:
""")

conversation.prompt = flacon_prompt

print_ww(conversation.predict(input="Hi there!"))



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:


User: Hi there!

Falcon:
[0m

[1m> Finished chain.[0m
Hello!




### New Questions

The model has responded with an initial message; let's ask a few questions.

In [83]:
print_ww(conversation.predict(input="Give me a few tips on how to start a new garden."))  



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:
User: Hi there!
Falcon: Hello!



User: Give me a few tips on how to start a new garden.

Falcon:
[0m

[1m> Finished chain.[0m
To start a new garden, you can follow these steps:
1. Choose a location with adequate sunlight and fertile soil.
2. Determine what you want to grow in your garden.
3. Prepare the soil by removing any

### Build on the questions

Let's ask a question without mentioning the word "garden" to see whether the model can use the previous conversation.

In [84]:
print_ww(conversation.predict(input="Cool. Will that work with tomatoes?"))  



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:
User: Hi there!
Falcon: Hello!


User: Give me a few tips on how to start a new garden.
Falcon: To start a new garden, you can follow these steps:
1. Choose a location with adequate sunlight and fertile soil.
2. Determine what you want to grow in your garden.
3. Prepare the soil by removing any weeds and adding compost.
4. Plan

### Finishing this conversation

In [85]:
print_ww(conversation.predict(input="That's all, thank you!"))



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:
User: Hi there!
Falcon: Hello!


User: Give me a few tips on how to start a new garden.
Falcon: To start a new garden, you can follow these steps:
1. Choose a location with adequate sunlight and fertile soil.
2. Determine what you want to grow in your garden.
3. Prepare the soil by removing any weeds and adding compost.
4. Plan

### Interactive session using ipywidgets
The following utility class allows us to interact with Falcon in a more natural way. We write our question in an input box and get Falcon's answer. We can then continue our conversation.

In [90]:
import ipywidgets as ipw
from IPython.display import display, clear_output

class ChatUX:
    """A chat UX using IPWidgets.

    Every turn shows a text box and a "Send" button inside one Output widget.
    Sending a message runs the wrapped chain, prints its answer, disables the
    widgets that were just used and renders a fresh pair for the next turn.
    Entering ``q`` or ``quit`` (any case) ends the chat.
    """

    # Inputs that end the chat; compared case-insensitively after stripping.
    QUIT_COMMANDS = ("q", "quit")

    def __init__(self, qa, retrievalChain = False):
        """
        Args:
            qa: chain object exposing ``run(dict)``.
            retrievalChain: when True the user text is passed under the
                ``'question'`` key, otherwise under ``'input'``.
        """
        self.qa = qa
        self.name = None  # text box of the current turn; None until one is rendered
        self.b=None  # "Send" button of the current turn
        self.retrievalChain = retrievalChain
        self.out = ipw.Output()


    def start_chat(self):
        """Display the output area and render the first input box."""
        print("Starting chat bot")
        display(self.out)
        self.chat(None)


    def chat(self, _):
        """Handle one turn; also used as the button's click callback.

        The single argument (the clicked button, or None on the first call)
        is ignored.
        """
        prompt = "" if self.name is None else self.name.value.strip()

        if prompt.lower() in self.QUIT_COMMANDS:
            # Print inside the Output widget: text printed from a widget callback
            # outside of it may not be shown in the notebook.
            with self.out:
                print("Thank you , that was a nice chat !!")
            self._disable_inputs()
            return

        if prompt:
            with self.out:
                thinking = ipw.Label(value="Thinking...")
                display(thinking)
                try:
                    result = self._ask(prompt)
                except Exception as exc:
                    # Keep the chat alive, but surface the failure instead of hiding it.
                    result = "No answer"
                    print_ww(f"Error: {type(exc).__name__}: {exc}")
                thinking.value=""
                print_ww(f"Falcon:{result}")
            self._disable_inputs()
            self.name = None

        if self.name is None:
            self._render_inputs()

    def _ask(self, prompt):
        """Run the chain on the user text, using the key the chain type expects."""
        key = 'question' if self.retrievalChain else 'input'
        return self.qa.run({key: prompt})

    def _disable_inputs(self):
        """Disable the widgets of the current turn, if any."""
        if self.name is not None:
            self.name.disabled = True
        if self.b is not None:
            self.b.disabled = True

    def _render_inputs(self):
        """Append a fresh text box and "Send" button to the output area."""
        with self.out:
            self.name = ipw.Text(description="You:", placeholder='q to quit')
            self.b = ipw.Button(description="Send")
            self.b.on_click(self.chat)
            display(ipw.Box(children=(self.name, self.b)))

Let's start a chat. You can also test the following questions:

- tell me a joke
- tell me another joke
- what was the first joke about
- can you make another joke on the same topic of the first joke

In [97]:
conversation.memory = ConversationBufferMemory(human_prefix = human_prefix,
                                                ai_prefix = ai_prefix)

In [98]:
chat = ChatUX(conversation)
chat.start_chat()

Starting chat bot


Output()

### Chatbot with persona

The AI assistant will play the role of a career coach. Role-play dialogue requires the user message to be set before starting the chat. `ConversationBufferMemory` is used to pre-populate the dialogue: it stores previous interactions and lets us add custom messages to the chat.

In [101]:
# Start from an empty buffer memory and seed it with one user/AI exchange that
# establishes the career-coach persona before the first real question.
memory = ConversationBufferMemory(human_prefix = human_prefix,
                                ai_prefix = ai_prefix)
memory.chat_memory.add_user_message("You will be acting as a career coach. Your goal is to give career advice to users")
memory.chat_memory.add_ai_message("I am career coach and give career advice")
# Swap the seeded memory into the existing conversation chain.
conversation.memory = memory 

print_ww(conversation.predict(input="What are the career options in AI?"))  



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:
User: You will be acting as a career coach. Your goal is to give career advice to users
Falcon: I am career coach and give career advice

User: What are the career options in AI?

Falcon:
[0m

[1m> Finished chain.[0m
The career options in AI are numerous and varied, including roles such as AI language model, data
scientist, 

In [102]:
print_ww(conversation.predict(input="What these people really do? Is it fun?"))  



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:
User: You will be acting as a career coach. Your goal is to give career advice to users
Falcon: I am career coach and give career advice
User: What are the career options in AI?
Falcon: The career options in AI are numerous and varied, including roles such as AI language model, data scientist, machine learning engineer, and mor

Let's ask a question that is outside this persona's specialty; the model should decline to answer it and give a reason.

In [103]:
conversation.verbose = False
print_ww(conversation.predict(input="How to fix my car?"))

As an AI language model, I am not capable of fixing cars. However, there are plenty of resources
available online or in books that can help you troubleshoot and fix common car problems. You can
also take your car to a mechanic who specializes in auto repair.


### Memory type #2: ConversationSummaryMemory
The problem with the `ConversationBufferMemory` is that as the conversation progresses, the token count of our context history adds up. This is problematic because we might max out our LLM with a prompt that is too large to be processed.

Enter ConversationSummaryMemory.

Again, we can infer from the name what is going on.. we will keep a summary of our previous conversation snippets as our history. How will we summarize these? LLM to the rescue.

Key feature: the conversation summary memory keeps the previous pieces of conversation in a summarized form, where the summarization is performed by an LLM.

In this case we need to send the llm to our memory constructor to power its summarization ability.

In [157]:
human_prefix = "User"
ai_prefix = "Falcon"

# The summary memory uses the LLM itself to condense the conversation history.
memory = ConversationSummaryMemory(llm = sm_llm,
                                human_prefix = human_prefix,
                                ai_prefix = ai_prefix)
# Show the default summarization prompt before replacing it.
print(memory.prompt.template)

# Summarization prompt rewritten with the User/Falcon speaker names.
# The worked example now spells "Falcon" consistently (it previously read "Flacon").
flacon_sum_prompt = PromptTemplate.from_template("""
Progressively summarize the lines of conversation provided, adding onto the previous summary returning a new summary.


Current summary:
The user asks what Falcon thinks of artificial intelligence. Falcon thinks artificial intelligence is a force for good.

New lines of conversation:
User: Why do you think artificial intelligence is a force for good?
Falcon: Because artificial intelligence will help humans reach their full potential.

New summary:
The user asks what Falcon thinks of artificial intelligence. Falcon thinks artificial intelligence is a force for good because it will help humans reach their full potential.

Current summary:
{summary}

New lines of conversation:
{new_lines}

New summary:""")

memory.prompt = flacon_sum_prompt


# turn verbose to true to see the full logs and documents
conversation= ConversationChain(
    llm=sm_llm, verbose=True, memory=memory #memory_chain
)

# langchain prompts do not always work with all the models. This prompt is tuned for Falcon
flacon_prompt = PromptTemplate.from_template("""The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:
{history}

User: {input}

Falcon:
""")
conversation.prompt = flacon_prompt

Progressively summarize the lines of conversation provided, adding onto the previous summary returning a new summary.

EXAMPLE
Current summary:
The human asks what the AI thinks of artificial intelligence. The AI thinks artificial intelligence is a force for good.

New lines of conversation:
Human: Why do you think artificial intelligence is a force for good?
AI: Because artificial intelligence will help humans reach their full potential.

New summary:
The human asks what the AI thinks of artificial intelligence. The AI thinks artificial intelligence is a force for good because it will help humans reach their full potential.
END OF EXAMPLE

Current summary:
{summary}

New lines of conversation:
{new_lines}

New summary:


Cool! So each new interaction is summarized and appended to a running summary as the memory of our chain. Let's see how this works in practice!

In [158]:
print_ww(conversation.predict(input="Good morning!")) 



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:


User: Good morning!

Falcon:
[0m

[1m> Finished chain.[0m
Good morning!




In [159]:
for prompt in ["My interest here is to explore the potential of integrating Large Language Models with external knowledge",
               "I just want to analyze the different possibilities. What can you think of?",
               "Which data source types could be used to give context to the model?",
               "What is my aim again?"
              ]:
    print_ww(conversation.predict(input=prompt)) 



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:
The user greets Falcon. Falcon greets the user.

This approach can be continued indefinitely to summarize the conversation and extract important information.

User: My interest here is to explore the potential of integrating Large Language Models with external knowledge

Falcon:
[0m

[1m> Finished chain.[0m
What do you mean 

In [160]:
print(conversation.memory.buffer)

The user asks what their aim is again. The aim is to integrate Large Language Models with external knowledge to improve the overall performance of a system, provide more accurate responses, and enhance the user experience.


**Note:** It is potentially better to use a dedicated model that is tuned to summarize conversations.

### Memory type #3: ConversationBufferWindowMemory

Another great option for these cases is the ConversationBufferWindowMemory where we will be keeping a few of the last interactions in our memory but we will intentionally drop the oldest ones - short-term memory if you'd like. Here the aggregate token count and the per-call token count will drop noticeably. We will control this window with the k parameter.

**Key feature:** the conversation buffer window memory keeps the latest pieces of the conversation in raw form

In [168]:
conversation.memory=ConversationBufferWindowMemory(k=1,
                                                   human_prefix = human_prefix,
                                                    ai_prefix = ai_prefix)

In [169]:
for prompt in ["Good morning!",
               "My interest here is to explore the potential of integrating Large Language Models with external knowledge",
               "I just want to analyze the different possibilities. What can you think of?",
               "Which data source types could be used to give context to the model?",
               "What is my aim again?"
]:
    print_ww(conversation.predict(input=prompt)) 



[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User and Falcon will converse in natural language, 
and Falcon will answer User's questions. Falcon was built to be respectful, polite and inclusive. 
Falcon will never decline to answer a question, and always attempts to give an answer that User would be satisfied 
with. It knows a lot, and always tells the truth. If Falcon does not know
the answer to a question, it truthfully says it does not know.

Current conversation:


User: Good morning!

Falcon:
[0m

[1m> Finished chain.[0m
Good morning!




[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a conversation between a highly knowledgeable and intelligent AI assistant, called Falcon, 
and a human user, called User. In the following interactions, User a

As we can see, it effectively 'forgot' what we talked about in the first interaction. Let's see what it 'remembers'. Given that we set k to 1, we would expect it to remember only the last interaction.

We need to access a special method here since, in this memory type, the buffer is first passed through this method to be sent later to the llm.

In [171]:
# Read the history through load_memory_variables() rather than the raw buffer.
# The method takes a mapping of chain inputs, so pass an empty dict (not a list).
bufw_history = conversation.memory.load_memory_variables(
    inputs={}
)['history']
print(bufw_history)

User: What is my aim again?
Falcon: - To provide a list of data source types that could be used to give context to a model.
- To help the user understand the potential sources of information that could be used to train a model.
- To answer the user's questions to the best of my ability.
- To assist the user in developing their understanding of machine learning.






# 5. Keep in mind the special tokens

In [172]:
# https://huggingface.co/tiiuae/falcon-40b-instruct/blob/main/special_tokens_map.json
# https://huggingface.co/tiiuae/falcon-40b-instruct/blob/main/tokenizer_config.json

# print first 20 tokens
tokenizer.convert_ids_to_tokens(range(20))

['>>TITLE<<',
 '>>ABSTRACT<<',
 '>>INTRODUCTION<<',
 '>>SUMMARY<<',
 '>>COMMENT<<',
 '>>ANSWER<<',
 '>>QUESTION<<',
 '>>DOMAIN<<',
 '>>PREFIX<<',
 '>>SUFFIX<<',
 '>>MIDDLE<<',
 '<|endoftext|>',
 '!',
 '"',
 '#',
 '$',
 '%',
 '&',
 "'",
 '(']

In [105]:
# Order the vocabulary by token id, then keep the 20 entries with the lowest ids.
vocab_by_id = sorted(tokenizer.get_vocab().items(), key=lambda item: item[1])
sorted_dict = dict(vocab_by_id)
first_20_elements = dict(vocab_by_id[:20])

print(first_20_elements)

{'>>TITLE<<': 0, '>>ABSTRACT<<': 1, '>>INTRODUCTION<<': 2, '>>SUMMARY<<': 3, '>>COMMENT<<': 4, '>>ANSWER<<': 5, '>>QUESTION<<': 6, '>>DOMAIN<<': 7, '>>PREFIX<<': 8, '>>SUFFIX<<': 9, '>>MIDDLE<<': 10, '<|endoftext|>': 11, '!': 12, '"': 13, '#': 14, '$': 15, '%': 16, '&': 17, "'": 18, '(': 19}


# Delete all models, endpoint_configs & endpoints

In [173]:
import boto3

def delete_resources(resource_type, name_contains=None):
    """Delete SageMaker resources of one type via the default boto3 client.

    Args:
        resource_type: snake_case resource kind for which the client exposes
            ``list_<resource_type>s`` and ``delete_<resource_type>``
            (e.g. 'model', 'endpoint', 'endpoint_config').
        name_contains: optional substring. When given, only resources whose
            name contains it are deleted; the default (None) deletes every
            listed resource.
    """
    client = boto3.client('sagemaker')
    delete_method = getattr(client, f"delete_{resource_type}")
    # 'endpoint_config' -> 'EndpointConfig', the prefix used in response and request keys.
    resource_type_name = resource_type.replace('_', ' ').title().replace(' ', '')
    name_key = f"{resource_type_name}Name"

    # A single list call returns only one page of results; paginate to see them all.
    # Names are collected up front so nothing is deleted while still listing.
    paginator = client.get_paginator(f"list_{resource_type}s")
    resource_names = [
        resource[name_key]
        for page in paginator.paginate()
        for resource in page[f"{resource_type_name}s"]
    ]

    for resource_name in resource_names:
        if name_contains is not None and name_contains not in resource_name:
            continue
        print(f"Deleting {resource_type}: {resource_name}")
        delete_method(**{name_key: resource_name})

def main():
    """Run delete_resources() once per SageMaker resource type, in order."""
    # Add more resource types if needed
    for kind in ('model', 'endpoint', 'endpoint_config'):
        delete_resources(kind)

# delete_resources() is called without any filter here, so every listed
# model, endpoint and endpoint config is deleted.
if __name__ == "__main__":
    main()

Deleting model: huggingface-pytorch-tgi-inference-2023-07-04-12-09-14-870
Deleting endpoint: falcon-40b-instruct
Deleting endpoint_config: falcon-40b-instruct
