## Set up Titan model on Bedrock

In [1]:
# Make sure you ran `download-dependencies.sh` from the root of the repository first!
%pip install --quiet --no-build-isolation --force-reinstall \
    ../dependencies/awscli-*-py3-none-any.whl \
    ../dependencies/boto3-*-py3-none-any.whl \
    ../dependencies/botocore-*-py3-none-any.whl

%pip install --quiet langchain==0.0.249

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spyder 5.3.3 requires pyqt5<5.16, which is not installed.
spyder 5.3.3 requires pyqtwebengine<5.16, which is not installed.
distributed 2022.7.0 requires tornado<6.2,>=6.0.3, but you have tornado 6.3.2 which is incompatible.
jupyterlab 3.4.4 requires jupyter-server~=1.16, but you have jupyter-server 2.7.0 which is incompatible.
jupyterlab-server 2.10.3 requires jupyter-server~=1.4, but you have jupyter-server 2.7.0 which is incompatible.
notebook 6.5.5 requires jupyter-client<8,>=5.3.4, but you have jupyter-client 8.3.0 which is incompatible.
notebook 6.5.5 requires pyzmq<25,>=17, but you have pyzmq 25.1.0 which is incompatible.
panel 0.13.1 requires bokeh<2.5.0,>=2.4.0, but you have bokeh 3.2.1 which is incompatible.
pyasn1-modules 0.2.8 requires pyasn1<0.5.0,>=0.4.6, but you have pyasn1 0.5.0 which is i

In [3]:
import json
import os
import sys

import boto3

module_path = ".."
sys.path.append(os.path.abspath(module_path))
from utils import bedrock, print_ww


# ---- ⚠️ Un-comment and edit the below lines as needed for your AWS setup ⚠️ ----

# os.environ["AWS_DEFAULT_REGION"] = "<REGION_NAME>"  # E.g. "us-east-1"
# os.environ["AWS_PROFILE"] = "<YOUR_PROFILE>"
# os.environ["BEDROCK_ASSUME_ROLE"] = "<YOUR_ROLE_ARN>"  # E.g. "arn:aws:..."
# os.environ["BEDROCK_ENDPOINT_URL"] = "<YOUR_ENDPOINT_URL>"  # E.g. "https://..."


boto3_bedrock = bedrock.get_bedrock_client(
    assumed_role=os.environ.get("BEDROCK_ASSUME_ROLE", None),
    endpoint_url=os.environ.get("BEDROCK_ENDPOINT_URL", None),
    region=os.environ.get("AWS_DEFAULT_REGION", None),
)

Create new client
  Using region: us-west-2
boto3 Bedrock client successfully created!
bedrock(https://bedrock.us-west-2.amazonaws.com)


In [4]:
boto3_bedrock.list_foundation_models()

{'ResponseMetadata': {'RequestId': '2c5480bc-322e-4e7e-b6e7-abf1cdef95ea',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Tue, 29 Aug 2023 16:23:28 GMT',
   'content-type': 'application/json',
   'content-length': '1166',
   'connection': 'keep-alive',
   'x-amzn-requestid': '2c5480bc-322e-4e7e-b6e7-abf1cdef95ea'},
  'RetryAttempts': 0},
 'modelSummaries': [{'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/amazon.titan-tg1-large',
   'modelId': 'amazon.titan-tg1-large'},
  {'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/amazon.titan-e1t-medium',
   'modelId': 'amazon.titan-e1t-medium'},
  {'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/stability.stable-diffusion-xl',
   'modelId': 'stability.stable-diffusion-xl'},
  {'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/ai21.j2-grande-instruct',
   'modelId': 'ai21.j2-grande-instruct'},
  {'modelArn': 'arn:aws:bedrock:us-west-2::foundation-model/ai21.j2-jumbo-instruct',
   'modelId': 'ai21.j2-jumbo-i

## Install and import packages

In [5]:
!pip install --upgrade --quiet datasets

[0m

In [6]:
import datasets
import pandas as pd

## Call Titan model

In [16]:
def query_model(prompt_data, bedrock_model_id = 'amazon.titan-tg1-large'):    
    
    body = json.dumps({
        "inputText": prompt_data, 
        "textGenerationConfig":{
            "maxTokenCount":1024,
            "stopSequences":[],
            "temperature":0,
            "topP":0.9
            }
    }) 
    modelId = bedrock_model_id
    accept = 'application/json'
    contentType = 'application/json'
    
    response = boto3_bedrock.invoke_model(body=body, modelId=modelId, accept=accept, contentType=contentType)
    response_body = json.loads(response.get('body').read())
    outputText = response_body.get('results')[0].get('outputText')
    
    return {'output': outputText} 

In [17]:
eval_set = datasets.load_dataset("tatsu-lab/alpaca_eval", "alpaca_eval")["eval"]

In [18]:
len(eval_set)

805

In [22]:
# Create an empty list to store the results
results_list = []

for example in eval_set:
    result = query_model(prompt_data = example["instruction"])
    results_list.append(result)
    
# Create a new DataFrame from the results list
results_list = pd.DataFrame(results_list)

results_list.to_json('titan_alpaca_eval.json')

## Save result

In [2]:
import pandas as pd

In [3]:
titan_alpaca_eval_df = pd.read_json('titan_alpaca_eval.json')
titan_alpaca_eval_df.head()

Unnamed: 0,output
0,\nMany famous actors and actresses began their...
1,\nThe United States of America has 50 states. ...
10,Sorry - this model is designed to avoid giving...
100,"\nThis dish is made of tofu, bean sprouts, and..."
101,\nScary Halloween costume ideas include:\n1. B...


In [4]:
def remove_newlines(text):
    cleaned_text = text.replace('\n', ' ')
    return cleaned_text

In [5]:
titan_alpaca_eval_df['output_v2'] = titan_alpaca_eval_df['output'].apply(remove_newlines)

In [6]:
titan_alpaca_eval_df.shape

(805, 2)

In [7]:
titan_alpaca_eval_df.head()

Unnamed: 0,output,output_v2
0,\nMany famous actors and actresses began their...,Many famous actors and actresses began their ...
1,\nThe United States of America has 50 states. ...,The United States of America has 50 states. E...
10,Sorry - this model is designed to avoid giving...,Sorry - this model is designed to avoid giving...
100,"\nThis dish is made of tofu, bean sprouts, and...","This dish is made of tofu, bean sprouts, and ..."
101,\nScary Halloween costume ideas include:\n1. B...,Scary Halloween costume ideas include: 1. Blo...


In [8]:
titan_alpaca_eval_df.rename(columns={'output': 'titan_output', 'output_v2': 'titan_output_v2'}, inplace=True)

In [9]:
titan_alpaca_eval_df.head()

Unnamed: 0,titan_output,titan_output_v2
0,\nMany famous actors and actresses began their...,Many famous actors and actresses began their ...
1,\nThe United States of America has 50 states. ...,The United States of America has 50 states. E...
10,Sorry - this model is designed to avoid giving...,Sorry - this model is designed to avoid giving...
100,"\nThis dish is made of tofu, bean sprouts, and...","This dish is made of tofu, bean sprouts, and ..."
101,\nScary Halloween costume ideas include:\n1. B...,Scary Halloween costume ideas include: 1. Blo...


In [10]:
!pip install --upgrade --quiet datasets

[0m

In [11]:
import datasets
eval_set = datasets.load_dataset("tatsu-lab/alpaca_eval", "alpaca_eval")["eval"]



In [12]:
type(eval_set)

datasets.arrow_dataset.Dataset

In [13]:
eval_set_df = eval_set.to_pandas()

In [14]:
eval_set_df.head()

Unnamed: 0,instruction,output,generator,dataset
0,What are the names of some famous actors that ...,Some famous actors that started their careers ...,text_davinci_003,helpful_base
1,How did US states get their names?,US states get their names from a variety of so...,text_davinci_003,helpful_base
2,"Hi, my sister and her girlfriends want me to p...","Kickball is a game similar to baseball, but wi...",text_davinci_003,helpful_base
3,What is some cool music from the 1920s?,Some cool music from the 1920s includes jazz c...,text_davinci_003,helpful_base
4,How do I wrap a present neatly?,1. Start by gathering the supplies you will ne...,text_davinci_003,helpful_base


In [15]:
combined_df = pd.concat([eval_set_df, titan_alpaca_eval_df,], axis=1)
combined_df.head()

Unnamed: 0,instruction,output,generator,dataset,titan_output,titan_output_v2
0,What are the names of some famous actors that ...,Some famous actors that started their careers ...,text_davinci_003,helpful_base,\nMany famous actors and actresses began their...,Many famous actors and actresses began their ...
1,How did US states get their names?,US states get their names from a variety of so...,text_davinci_003,helpful_base,\nThe United States of America has 50 states. ...,The United States of America has 50 states. E...
2,"Hi, my sister and her girlfriends want me to p...","Kickball is a game similar to baseball, but wi...",text_davinci_003,helpful_base,Sorry - this model is designed to avoid giving...,Sorry - this model is designed to avoid giving...
3,What is some cool music from the 1920s?,Some cool music from the 1920s includes jazz c...,text_davinci_003,helpful_base,\nHere is some cool music from the 1920s:\n\n1...,"Here is some cool music from the 1920s: 1. ""..."
4,How do I wrap a present neatly?,1. Start by gathering the supplies you will ne...,text_davinci_003,helpful_base,\nHere are some steps to wrap a present neatly...,Here are some steps to wrap a present neatly:...


In [18]:
temp_df = combined_df[['instruction', 'output', 'titan_output_v2']]
temp_df.to_json("combined_df.json", orient='records')