In [33]:
import boto3
import json
import os

from langchain_aws.embeddings.bedrock import BedrockEmbeddings
from langchain_community.chat_models import BedrockChat
from langchain_community.document_loaders.generic import GenericLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain_text_splitters import RecursiveCharacterTextSplitter
from typing import Optional

os.environ['LANGCHAIN_TRACING_V2'] = 'true'

Read Access keys for AWS services

In [14]:
json_file_path = 'keys.json'
with open(json_file_path, 'r') as j:
     credentials = json.loads(j.read())

for k, v in credentials.items():
     if not v:
          raise ValueError(f'Please, write your {k} in keys.json file')
     os.environ[k] = v

Collect external data (transcript files in docx format)

In [17]:
data_path = '../data'
file_names = [os.path.join(data_path, f) for f in os.listdir(data_path)]

In [18]:
loader = GenericLoader.from_filesystem(path=data_path, show_progress=True)
data = loader.load()
len(data)

  from .autonotebook import tqdm as notebook_tqdm
100%|██████████| 8/8 [00:16<00:00,  2.03s/it]


Split the documents 

In [20]:
splitter_param = {'chunk_size': 1000, 'chunk_overlap': 200, 'add_start_index': True}
text_splitter = RecursiveCharacterTextSplitter(**splitter_param)
all_splits = text_splitter.split_documents(data)
len(all_splits)

923

In [21]:
len(all_splits[0].page_content)

28

Create bedrock client

In [None]:
bedrock = boto3.client(service_name='bedrock-runtime')

Create embeddings and save to a folder

In [23]:
embeddings = BedrockEmbeddings(client=bedrock)

In [29]:
vectorstore_to_disk = Chroma.from_documents(all_splits, embeddings, persist_directory='./chroma_db')

Read embeddings

In [30]:
vectorstore = Chroma(persist_directory='./chroma_db', embedding_function=embeddings)

  warn_deprecated(


Create retriever

In [31]:
retriever = vectorstore.as_retriever(
    search_type='similarity', 
    search_kwargs={'k': 3},
)

Create model

In [34]:
model_kwargs =  { 
    'max_tokens': 2048,
    'temperature': 0.0,
    'top_k': 250,
    'top_p': 1,
    'stop_sequences': ['\n\nHuman'],
}

model_id = 'anthropic.claude-v2'

model = BedrockChat(
    client=bedrock,
    model_id=model_id,
    model_kwargs=model_kwargs
)

  warn_deprecated(


Create prompt

In [36]:
template = """
Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

Create final chain

In [37]:
def format_docs(docs):
    return '\n\n'.join(doc.page_content for doc in docs)


chain = (
    {'context': retriever | format_docs, 'question': RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

Test

In [38]:
question1 = 'Is Vodka popular the drink?'
question2 = 'Who is produce Vodka?'

In [48]:
for chunk in chain.stream(question1):
    print(chunk, end='', flush=True)

Based on the conversation, it seems that vodka is a popular liquor choice for canned cocktails and mixed drinks. A few key points:

- Speaker 3 notes that many canned cocktails and seltzers just advertise the brand name and you assume it's vodka, since vodka doesn't have a strong flavor. This implies vodka is commonly used.

- Speaker 1 says they try to stick to clear liquors like vodka and tequila to avoid hangovers. 

- Speaker 3 says they don't feel like vodkas taste very different from each other, especially once mixed in a drink. This suggests vodka is commonly used.

So in summary, yes, the speakers indicate that vodka seems to be a popular liquor used in canned and mixed drinks. Speaker 3 in particular notes that vodka is likely assumed in many canned cocktails and seltzers because it has a neutral flavor.

In [50]:
for chunk in chain.stream(question2):
    print(chunk, end='', flush=True)

Based on the context, it is not directly stated who produces the vodka mentioned in the passage. The speaker mentions that the region they are in (Niagara) produces amazing grapes and grain, and they use local ingredients in their collection of handcrafted spirits. But a specific producer of the vodka is not named.