In [1]:
!pip install --quiet langchain_community pymupdf langchain_postgres langchain_experimental open_clip_torch "psycopg[binary,pool]" psycopg2-binary langchain-aws faiss-cpu python-dotenv transformers

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awscli 1.34.4 requires botocore==1.35.4, but you have botocore 1.34.162 which is incompatible.[0m[31m
[0m

In [1]:
from langchain_experimental.open_clip import OpenCLIPEmbeddings

from helpers.param_manager import get_param_manager
from helpers.vectorstore import update_vectorstore, get_vectorstore_retriever
from helpers.chat import get_bedrock_llm, get_initial_student_query, get_student_query, create_dynamodb_history_table, get_response

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


## 1. Calling the 'update_vectorstore' function to update the embeddings in the vectorstore

In [2]:
# Step 1) Pick the multimodal embeddings function (currently, OpenCLIP is the best), then the bucket and course to ingest
embeddings = OpenCLIPEmbeddings()
bucket, course = 'aila-data-ingestion-bucket', 'cpsc210-kb'

# Steps 2-3) Look up the RDS secret through the AWS Secrets Manager config; it holds the connection parameters
secret_name = "credentials/rdsDbCredential"
param_manager = get_param_manager()
db_secret = param_manager.get_secret(secret_name)

# Step 4) Assemble the vectorstore configuration dictionary
# NOTE(review): this cell targets the 'CPSC_210XYZ' collection, while the retriever cells
# further down read from 'CPSC_210' -- confirm which collection name is intended.
vectorstore_config_dict = dict(
    collection_name='CPSC_210XYZ',
    dbname=db_secret["dbname"],
    user=db_secret["username"],
    password=db_secret["password"],
    host=db_secret["host"],
    port=db_secret["port"],
)

# Step 5) Run the update. The function returns nothing, so its result is simply discarded:
_ = update_vectorstore(
    bucket=bucket, course=course,
    vectorstore_config_dict=vectorstore_config_dict,
    embeddings=embeddings,
)

INFO:root:Loaded ViT-H-14 model config.
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


No named profile found, running default session
AWS session and clients initialized successfully.
Got param manager for region: None


INFO:helpers.helper:Connecting to the database
INFO:helpers.helper:Connected to the database
INFO:helpers.helper:Initializing the VectorStore
INFO:helpers.helper:VectorStore initialized
Processing pages:   0%|          | 0/8 [00:00<?, ?it/s]
Storing chunks for cpsc210-kb/module/documents/CPSC_210_-_Control-Flow_Models.pdf_page_1.txt:   0%|          | 0/2 [00:00<?, ?it/s][A
Storing chunks for cpsc210-kb/module/documents/CPSC_210_-_Control-Flow_Models.pdf_page_1.txt:  50%|█████     | 1/2 [00:00<00:00,  2.49it/s][A
Storing chunks for cpsc210-kb/module/documents/CPSC_210_-_Control-Flow_Models.pdf_page_1.txt: 100%|██████████| 2/2 [00:00<00:00,  2.60it/s][A
Processing pages:  12%|█▎        | 1/8 [00:06<00:43,  6.24s/it]
Storing chunks for cpsc210-kb/module/documents/CPSC_210_-_Control-Flow_Models.pdf_page_2.txt:   0%|          | 0/2 [00:00<?, ?it/s][A
Storing chunks for cpsc210-kb/module/documents/CPSC_210_-_Control-Flow_Models.pdf_page_2.txt:  50%|█████     | 1/2 [00:00<00:00,  3.05it/s

Indexing updates: 
 {'num_added': 164, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 164}


INFO:processing.figures:Adding image galaxy.jpg to the vectorstore
INFO:processing.figures:Successfully added image galaxy.jpg to the vectorstore


## 2. Calling the 'get_vectorstore_retriever' function to use for conversations

In [2]:
# Step 1) Choose the Bedrock LLM ID and the multimodal embeddings function (currently, OpenCLIP is the best)
bedrock_llm_id = "meta.llama3-70b-instruct-v1:0"
embeddings = OpenCLIPEmbeddings()

# Step 2) Build the Bedrock LLM with get_bedrock_llm
llm = get_bedrock_llm(bedrock_llm_id=bedrock_llm_id)

# Steps 3-4) Look up the RDS secret through the AWS Secrets Manager config; it holds the connection parameters
secret_name = "credentials/rdsDbCredential"
param_manager = get_param_manager()
db_secret = param_manager.get_secret(secret_name)

# Step 5) Assemble the vectorstore configuration dictionary
vectorstore_config_dict = dict(
    collection_name='CPSC_210',
    dbname=db_secret["dbname"],
    user=db_secret["username"],
    password=db_secret["password"],
    host=db_secret["host"],
    port=db_secret["port"],
)

# Step 6) Build the retriever. The function returns the history_aware_retriever, so keep a reference to it:
history_aware_retriever = get_vectorstore_retriever(
    llm=llm, vectorstore_config_dict=vectorstore_config_dict, embeddings=embeddings,
)

INFO:root:Loaded ViT-H-14 model config.
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:helpers.helper:Connecting to the database


No named profile found, running default session
AWS session and clients initialized successfully.
Got param manager for region: None


INFO:helpers.helper:Connected to the database
INFO:helpers.helper:Initializing the VectorStore
INFO:helpers.helper:VectorStore initialized


## 3. Starting a conversation

In [3]:
# Steps 1-2) Choose the topic to assess the student on, then build the initial student query for it.
# That query triggers the first message shown to the student when they begin the chat.
topic = "data abstraction"
student_query = get_initial_student_query(topic=topic)

# Step 3) Build the Bedrock LLM from its ID
bedrock_llm_id = "meta.llama3-70b-instruct-v1:0"
llm = get_bedrock_llm(bedrock_llm_id=bedrock_llm_id)

# Steps 4-5) Retrieve the RDS secret via AWS Secrets Manager; it holds the database connection parameters
secret_name = "credentials/rdsDbCredential"
param_manager = get_param_manager()
db_secret = param_manager.get_secret(secret_name)

# Step 6) Assemble the vectorstore configuration from the RDS secret, then obtain the
# history-aware retriever used to process the student's queries
vectorstore_config_dict = dict(
    collection_name='CPSC_210',
    dbname=db_secret["dbname"],
    user=db_secret["username"],
    password=db_secret["password"],
    host=db_secret["host"],
    port=db_secret["port"],
)
embeddings = OpenCLIPEmbeddings()
history_aware_retriever = get_vectorstore_retriever(
    llm=llm, vectorstore_config_dict=vectorstore_config_dict, embeddings=embeddings,
)

# Step 7) Name the DynamoDB table and the session
table_name, session_id = "SessionTable3x", "Test-Sean2"

# Step 8) Create the DynamoDB table that stores the session history, if it doesn't already exist.
# NOTE: If the table already exists, this function just skips the creation step, so calling it again does not raise an error.
create_dynamodb_history_table(table_name=table_name)

# Step 9) Ask for a response to the student's query using all the components defined above
get_response(
    query=student_query, topic=topic, llm=llm,
    history_aware_retriever=history_aware_retriever,
    table_name=table_name, session_id=session_id,
)


INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:root:Loaded ViT-H-14 model config.


Got param manager for region: None


INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
INFO:helpers.helper:Connecting to the database
INFO:helpers.helper:Connected to the database
INFO:helpers.helper:Initializing the VectorStore
INFO:helpers.helper:VectorStore initialized


{'llm_output': "Hello! I'm excited to help you master the topic of data abstraction.\n\nData abstraction is a fundamental concept in computer science, and I'm here to guide you through it. To get started, can you tell me what you think data abstraction is and why it's important in programming?",
 'llm_verdict': False}

## 4. Continuing a conversation

In [4]:
# Step 1) Turn the student's raw query into a structured query (the raw query here contains only blank lines)
raw_query = """


"""
student_query = get_student_query(raw_query)

# topic, llm, retriever, table and session are unchanged, so just call get_response() again
get_response(
    query=student_query, topic=topic, llm=llm,
    history_aware_retriever=history_aware_retriever,
    table_name=table_name, session_id=session_id,
)

{'llm_output': "It seems like you didn't provide an answer. That's okay! Let me try to help you get started.\n\nData abstraction is a concept in programming that allows us to show only the necessary information to the outside world while hiding the internal details. It's like a car, for example. You don't need to know how the engine works to drive the car; you only need to know how to interact with it (e.g., press the accelerator, brake, and steer).\n\nCan you think of a simple example in programming where data abstraction is useful?",
 'llm_verdict': False}

In [11]:
# The conversation continues by repeating the same two steps for every new student message

# Step 1) Turn the student's raw query into a structured query
raw_query = """
Abstraction simplifies interaction with objects by exposing only the necessary features, while encapsulation protects the object’s internal state and ensures that it is accessed and modified in a controlled manner.

"""
student_query = get_student_query(raw_query)

# topic, llm, retriever, table and session are unchanged, so just call get_response() again
get_response(
    query=student_query, topic=topic, llm=llm,
    history_aware_retriever=history_aware_retriever,
    table_name=table_name, session_id=session_id,
)

{'llm_output': "That's absolutely right! You've correctly distinguished between abstraction and encapsulation. Abstraction focuses on exposing only the necessary features, while encapsulation is about protecting the internal state and controlling access to it.\n\nHere's a question to delve deeper: How do abstract data types (ADTs) relate to data abstraction, and what are some common examples of ADTs in programming?",
 'llm_verdict': False}

In [12]:
# Turn the student's raw query into a structured query
raw_query = """
Abstract Data Types (ADTs) in programming promote data abstraction by separating the interface from the implementation.
This means that you can define and use data types based on their behavior rather than knowing their implementation details.
Examples include Lists in Python and how you can use built-in methods like len, index, remove, pop, etc to perform functions on a List.
"""
student_query = get_student_query(raw_query)

# Same topic, llm, retriever, table and session as before
get_response(
    query=student_query, topic=topic, llm=llm,
    history_aware_retriever=history_aware_retriever,
    table_name=table_name, session_id=session_id,
)

{'llm_output': "That's spot on! You've correctly explained how ADTs promote data abstraction by separating the interface from the implementation, allowing you to focus on the behavior rather than the implementation details. And you've provided a great example of Lists in Python, which is a classic ADT.\n\nHere's a question to further solidify your understanding: How do interfaces in object-oriented programming languages, such as Java or C#, relate to abstract data types and data abstraction?",
 'llm_verdict': False}

In [None]:
# The conversation continues by repeating the same two steps for every new student message

# Step 1) Turn the student's raw query into a structured query
raw_query = """
Is this the real life? Is this just fantasy?
Caught in a landslide, no escape from reality.
"""
student_query = get_student_query(raw_query)

# topic, llm, retriever, table and session are unchanged, so just call get_response() again
get_response(
    query=student_query, topic=topic, llm=llm,
    history_aware_retriever=history_aware_retriever,
    table_name=table_name, session_id=session_id,
)

In [6]:
# The conversation continues by repeating the same two steps for every new student message

# Step 1) Turn the student's raw query into a structured query
raw_query = """
Nah, I don't really want to study anymore.
"""
student_query = get_student_query(raw_query)

# topic, llm, retriever, table and session are unchanged, so just call get_response() again
get_response(
    query=student_query, topic=topic, llm=llm,
    history_aware_retriever=history_aware_retriever,
    table_name=table_name, session_id=session_id,
)

{'llm_output': "It can be tough to stay motivated when studying a new concept. But don't worry, I'm here to help make it more engaging and fun!\n\nLet's take a break from the questions and do something different. I'll share a fun fact about data abstraction, and then we can come back to the questions when you're ready.\n\nHere's the fun fact:\n\nDid you know that data abstraction is used in many real-world systems, such as banking and finance? For example, when you use an ATM to withdraw cash, the ATM doesn't need to know the internal details of your bank account. It only needs to interact with the account through a public interface, which is an example of data abstraction in action!\n\nFeel free to take a break, and when you're ready, we can continue exploring data abstraction together.",
 'llm_verdict': False}