# Open-Source only RAG for Testing Scenario Creation

- Sentence Transformers as embedding model
- Postgres as vector store
- Llama.cpp as LLM

#### Sentence Transformers

In [1]:
%pip install llama-index-readers-file pymupdf
%pip install llama-index-vector-stores-postgres
%pip install llama-index-embeddings-huggingface
%pip install llama-index-llms-llama-cpp

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
# sentence transformers
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en")

  from .autonotebook import tqdm as notebook_tqdm


#### Llama CPP

In [3]:
%pip install llama-cpp-python

Note: you may need to restart the kernel to use updated packages.


In [4]:
from llama_index.llms.llama_cpp import LlamaCPP

# model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin"
model_url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf"

llm = LlamaCPP(
    # You can pass in the URL to a GGML model to download it automatically
    model_url=model_url,
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": 1},
    verbose=True,
)

llama_model_loader: loaded meta data with 19 key-value pairs and 363 tensors from /Users/busraoguzoglu/Library/Caches/llama_index/models/llama-2-13b-chat.Q4_0.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 5120
llama_model_loader: - kv   4:                          llama.block_count u32              = 40
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 13824
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:         

#### Postgres

In [5]:
%pip install psycopg2-binary pgvector asyncpg "sqlalchemy[asyncio]" greenlet

Note: you may need to restart the kernel to use updated packages.


In [6]:
import psycopg2

# Define your database connection parameters
db_name = "rag_db"  # Use the existing database name here
host = "localhost"
password = "password"  # Replace with your actual PostgreSQL password
port = "5432"  # Default PostgreSQL port
user = "myuser"  # Replace with your actual PostgreSQL username

# Connect directly to the 'rag_db' database
conn = psycopg2.connect(
    dbname=db_name,  # Connect directly to 'rag_db'
    host=host,
    password=password,
    port=port,
    user=user,
)
conn.autocommit = True

# Optionally, perform any operations on 'rag_db' using a cursor
with conn.cursor() as c:
    # Drop and create operations are not needed if the database already exists
    # Here you can perform other database setup actions if necessary
    print(f"Connected to {db_name} successfully.")

# Close the connection when done
#conn.close()

Connected to rag_db successfully.


Create table (checks if exist, truncates if already exist and generates again)

In [7]:
from sqlalchemy import create_engine, text
from sqlalchemy.exc import OperationalError
from llama_index.vector_stores.postgres import PGVectorStore

# Connection parameters
db_params = {
    "user": "myuser",      # Replace with your actual username
    "password": "password",  # Replace with your actual password
    "host": "localhost",   # Adjust if your database is hosted elsewhere
    "port": "5432",        # Default PostgreSQL port
    "database": "rag_db"   # Replace with your actual database name
}

# Create SQLAlchemy engine
engine = create_engine(f"postgresql://{db_params['user']}:{db_params['password']}@{db_params['host']}:{db_params['port']}/{db_params['database']}")

# Update table name to include 'data_' prefix
table_name = "data_software_test_table"  # Postgre adds 'data' in front of table_name
embed_dim = 384  # Embedding dimension

try:
    with engine.connect() as conn:
        # Check if the table exists
        result = conn.execute(text(f"""
            SELECT EXISTS (
                SELECT FROM information_schema.tables 
                WHERE table_schema = 'public' AND table_name = :table_name
            );
        """), {"table_name": table_name}).scalar()

        if result:
            print(f"Table '{table_name}' exists. Cleaning it...")
            conn.execute(text(f"TRUNCATE TABLE {table_name};"))
        else:
            print(f"Table '{table_name}' does not exist. Creating it...")

        # Create a new table for vector store
        vector_store = PGVectorStore.from_params(
            database=db_params["database"],
            host=db_params["host"],
            password=db_params["password"],
            port=db_params["port"],
            user=db_params["user"],
            table_name="pmc_table",  # Use unprefixed name; PGVectorStore adds 'data_' automatically
            embed_dim=embed_dim,
        )
        print(f"Table '{table_name}' has been created.")
except OperationalError as e:
    print(f"Error: {e}")
    print("Make sure the database exists and connection parameters are correct.")

Table 'data_software_test_table' does not exist. Creating it...
Table 'data_software_test_table' has been created.


## Ingestion Pipeline

### Data Loading and Preprocessing:

Preprocessing of the document does this:

1- Remove all text after the stopwords, like 'References', because they do not have any information for us.

2- Remove section numbering, letters, extra spaces, new lines.

3- Remove 'tables' from the text.

In [8]:
from pathlib import Path
from llama_index.readers.file import PyMuPDFReader
from llama_index.core.node_parser import SentenceSplitter

## Preprocessing 

### HTML Preprocessing

In [9]:
from bs4 import BeautifulSoup
import re
from pathlib import Path

def extract_text_and_elements(html_file_path):
    """Extract meaningful information from static HTML."""
    # Load the HTML content from the file
    with open(html_file_path, "r", encoding="utf-8") as file:
        html_content = file.read()
    
    # Parse the HTML content
    soup = BeautifulSoup(html_content, "html.parser")
    
    # Extract the title of the page
    title = soup.title.string.strip() if soup.title else "No Title"
    
    # Extract meta description (if available)
    meta_description = ""
    meta_tag = soup.find("meta", attrs={"name": "description"})
    if meta_tag and meta_tag.get("content"):
        meta_description = meta_tag["content"]
    
    # Extract all visible text (paragraphs, headings, etc.)
    text_content = []
    for tag in soup.find_all(["p", "h1", "h2", "h3", "h4", "h5", "h6"]):  # Common text tags
        text = tag.get_text(strip=True)
        if text:
            text_content.append(text)
    
    # Extract all button labels
    buttons = [button.get_text(strip=True) for button in soup.find_all("button")]
    
    # Extract all links (text and URLs)
    links = []
    for link in soup.find_all("a", href=True):
        link_text = link.get_text(strip=True)
        links.append({"text": link_text, "url": link["href"]})
    
    # Structure the extracted information
    extracted_content = {
        "title": title,
        "meta_description": meta_description,
        "text_content": "\n".join(text_content),
        "buttons": buttons,
        "links": links,
    }
    
    return extracted_content

def preprocess_for_rag(content):
    """Preprocess extracted HTML content for use in a RAG system."""
    # Combine title, meta description, text content, and button labels
    combined_content = (
        f"Title: {content['title']}\n\n"
        f"Meta Description: {content['meta_description']}\n\n"
        f"Text Content:\n{content['text_content']}\n\n"
        f"Buttons:\n{', '.join(content['buttons'])}\n\n"
        f"Links:\n"
    )
    # Add links in a readable format
    for link in content["links"]:
        combined_content += f"- {link['text']} ({link['url']})\n"
    
    return combined_content

# Input and Output Paths
html_file_path = Path("./data/imdbhtml.html")  # Replace with your actual HTML file path
output_path = Path("./processed_html.txt")  # File to save processed output

# Extract and preprocess the HTML content
extracted_content = extract_text_and_elements(html_file_path)
processed_content = preprocess_for_rag(extracted_content)

# Write the processed content into an output file
with output_path.open("w", encoding="utf-8") as output_file:
    output_file.write(processed_content)

print(f"Processed HTML content written to {output_path}")

Processed HTML content written to processed_html.txt


### pdf and txt preprocessing

In [10]:
import re
from pathlib import Path
from bs4 import BeautifulSoup  # For HTML parsing
from llama_index.readers.file import PyMuPDFReader

# Define stop keywords
stop_keywords = []

def preprocess_text(text, stop_keywords):
    """Preprocess text by stopping at the earliest occurrence of any keyword, ignoring case."""
    earliest_position = len(text)  # Default to the end of the text
    
    for keyword in stop_keywords:
        pattern = rf"\b{re.escape(keyword)}\b"  # Match exact word boundaries
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            print(f"Keyword '{keyword}' found at position {match.start()}")  # Debugging
            earliest_position = min(earliest_position, match.start())
    
    # Truncate the text at the earliest keyword match
    return text[:earliest_position]

def clean_text(block_text):
    """Clean and process individual blocks of text."""
    block_text = " ".join(block_text.split())  # Remove extra spaces and newlines
    block_text = re.sub(r'^\d+\.\s+', '', block_text)  # Remove section numbering
    block_text = re.sub(r'[^A-Za-z0-9\s,.]', '', block_text)  # Keep only basic punctuation and letters

    # Heuristic: Skip blocks that appear to be tables (e.g., many numbers or special formats)
    if len(block_text) < 10 or re.match(r'Page \d+', block_text):
        return None  # Skip short blocks or page numbers

    # Remove blocks that have a high density of numbers or specific patterns (tables)
    if re.search(r'\d{2,}', block_text) and re.search(r'(\d+\.\d+|\d+%)', block_text):
        return None  # Skip blocks with many numeric values

    # Remove blocks with common table headers like 'Table', 'Mediation Effect', etc.
    if re.search(r'\b(Table|Effect|Mediation|Summary|IV|Mediator|Cont)\b', block_text, re.IGNORECASE):
        return None

    return block_text

In [41]:
# Paths to input files
pdf_file_path = Path("./data/IMPb_Software_Design_Document.pdf")  
pdf_file_path_coding = Path("./data/Playwright_Coding_Document.pdf")  

pdf_file_paths = [pdf_file_path, pdf_file_path_coding]

html_file_path = Path("./data/processed_html.txt")  # Replace with your actual HTML file path
output_path = Path("./processed_documents.txt")  # File to save processed documents

In [42]:
documents = []

for pdf_file_path in pdf_file_paths:
    loader = PyMuPDFReader()
    loaded_docs = loader.load(file_path=str(pdf_file_path))
    print(f"{pdf_file_path.name}: {len(loaded_docs)} documents loaded")

    for doc_idx, doc in enumerate(loaded_docs):
        processed_text = preprocess_text(doc.text, stop_keywords)  # Use `doc.page_content` for text

        cleaned_blocks = []
        for block in processed_text.split("\n"):
            cleaned_block = clean_text(block)
            if cleaned_block:
                cleaned_blocks.append(cleaned_block)

        final_text = "\n".join(cleaned_blocks)

        # Append a dictionary to the `documents` list
        documents.append({
            "source": pdf_file_path.name,
            "content": final_text
        })

# Process the plain text file
with open(html_file_path, "r", encoding="utf-8") as txt_file:
    txt_content = txt_file.read()

# Preprocess the text
processed_text = preprocess_text(txt_content, stop_keywords)

cleaned_blocks = []
for block in processed_text.split("\n"):
    cleaned_block = clean_text(block)
    if cleaned_block:
        cleaned_blocks.append(cleaned_block)

final_text = "\n".join(cleaned_blocks)

# Append the processed text file to the documents list
documents.append({
    "source": html_file_path.name,
    "content": final_text
})

# Write the processed text into the output file
with output_path.open("w", encoding="utf-8") as output_file:
    for idx, doc in enumerate(documents):
        output_file.write(f"Document {idx} from {doc['source']}:\n")
        output_file.write(doc['content'] + "\n")
        output_file.write("=" * 80 + "\n")  # Separator between documents

print(f"Processed documents written to {output_path}")


IMPb_Software_Design_Document.pdf: 7 documents loaded
Playwright_Coding_Document.pdf: 6 documents loaded
Processed documents written to processed_documents.txt


In [43]:
print(documents[2])
print(len(documents))

{'source': 'IMPb_Software_Design_Document.pdf', 'content': 'Requirements\n3.1 Functional Requirements\nComprehensive Search Functionality\nAdvanced keyword and phrase search with fulltext indexing.\nAutosuggestions based on search context and user history.\nMultifaceted filtering options e.g., genre, release year, language.\nPredictive search for incomplete or misspelled queries.\nDetailed Information Pages\nRich metadata for movies, TV shows, and celebrities.\nFeatures such as trailers, image galleries, user reviews, and critic ratings.\nRealtime updates for box office performance and award statistics.\nLinks to external streaming platforms or purchase options.\nUser Account Features\nUser registration, loginlogout, and multifactor authentication.\nPersonalized profiles with privacy settings and activity logs.\nOption to link social media accounts for seamless sharing.\nWatchlist and Favorites Management\nDynamic watchlist creation and management with category support.\nAbility to sha

### Splitting Documents

In [44]:
from llama_index.core.node_parser import SentenceSplitter

text_parser = SentenceSplitter(
    chunk_size=1024,
    # separator=" ",
)

In [45]:
text_chunks = []
# Maintain relationship with source doc index to help inject doc metadata
doc_idxs = []

for doc_idx, doc in enumerate(documents):
    # Use dictionary-style indexing to access 'content'
    cur_text_chunks = text_parser.split_text(doc['content'])
    
    # Optionally print chunk info for debugging
    # print(f"Document {doc_idx} has {len(cur_text_chunks)} chunks")
    
    text_chunks.extend(cur_text_chunks)
    doc_idxs.extend([doc_idx] * len(cur_text_chunks))  # Maintain mapping between chunks and their original document

In [46]:
print(len(text_chunks))

17


### Construct Nodes from Text Chunks

In [47]:
from llama_index.core.schema import TextNode

nodes = []

for idx, text_chunk in enumerate(text_chunks):
    node = TextNode(
        text=text_chunk,
        metadata={
            "source": documents[doc_idxs[idx]]["source"],  # Access 'source' from the dictionary
            "doc_index": doc_idxs[idx]  # Optional: document index
        }
    )
    nodes.append(node)

In [48]:
nodes[1]

TextNode(id_='7e90ac9d-cd85-478a-b777-a145c1136ff8', embedding=None, metadata={'source': 'IMPb_Software_Design_Document.pdf', 'doc_index': 1}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text='The purpose of this document is to outline the software design for the IMPb web platform.\nIMPb is a widelyused online database that provides comprehensive information about\nmovies, TV shows, actors, production crew, trailers, and usergenerated reviews. This\ndocument serves as a blueprint for the development, maintenance, scalability, and\nintegration of advanced features into the platform.\nThe IMPb web platform allows users to\nSearch for and browse movies, TV shows, celebrity profiles, and production details.\nView detailed information about titles, including trailers, cast, crew, reviews, ratings,\nand box office performance.\nCreate user accounts to rate, review, and discuss titles, create wa

### Generate Embeddings for each Node

Using sentence_transformers

In [49]:
for node in nodes:
    node_embedding = embed_model.get_text_embedding(
        node.get_content(metadata_mode="text")
    )
    node.embedding = node_embedding

In [50]:
print(type(nodes[0].embedding))
print(nodes[0].embedding)
print(len(nodes))

<class 'list'>
[-0.06500004976987839, -0.026631640270352364, -0.0065530091524124146, -0.04911062866449356, 0.002144655678421259, -0.002839255379512906, -0.010860219597816467, 0.03735371306538582, -0.015071369707584381, 1.5128996892599389e-05, 0.04870150610804558, -0.03448118641972542, 0.005861581768840551, -0.010402941145002842, -0.0122929522767663, 0.03205455094575882, -0.016111919656395912, -0.008281818591058254, 0.023431234061717987, 0.05985504761338234, 0.02716049551963806, -0.003940812312066555, -0.012262781150639057, -0.041150376200675964, 0.017546379938721657, 0.0632588118314743, -0.043382540345191956, -0.020719710737466812, -0.040972527116537094, -0.2025124579668045, 0.008221041411161423, -0.013206740841269493, 0.02134670503437519, 0.011414052918553352, 0.008081169798970222, -0.013245726935565472, -0.0011889674933627248, 0.03756382688879967, -0.044760171324014664, -0.0247772429138422, -0.0035183823201805353, 0.02039165422320366, -0.00552359176799655, -0.03486882522702217, -0.00

### Load Nodes into a Vector Store

We now insert these nodes into our `PostgresVectorStore`.

In [51]:
vector_store.add(nodes)

['c9f7c380-2456-4f99-83df-27b3583275f7',
 '7e90ac9d-cd85-478a-b777-a145c1136ff8',
 '93102df6-cbff-452b-9669-45370ae2fabd',
 'c552267f-bbe0-49b0-a398-a0c1395fceee',
 '6edb2474-a182-4a8e-97f0-1a197fb13f65',
 'b210ba90-f3eb-46bc-865d-19e22888a335',
 '5bc9d28f-19f0-411e-b77d-95cfbbef9365',
 'c682372c-55b2-46a8-847e-acbe9d7349c6',
 '8a20a28e-988f-43b4-ad46-9af55081002c',
 'a878e863-f480-4542-9d19-16c63e55630d',
 'b4780614-0e75-46b9-bb0c-1f7dc82d56f5',
 'c62bfa70-b30f-422e-874b-c7edf7adb187',
 'a822222f-a275-4970-9358-9afe153e4df4',
 '0f0ee7ba-e437-4d51-ba47-2e4a83f31efa',
 '4af979ae-9abc-4ae2-9588-eeea8eb5fb5b',
 '86bdc897-169f-4238-a0ed-94da6e583a98',
 'a0d20cda-e2c0-45fe-801e-9b443547135d']

## Retrieval Pipeline

In [52]:
from llama_index.core import QueryBundle
from llama_index.core.retrievers import BaseRetriever
from typing import Any, List
from llama_index.core.schema import NodeWithScore
from typing import Optional
from llama_index.core.vector_stores import VectorStoreQuery

In [53]:
class VectorDBRetriever(BaseRetriever):
    """Retriever over a postgres vector store."""

    def __init__(
        self,
        vector_store: PGVectorStore,
        embed_model: Any,
        query_mode: str = "default",
        similarity_top_k: int = 2,
    ) -> None:
        """Init params."""
        self._vector_store = vector_store
        self._embed_model = embed_model
        self._query_mode = query_mode
        self._similarity_top_k = similarity_top_k
        super().__init__()

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve."""
        query_embedding = embed_model.get_query_embedding(
            query_bundle.query_str
        )
        vector_store_query = VectorStoreQuery(
            query_embedding=query_embedding,
            similarity_top_k=self._similarity_top_k,
            mode=self._query_mode,
        )
        query_result = vector_store.query(vector_store_query)

        nodes_with_scores = []
        for index, node in enumerate(query_result.nodes):
            score: Optional[float] = None
            if query_result.similarities is not None:
                score = query_result.similarities[index]
            nodes_with_scores.append(NodeWithScore(node=node, score=score))

        return nodes_with_scores

In [54]:
retriever = VectorDBRetriever(
    vector_store, embed_model, query_mode="default", similarity_top_k=2
)

retriever

<__main__.VectorDBRetriever at 0x658183450>

## Response

In [27]:
from llama_index.core.query_engine import RetrieverQueryEngine

query_engine = RetrieverQueryEngine.from_args(retriever, llm=llm)

In [28]:
query_str = "What can be a test scenario in IMPB website about login page?"

response = query_engine.query(query_str)


llama_print_timings:        load time =   11233.69 ms
llama_print_timings:      sample time =       9.47 ms /   256 runs   (    0.04 ms per token, 27041.30 tokens per second)
llama_print_timings: prompt eval time =   15102.40 ms /   623 tokens (   24.24 ms per token,    41.25 tokens per second)
llama_print_timings:        eval time =   32005.50 ms /   255 runs   (  125.51 ms per token,     7.97 tokens per second)
llama_print_timings:       total time =   47246.92 ms /   878 tokens


In [29]:
print(str(response))

 Based on the provided context information, here are some possible test scenarios for the login page of the IMPb website:

1. Successful login: Test that a valid user can successfully log in to the website using their registered email address and password.
2. Incorrect login credentials: Test that an invalid email address or password results in an error message.
3. Forgotten password: Test that a user can reset their password using the "forgotten password" feature.
4. Registered user login: Test that a registered user can log in to the website using their email address and password.
5. Unregistered user login: Test that an unregistered user cannot log in to the website.
6. Login form validation: Test that the login form validates user input, such as requiring a valid email address and password.
7. Login page layout and design: Test that the login page is displayed correctly and is easy to use.
8. Login page functionality: Test that the login page functions correctly, such as submitting

In [30]:
print(response.source_nodes[0].get_content())

Software Design Document for IMPb
Web Platform
Version 2.0
Prepared by Your Name
Date Insert Date
Introduction
References
System Overview
Requirements
Functional Requirements
NonFunctional Requirements
System Architecture
Detailed Design
Frontend Design
Backend Design
Database Design
ThirdParty Integrations
Security Considerations
Performance Optimization
Testing and Validation
Deployment Strategy
Future Enhancements
Introduction
1.1 Purpose


In [31]:
query_str = "Generate different test scenarios for IMPB web page"

response = query_engine.query(query_str)

Llama.generate: 604 prefix-match hit, remaining 15 prompt tokens to eval

llama_print_timings:        load time =   11233.69 ms
llama_print_timings:      sample time =       9.42 ms /   256 runs   (    0.04 ms per token, 27184.88 tokens per second)
llama_print_timings: prompt eval time =     915.30 ms /    15 tokens (   61.02 ms per token,    16.39 tokens per second)
llama_print_timings:        eval time =   31749.55 ms /   255 runs   (  124.51 ms per token,     8.03 tokens per second)
llama_print_timings:       total time =   32799.17 ms /   270 tokens


In [32]:
print(str(response))



Test Scenario 1: Search Functionality

* Enter a movie title in the search bar and press Enter.
* Verify that the search results display the expected movie information, including title, cast, crew, and ratings.
* Test with different search terms, such as actor names, directors, genres, and release years.
* Verify that the search results are sorted and filtered correctly based on user preferences.

Test Scenario 2: User Profile Creation

* Create a new user profile and verify that the system sends a confirmation email.
* Log in to the system using the created profile and verify that the user can access the platform's features.
* Test with different user information, such as email addresses, usernames, and passwords.
* Verify that the system enforces password policies and account lockout policies.

Test Scenario 3: Movie Details Page

* Navigate to a movie details page and verify that the page displays the expected information, such as title, cast, crew, and ratings.
* Test with differ

In [33]:
print(response.source_nodes[0].get_content())

Software Design Document for IMPb
Web Platform
Version 2.0
Prepared by Your Name
Date Insert Date
Introduction
References
System Overview
Requirements
Functional Requirements
NonFunctional Requirements
System Architecture
Detailed Design
Frontend Design
Backend Design
Database Design
ThirdParty Integrations
Security Considerations
Performance Optimization
Testing and Validation
Deployment Strategy
Future Enhancements
Introduction
1.1 Purpose


In [34]:
query_str = "Generate a test flow for User Profile and Watchlist features in IMPB website, explain step by step"

response = query_engine.query(query_str)

Llama.generate: 31 prefix-match hit, remaining 971 prompt tokens to eval

llama_print_timings:        load time =   11233.69 ms
llama_print_timings:      sample time =       9.42 ms /   256 runs   (    0.04 ms per token, 27164.69 tokens per second)
llama_print_timings: prompt eval time =   22164.46 ms /   971 tokens (   22.83 ms per token,    43.81 tokens per second)
llama_print_timings:        eval time =   34094.74 ms /   255 runs   (  133.70 ms per token,     7.48 tokens per second)
llama_print_timings:       total time =   56403.42 ms /  1226 tokens


In [35]:
print(str(response))



Test Flow for User Profile and Watchlist Features in IMPb Website

Step 1: User Registration

* Go to the IMPb website and click on the "Sign Up" button.
* Fill in the registration form with valid information, including email address, username, and password.
* Click on the "Create Account" button to complete the registration process.

Step 2: User Profile Creation

* After registration, the user should be redirected to their profile page.
* On the profile page, the user should be able to view their profile information, including their username, email address, and a brief description of their watchlist.
* The user should also be able to edit their profile information, including their username and email address.

Step 3: Watchlist Creation

* On the profile page, the user should be able to create a new watchlist by clicking on the "Create Watchlist" button.
* The user should be able to add movies or TV shows to their watchlist by searching for the title and clicking on the "Add to Watc

In [36]:
print(response.source_nodes[0].get_content())

The purpose of this document is to outline the software design for the IMPb web platform.
IMPb is a widelyused online database that provides comprehensive information about
movies, TV shows, actors, production crew, trailers, and usergenerated reviews. This
document serves as a blueprint for the development, maintenance, scalability, and
integration of advanced features into the platform.
The IMPb web platform allows users to
Search for and browse movies, TV shows, celebrity profiles, and production details.
View detailed information about titles, including trailers, cast, crew, reviews, ratings,
and box office performance.
Create user accounts to rate, review, and discuss titles, create watchlists, and
receive personalized recommendations.
Integrate with thirdparty services like streaming platforms, ticket booking, and social
Access APIs for external applications and data analysis.
1.3 Glossary
User Enduser of the IMPb platform.
CRUD Create, Read, Update, Delete operations.
REST Repre

In [37]:
print(response.source_nodes[1].get_content())

Requirements
3.1 Functional Requirements
Comprehensive Search Functionality
Advanced keyword and phrase search with fulltext indexing.
Autosuggestions based on search context and user history.
Multifaceted filtering options e.g., genre, release year, language.
Predictive search for incomplete or misspelled queries.
Detailed Information Pages
Rich metadata for movies, TV shows, and celebrities.
Features such as trailers, image galleries, user reviews, and critic ratings.
Realtime updates for box office performance and award statistics.
Links to external streaming platforms or purchase options.
User Account Features
User registration, loginlogout, and multifactor authentication.
Personalized profiles with privacy settings and activity logs.
Option to link social media accounts for seamless sharing.
Watchlist and Favorites Management
Dynamic watchlist creation and management with category support.
Ability to share watchlists with friends or make them public.
Notifications for upcoming rel

In [39]:
query_str = """
Convert these instructions to Java code:

Step 1: User Registration
1. Go to the IMPb website and click on the Sign Up button.
2. Fill in the registration form with valid information, including email address, username, and password.
3. Click on the Create Account button to complete the registration process.

Step 2: User Profile Creation
1. After registration, the user should be redirected to their profile page.
2. On the profile page, the user should be able to view their profile information, including their username, email address, and a brief description of their watchlist.
3. The user should also be able to edit their profile information, including their username and email address.
""";

response = query_engine.query(query_str)

Llama.generate: 12 prefix-match hit, remaining 754 prompt tokens to eval

llama_print_timings:        load time =   11233.69 ms
llama_print_timings:      sample time =       9.69 ms /   256 runs   (    0.04 ms per token, 26432.63 tokens per second)
llama_print_timings: prompt eval time =  164277.20 ms /   893 tokens (  183.96 ms per token,     5.44 tokens per second)
llama_print_timings:        eval time =   33589.52 ms /   255 runs   (  131.72 ms per token,     7.59 tokens per second)
llama_print_timings:       total time =   49959.40 ms /  1148 tokens


In [40]:
print(str(response))


Here is the Java code for the instructions provided:

Step 1: User Registration

// Create a registration form with email, username, and password fields
Form registrationForm = new Form();
registrationForm.addTextField("email", "Enter your email address");
registrationForm.addTextField("username", "Enter your username");
registrationForm.addPasswordField("password", "Enter your password");

// Add a create account button
registrationForm.addSubmitButton("Create Account");

// Handle form submission
registrationForm.addEventListener(new FormListener() {
    @Override
    public void handleFormSubmission(Form form) {
        // Get the email, username, and password from the form
        String email = form.getEmail();
        String username = form.getUsername();
        String password = form.getPassword();

        // Validate the form data
        if (email.isEmpty() || username.isEmpty() || password.isEmpty()) {
            form.setError("All fields are required");
            retur

In [55]:
query_str = """
Convert these instructions to Playwright testing code:

Step 1: User Registration
1. Go to the IMPb website and click on the Sign Up button.
2. Fill in the registration form with valid information, including email address, username, and password.
3. Click on the Create Account button to complete the registration process.

Step 2: User Profile Creation
1. After registration, the user should be redirected to their profile page.
2. On the profile page, the user should be able to view their profile information, including their username, email address, and a brief description of their watchlist.
3. The user should also be able to edit their profile information, including their username and email address.
""";

response = query_engine.query(query_str)

Llama.generate: 12 prefix-match hit, remaining 549 prompt tokens to eval

llama_print_timings:        load time =   11233.69 ms
llama_print_timings:      sample time =       9.61 ms /   256 runs   (    0.04 ms per token, 26647.24 tokens per second)
llama_print_timings: prompt eval time =   13834.98 ms /   549 tokens (   25.20 ms per token,    39.68 tokens per second)
llama_print_timings:        eval time =   33270.57 ms /   255 runs   (  130.47 ms per token,     7.66 tokens per second)
llama_print_timings:       total time =   47253.13 ms /   804 tokens


In [56]:
print(str(response))


Here is the Playwright testing code for the given instructions:

const { test, expect } = require('playwright-test');

test('User Registration', async ({ page }) => {

// Step 1: Go to the IMPb website and click on the Sign Up button
await page.goto('https://example.com/sign-up');
await expect(page).toHaveSelector('button[type="submit"]');

// Step 2: Fill in the registration form with valid information
await page.fill('input[name="username"]', 'testuser');
await page.fill('input[name="email"]', 'testuser@example.com');
await page.fill('input[name="password"]', 'password123');

// Step 3: Click on the Create Account button to complete the registration process
await page.click('button[type="submit"]');

// Step 4: Verify that the user is redirected to their profile page
await expect(page).toHaveUrl('https://example.com/profile');

// Step 5: Verify that the user's profile information
