In [1]:
!pip -q install langchain openai tiktoken chromadb pypdf unstructured markdown pydantic

In [2]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFLoader
from langchain import PromptTemplate
from langchain.document_loaders import DirectoryLoader
from dotenv import load_dotenv
import os
from lib.files import load_text_documents_from_directory, load_pdf_documents_from_directory, remove_non_ascii, consolidate_whitespace, load_markdown_documents_from_directory
from lib.lc import process_llm_response
# Load environment variables from a .env file; later cells read INPUT_DIRECTORY and
# DATA_BASE_DIRECTORY through os.getenv.
# NOTE(review): presumably the OpenAI API key is supplied the same way — confirm.
load_dotenv()

True

### Read directory of files

In [3]:

# Directory holding the source documents, configured through the INPUT_DIRECTORY environment variable.
input_directory = os.getenv("INPUT_DIRECTORY")
if not input_directory:
    # Stop here with a clear message instead of handing None to the loader.
    raise ValueError("INPUT_DIRECTORY is not set; define it in the environment or in .env")

# Swap in the PDF loader below when the source material is PDF rather than markdown.
# documents = load_pdf_documents_from_directory(input_directory)
documents = load_markdown_documents_from_directory(input_directory)
for document in documents:
    # Optional clean-up passes, currently disabled:
    # document.page_content = remove_non_ascii(document.page_content)
    # document.page_content = consolidate_whitespace(document.page_content)
    print(document.page_content)

# Number of documents loaded (displayed as the cell result).
len(documents)


Quilting instructions for Bee Kind Pillow

Contact information

Designed by: Amy Bradley Designs 
Visit: www.amybradleydesigns.com

Contact: 785-218-6803 or info@amybradleydesigns.com

Description

This guide provides step-by-step instructions for creating a Bee Kind pillow measuring 14 x 14 inches. It includes a comprehensive list of materials and tools needed, as well as a pre-prepared pattern and placement sheet.

Material List

13"x13" or Fat Quarter or 3/8 yd Background

1/8 yd. or Fat Eighth Inside Border

1/2 yd.  Outside Border & Pillow Back

1 yd. Piping

4" x 6"  Letter K

2" x 4"  Letter I

4" x 4"  Letter N

4" x 6"  Letter D

4" x 5"  Body

3" x 4"  Stripes

3" x 4"  Wings

2" x 2"  Heart

2" x 3"  Eyes

2" x 2" Cheeks

2" x 3"  Feet

White Muslin to back light fabrics

1/2 yd. (17 width) HeatnBond  Lite fusible web

19 x 19 piece of White Batting (Amy used Warm & White by The Warm Company )

Smallest package Sulky  Tear Away Iron On Stabilizer Machine embroidery thread - 

1

### Create Text Chunks

In [4]:
# Split every loaded document into overlapping character chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=10000,
)
texts = text_splitter.split_documents(documents)

# Number of chunks produced (displayed as the cell result).
len(texts)

2

In [5]:
# Location of the persisted Chroma database, configured through DATA_BASE_DIRECTORY.
vector_db_path = os.getenv("DATA_BASE_DIRECTORY")
if not vector_db_path:
    # Without a path the later cells have nowhere to persist to or load from.
    raise ValueError("DATA_BASE_DIRECTORY is not set; define it in the environment or in .env")

# Embedding function shared by the cell that builds the index and the cell that reloads it.
embedding = OpenAIEmbeddings()
print(vector_db_path)

./aimy_db/


### Create Embeddings and write Vector DB
uncomment this code to rebuild embeddings database

In [6]:

# Build the persisted Chroma index only when it is missing. Chroma.from_documents adds to
# whatever collection already lives in persist_directory, so re-running this cell against an
# existing database would store every chunk a second time and repeat the embedding calls.
# To force a rebuild, delete the directory at vector_db_path and re-run this cell.
vector_db_exists = (
    bool(vector_db_path)
    and os.path.isdir(vector_db_path)
    and bool(os.listdir(vector_db_path))
)
if vector_db_exists:
    print(f"Reusing existing vector DB at {vector_db_path}")
else:
    vectordb = Chroma.from_documents(documents=texts,
                                     embedding=embedding,
                                     persist_directory=vector_db_path)
    vectordb.persist()  # flush the index to disk

# Drop the in-memory handle so the next cell loads the database from disk.
vectordb = None

### Load Cached Embeddings

In [7]:
# Re-open the persisted collection from disk with the same embedding function.
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=vector_db_path,
)

### Test Retrieval

In [8]:
# Retriever that returns the two closest chunks for each query.
retriever = vectordb.as_retriever(
    search_kwargs=dict(k=2),
)

# Sanity check: run one sample question and show how many chunks came back.
retriever_docs = retriever.get_relevant_documents(
    "Should I cut on the pencil line?"
)
len(retriever_docs)

2

### Create the Chain

In [20]:
# Prompt for the "stuff" chain: {context} receives the retrieved chunks, {question} the user's query.
# The fallback sentence now agrees with the "Instruction:" paragraph (answer from the context only);
# it previously told the model to answer from its own knowledge, contradicting that instruction.
prompt_template = """You are a quilting instructor please read the content below to help the user design a quilt. 
If the content below does not contain the answer, say that you don't know.

{context}

Instruction: Only provide the answers and details from the context given above, and don't use any other information which is not 
covered in the context. Provide the response in markdown format.
Question: {question}
"""
PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# Forwarded to the underlying "stuff" documents chain so that it uses PROMPT
# instead of the library's default question-answering prompt.
chain_type_kwargs = {"prompt": PROMPT}


# return_source_documents=True keeps the retrieved chunks in the result alongside the answer.
qa_chain = RetrievalQA.from_chain_type(llm=OpenAI(temperature=0.5, max_tokens=1000),
                                  chain_type="stuff",
                                  retriever=retriever,
                                  chain_type_kwargs=chain_type_kwargs,
                                  return_source_documents=True)

## Ask a question and process the response

In [25]:
# Alternative question to try:
# query = "What tools do I need to create the bee kind pillow?"
query = "How do I know what fabrics I need to back with muslin?"
# Run the chain on the question; the result is kept in llm_response for inspection.
llm_response = qa_chain(query)
# Project helper from lib.lc; judging by the recorded output it prints the answer and its sources — TODO confirm.
process_llm_response(llm_response)

 Light fabrics that will be placed over dark fabric or a strong print need to be backed with muslin. This is noted in the material list.


Sources:
text_assets/bee_kind_pillow.md
text_assets/bee_kind_pillow.md
