In [0]:
!pip install pymupdf4llm
!pip install llama_index

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:

from databricks.vector_search.client import VectorSearchClient
# Create the Databricks Vector Search client with its default arguments.
# NOTE(review): `vsc` is not referenced by any code visible in this part of the
# notebook — confirm it is used further down before keeping this cell.
vsc = VectorSearchClient()

[NOTICE] Using a notebook authentication token. Recommended for development only. For improved performance, please use Service Principal based authentication. To disable this message, pass disable_notice=True to VectorSearchClient().


In [0]:
from jinja2 import Environment, FileSystemLoader
from llama_index.core import PromptTemplate, StorageContext, VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.storage.docstore import SimpleDocumentStore


def construct_index(llm, nodes, leaf_nodes):
    """Build a vector index over the leaf nodes, backed by a docstore of all nodes.

    Args:
        llm: Forwarded unchanged to ``VectorStoreIndex`` as the ``llm`` keyword.
        nodes: Every node of the hierarchy; all of them are added to a fresh
            ``SimpleDocumentStore``.
        leaf_nodes: The nodes handed to ``VectorStoreIndex`` for indexing.

    Returns:
        A ``(storage_context, base_index)`` tuple, where the storage context
        wraps the document store and the index was built with that context.
    """
    document_store = SimpleDocumentStore()
    document_store.add_documents(nodes)
    context = StorageContext.from_defaults(docstore=document_store)
    return context, VectorStoreIndex(leaf_nodes, storage_context=context, llm=llm)


def construct_retriever(storage_context, index, top_k):
    """Wrap the index's base retriever in an ``AutoMergingRetriever``.

    Args:
        storage_context: Passed as the second positional argument to
            ``AutoMergingRetriever``.
        index: Object whose ``as_retriever`` method yields the base retriever.
        top_k: Passed to ``as_retriever`` as ``similarity_top_k``.

    Returns:
        The constructed ``AutoMergingRetriever``.
    """
    return AutoMergingRetriever(
        index.as_retriever(similarity_top_k=top_k), storage_context
    )


def construct_queryengine(retriever, template_dir, template_file):
    """Create a query engine whose text-QA prompt comes from a Jinja template file.

    Args:
        retriever: Retriever handed to ``RetrieverQueryEngine.from_args``.
        template_dir: Directory given to the Jinja ``FileSystemLoader``.
        template_file: Name of the template to load from ``template_dir``.

    Returns:
        The query engine, after its ``response_synthesizer:text_qa_template``
        prompt has been replaced by the rendered template (rendered with no
        variables).
    """
    # The engine is created first, before any template I/O is attempted.
    engine = RetrieverQueryEngine.from_args(retriever)

    jinja_env = Environment(loader=FileSystemLoader(template_dir))
    rendered = jinja_env.get_template(template_file).render()

    engine.update_prompts(
        {"response_synthesizer:text_qa_template": PromptTemplate(rendered)}
    )
    return engine


In [0]:
import pymupdf4llm
from llama_index.core import Document
from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes


def parse_pdf(pdf_file):
    """Return the result of ``pymupdf4llm.to_markdown`` for ``pdf_file``."""
    return pymupdf4llm.to_markdown(pdf_file)


def text_splitter(text, chunk_sizes=None):
    """Split ``text`` into a hierarchy of nodes and return all nodes plus the leaves.

    Args:
        text: The text wrapped in a single ``Document`` before parsing.
        chunk_sizes: Sequence of chunk sizes passed to
            ``HierarchicalNodeParser.from_defaults``. Defaults to
            ``[1024, 512]`` when omitted or ``None``.

    Returns:
        A ``(nodes, leaf_nodes)`` tuple: the full output of
        ``get_nodes_from_documents`` and the result of ``get_leaf_nodes`` on it.
    """
    # A ``None`` sentinel avoids sharing one mutable default list across calls.
    if chunk_sizes is None:
        chunk_sizes = [1024, 512]

    documents = [Document(text=text)]
    # Copy so the parser never holds a reference to the caller's sequence
    # (this also lets callers pass a tuple).
    parser = HierarchicalNodeParser.from_defaults(chunk_sizes=list(chunk_sizes))
    nodes = parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)
    return nodes, leaf_nodes

    
class RAG:
    """Retrieval-augmented question answering over a single PDF.

    Construction immediately parses the PDF, splits it into nodes, builds the
    index and retriever, and stores the resulting query engine on the instance.

    ``params`` is indexed with the keys ``"chunk_sizes"``, ``"top_k"``,
    ``"template_dir"`` and ``"template_file"``.
    """

    def __init__(self, pdf_file, llm, params):
        """Store the inputs and build the query engine right away."""
        self.pdf_file, self.llm, self.params = pdf_file, llm, params
        self.setup_queryengine()

    def process_pdf(self):
        """Parse the PDF and populate ``self.nodes`` and ``self.leaf_nodes``."""
        markdown = parse_pdf(self.pdf_file)
        split = text_splitter(markdown, self.params["chunk_sizes"])
        self.nodes, self.leaf_nodes = split

    def setup_queryengine(self):
        """Run the whole pipeline and assign ``self.query_engine``."""
        self.process_pdf()
        context, index = construct_index(self.llm, self.nodes, self.leaf_nodes)
        cfg = self.params
        merging_retriever = construct_retriever(context, index, cfg["top_k"])
        self.query_engine = construct_queryengine(
            merging_retriever, cfg["template_dir"], cfg["template_file"]
        )

    def respond_query(self, query):
        """Return whatever the query engine's ``query`` method returns for ``query``."""
        engine = self.query_engine
        return engine.query(query)


In [0]:
from openai import OpenAI

# `dbutils.secrets.list(scope)` returns secret *metadata* (key names), not secret
# values, so look up the first key in the "hack" scope and fetch its value with
# `dbutils.secrets.get`.
llm = OpenAI(
    base_url="https://dbc-8896238f-7e0f.cloud.databricks.com/serving-endpoints/openai/invocations",
    api_key=dbutils.secrets.get(scope="hack", key=dbutils.secrets.list("hack")[0].key),
)
pdf_file = "/Volumes/home-inspection-gpt/default/inspection-report/report1.pdf"

# Settings consumed by `RAG`: node chunk sizes, retriever top-k, and the
# location of the Jinja prompt template.
params = {
    "chunk_sizes": [512, 256],
    "top_k": 5,
    "template_dir": "prompts",
    "template_file": "template.jinja",
}
# One question per report section, in the order Structure, System, Interior, Exterior.
prompt_list = [
    "What are conditions regarding the home structure including foundation walls, floors, roof, and other structural components of the house?",
    "What are conditions regarding the home system including mechanical, electrical, plumbing system, heating, air conditioning, water heater, electrical panel and wiring, plumbing pipes, and sewage systems?",
    "What are conditions regarding the home interior including walls, ceilings, floors, windows, doors, and built-in appliances?",
    "What are conditions regarding home exterior including driveways, patios, decks, and landscaping?",
]

homeGPT = RAG(pdf_file, llm, params)

In [0]:
# Section titles of the final report, in the same order as the prompts they summarise.
_REPORT_SECTIONS = ("Structure", "System", "Interior", "Exterior")


def _parse_section(response_text):
    """Split one section answer into its rating, cost and reason fields.

    Blank lines are ignored and each line is stripped. The first non-empty line
    is the rating, the second the approximate cost, and everything after that
    is kept as the reason. Missing lines become empty strings instead of
    raising ``IndexError``.
    """
    lines = [line.strip() for line in (response_text or "").split("\n") if line.strip()]
    lines += [""] * (3 - len(lines))  # pad short answers up to three fields
    return {
        "Rating": lines[0],
        "Approximate cost": lines[1],
        "Reason": "\n".join(lines[2:]),
    }


def generate_homeGPT_summary(home_agent, prompts=None):
    """Query the agent once per section and assemble a structured report.

    Args:
        home_agent: Object with a ``respond_query(query)`` method whose result
            exposes the answer text as ``.response``.
        prompts: Section questions, in the order Structure, System, Interior,
            Exterior. Defaults to the notebook-level ``prompt_list``.

    Returns:
        A dict with one entry per section (each holding ``"Rating"``,
        ``"Approximate cost"`` and ``"Reason"``) plus an ``"Overall"`` entry
        containing the agent's answer to a summary request over all section
        answers. Sections without a corresponding answer get empty fields.
    """
    if prompts is None:
        prompts = prompt_list

    # Query the agent that was passed in rather than a notebook global.
    report = [home_agent.respond_query(prompt).response or "" for prompt in prompts]

    # Separate the section answers with real blank lines in the summary request.
    report_string = "\n\n".join(report)
    overall_report = home_agent.respond_query(
        f"Generate a overall summary based on the following perspectives {report_string}"
    ).response

    final_report = {}
    for position, section in enumerate(_REPORT_SECTIONS):
        answer = report[position] if position < len(report) else ""
        final_report[section] = _parse_section(answer)
    final_report["Overall"] = overall_report
    return final_report

In [0]:
import json 
# Build the structured report by querying the RAG agent once per section.
final_report = generate_homeGPT_summary(homeGPT)
# NOTE(review): `report_file` is assigned but not used by the code visible here;
# the report is written to `result.json` in the current working directory instead.
report_file = "/Volumes/home-inspection-gpt/default/inspection-report/HomeGPT_report1.pdf"
# Persist the report dictionary as JSON.
with open('result.json', 'w') as fp:
    json.dump(final_report, fp)

In [0]:
# Show the assembled report dictionary as this cell's output.
final_report

{'Structure': {'Rating': 'Rating: 4/5',
  'Approximate cost': 'Approximate cost: $500 - $2000',
  'Reason': 'Reasons: The foundation walls in the basement show signs of damage, with visible cracks and damaged insulation. The basement floor also has a few visible cracks. Additionally, the sump pump pit does not have a high water level alarm, which is recommended for safety. These issues should be addressed by a qualified professional to prevent further damage and ensure the structural integrity of the home. The cost range provided is an estimate and can vary depending on the extent of the damage and the specific repairs needed.'},
 'System': {'Rating': 'Rating: 5/5',
  'Approximate cost': 'Approximate cost: $500 - $2000',
  'Reason': 'Reasons: The home system has multiple issues that need to be addressed, including potential carbon monoxide risks from the heating system, water softener concerns, gas line issues, electrical deficiencies, and damaged windows. These issues are urgent and i