In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Identifier of the sentence-embedding model: either a Hugging Face model name
# or a path to a local copy of the model.
model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Build the embedding client once; later cells reuse `embeddings` both to build
# the FAISS index and to reload it from disk.
embeddings = HuggingFaceEmbeddings(model_name=model_name)


In [2]:
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os

# File suffixes (including the leading dot) that the loader cell attempts to ingest.
SUPPORTED_EXTENSIONS = (
    '.pptx', '.xlsx', '.docx', '.pdf', '.png', '.jpg', '.jpeg',
    '.txt', '.csv', '.json', '.yaml', '.html',
)


def load_document_paths(directory, extensions=SUPPORTED_EXTENSIONS):
    """Recursively collect paths of supported documents under ``directory``.

    Args:
        directory: Root folder to walk (sub-folders are included).
        extensions: Iterable of file suffixes to accept, each including its
            leading dot. Defaults to ``SUPPORTED_EXTENSIONS``.

    Returns:
        A list of file paths (``root`` joined with the file name) in the order
        ``os.walk`` yields them. Empty if the directory has no matching files.
    """
    # Compare case-insensitively so files such as "REPORT.PDF" are not skipped.
    suffixes = tuple(ext.lower() for ext in extensions)
    document_paths = []
    for root, _, files in os.walk(directory):
        for file in files:
            # Every suffix carries its dot: the previous bare 'yaml' entry also
            # matched unrelated names that merely ended in "yaml".
            if file.lower().endswith(suffixes):
                document_paths.append(os.path.join(root, file))
    return document_paths

# Discover every supported file under the local document folder.
document_paths = load_document_paths('./my-docs')

# The splitter configuration is identical for every file, so build it once
# instead of re-creating it on each loop iteration.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# Accumulates the chunks from every file that loads successfully.
all_documents = []
for document_path in document_paths:
    try:
        # Construct the loader inside the try so that a failure while setting
        # it up is reported and skipped just like a failure while loading.
        loader = UnstructuredFileLoader(document_path)
        document = loader.load()
    except Exception as e:
        # Best effort: one unreadable file must not abort the whole ingestion.
        print(f"Error loading {document_path}: {e}")
        continue
    print(f"Loaded {document_path}")

    documents = text_splitter.split_documents(document)
    all_documents.extend(documents)

  loader = UnstructuredFileLoader(document_path)


Loaded ./my-docs/Amynta Group - Solution Design.docx
Loaded ./my-docs/Broker In A Box/Broker in a Box - Exec Overview (01-21-2025).pptx
Loaded ./my-docs/Broker In A Box/AP_Rules.xlsx
Loaded ./my-docs/Broker In A Box/AP_Rule_Evalution_test1.docx
Loaded ./my-docs/Broker In A Box/Assured Partners - Broker in a Box - Endorsements - Proposed Solution Overview - 01292025.pptx
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/Broker In A Box Change Request#2 1.14.25 - SOW - AP Format.docx
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/Fog Solutions - Assured Partners - Broker in a Box - Proposal.pptx
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/SOW_3060310_Fog Solutions Inc._CAS-1477383-D1Q5K3_267970094 - signed.pdf
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/AssuredPartners - Fog Solutions - Broker in a Box - Exhibit A.docx
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/ECIF PO 101108580 AssuredPartners.pdf
Loaded ./my-docs/Broker In A Box/Proposal & Cont

  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)


Loaded ./my-docs/Broker In A Box/Proposal & Contracts/Broker In A Box Change Request #2 1.23.25 - SOW - AP Format.docx.pdf
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/AssuredPartners - Fog Solutions - Broker in a Box - SOW.docx
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/Archive/OUTDATED FORMAT Broker in a Box - AssuredPartners - Fog Solutions - CR2.docx
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/Archive/OUTDATED FORMAT Broker in a Box - AssuredPartners - Fog Solutions - CR1 with Admin.docx
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/Archive/OUTDATED FORMAT Broker in a Box - AssuredPartners - Fog Solutions - CR1.docx
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/Archive/[Not in effect, see AP formatted CR] Broker in a Box - AssuredPartners - Fog Solutions - CR1.pdf
Loaded ./my-docs/Broker In A Box/Proposal & Contracts/ECIF POE/POE_End_Customer_3060310_Fog Solutions Inc._CAS-1477383-D1Q5K3_573072723 - signed.pdf
Loaded ./my-docs/Broker In

  warn(msg)


Loaded ./my-docs/Broker In A Box/Delivery/Status updates/Assured Partners - Broker in a Box Sprint 10 Mid-Sprint - 022125.pptx
Loaded ./my-docs/Broker In A Box/Delivery/Status updates/Assured Partners - Broker in a Box Sprint 6b Sprint-End  -122024.pptx
Loaded ./my-docs/Broker In A Box/Delivery/Status updates/Assured Partners - Broker in a Box Sprint 4 Sprint-End & Monthly Stakeholder Review -102524.pptx
Loaded ./my-docs/Broker In A Box/Delivery/Status updates/Assured Partners - Broker in a Box Sprint 2 End-Sprint & Monthly Stakeholder Review - 0927241.pptx
Loaded ./my-docs/Broker In A Box/Delivery/Status updates/Assured Partners - Broker in a Box - Mid Project AP Exec Demo - 102224.pptx
Loaded ./my-docs/Broker In A Box/Delivery/Status updates/Assured Partners - Broker in a Box Sprint 1 Sprint-End Review - 0912241.pptx
Loaded ./my-docs/Broker In A Box/Delivery/Status updates/Assured Partners - Broker in a Box Sprint 8 Mid-Sprint-012425.pptx
Loaded ./my-docs/Broker In A Box/Delivery/Sta

Load/save documents to store

In [None]:
from langchain_community.vectorstores import FAISS
from langchain_chroma import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore

# Fail early with a clear message: with no chunks there is no embedding to
# read the vector dimension from.
if not all_documents:
    raise ValueError("No documents were loaded; cannot build the FAISS index.")

# Embed every chunk exactly once. The vectors are reused below when populating
# the store, instead of letting the store embed the same texts a second time.
texts = [doc.page_content for doc in all_documents]
metadatas = [doc.metadata for doc in all_documents]
document_embeddings = embeddings.embed_documents(texts)

# Initialize the FAISS index, sized to the embedding dimension
dimension = len(document_embeddings[0])
index = faiss.IndexFlatL2(dimension)

doc_store = InMemoryDocstore()
# Create the (initially empty) FAISS vector store
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=doc_store,
    index_to_docstore_id={},
)

# Add the chunks together with their precomputed embeddings
vector_store.add_embeddings(
    text_embeddings=zip(texts, document_embeddings),
    metadatas=metadatas,
)
# Save the FAISS index and documents
vector_store.save_local("faiss_index")

In [10]:
# Reload the index that was saved to "faiss_index", using the same embedding client.
# NOTE(review): allow_dangerous_deserialization=True opts in to deserializing the
# saved store; only load index folders this notebook produced itself.
vector_store = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
# Open a Chroma store persisted under ".chroma".
# NOTE(review): `chroma` is not referenced by any other cell visible here — confirm it is still needed.
chroma = Chroma(embedding_function=embeddings, persist_directory=".chroma")

In [None]:

from gpt_researcher import GPTResearcher
from gpt_researcher.utils.enum import ReportType, ReportSource, Tone

new_vector_store = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)

sections = [ "Project Summary", "Scope of the Solution", "Stakeholders and Audiences", "Solution Design Overview", "User Roles and Security Considerations", "Definition of User Roles", "Data Sources and Ingestion", "Data Ingestion Strategy", "Data Storage and Management", "Storage Solutions", "Data Retention and Archival Policies", "Data Encryption and Data Security", "Encryption Methods", "Data Security Techniques", "Analytics and Reporting", "Reporting", "Analytics Services", "DevOps Integration", "Source Control Management", "Code Deployment and Management Processes", "Rollback Strategies", "Cloud Fundamentals/Readiness", "Regional Planning and Data Center Utilization", "Network and Infrastructure Setup", "Azure Resources", "Network", "Identity and Access Management", "Cost Optimization and Governance", "Backup and Recovery Solutions", "High Availability/Disaster Recovery Plan", "Monitoring and Alert Systems", "Appendix and References", "Glossary of Terms", "Reference Documents"]

query = f"""
You are to create a design document that describes, in detail, a solutions design for Assured Partners Broker in a box.

Use the sections provided to create a detailed report.  Each section should be expanded to include as much detail as possible.  Use the internet to expand on recommended practices.

Be verbose and expand and topics as much as possible.  If the vecttor_store provided doesn't have informmation, state would should be in each sub topic provided.

Do not mention anything about Amynta, but assume the Broker in the box data fabric implmentation is the same.

Use the internet to expand on recommended practices.

The report sections should be as follows:

"Project Summary", "Scope of the Solution", "Stakeholders and Audiences", "Solution Design Overview", "User Roles and Security Considerations", "Definition of User Roles", "Data Sources and Ingestion", "Data Ingestion Strategy", "Data Storage and Management", "Storage Solutions", "Data Retention and Archival Policies", "Data Encryption and Data Security", "Encryption Methods", "Data Security Techniques", "Analytics and Reporting", "Reporting", "Analytics Services", "DevOps Integration", "Source Control Management", "Code Deployment and Management Processes", "Rollback Strategies", "Cloud Fundamentals/Readiness", "Regional Planning and Data Center Utilization", "Network and Infrastructure Setup", "Azure Resources", "Network", "Identity and Access Management", "Cost Optimization and Governance", "Backup and Recovery Solutions", "High Availability/Disaster Recovery Plan", "Monitoring and Alert Systems", "Appendix and References", "Glossary of Terms", "Reference Documents"


"""
researcher = GPTResearcher(
    query=query, 
    report_type=ReportType.Technical, 
    report_format="markdown", 
    report_source="langchain_vectorstore", 
    tone=Tone.Descriptive, 
    source_urls=None, 
    document_urls=None,
    vector_store=vector_store,
    query_domains=[], 
    subtopics=None)

research_result = await researcher.conduct_research()

report = await researcher.write_report()

INFO:     [12:16:04] 🔍 Starting the research task for '
You are to create a design document that describes, in detail, a solutions design for Assured Partners Broker in a box.

Use the sections provided to create a detailed report.  Each section should be expanded to include as much detail as possible.  Use the internet to expand on recommended practices.

Be verbose and expand and topics as much as possible.  If the vecttor_store provided doesn't have informmation, state would should be in each sub topic provided.

Do not mention anything about Amynta, but assume the Broker in the box data fabric implmentation is similar.

Use the internet to expand on recommended practices.

The report sections should be as follows:

"Project Summary", "Scope of the Solution", "Stakeholders and Audiences", "Solution Design Overview", "User Roles and Security Considerations", "Definition of User Roles", "Data Sources and Ingestion", "Data Ingestion Strategy", "Data Storage and Management", "Storage So

Tavily API key not found, set to blank. If you need a retriver, please set the TAVILY_API_KEY environment variable.


INFO:     [12:16:04] 🤔 Planning the research strategy and subtasks...


Error: 400 Client Error: Bad Request for url: https://api.tavily.com/search. Failed fetching sources. Resulting in empty response.


INFO:     [12:16:05] 🗂️  I will conduct my research based on the following queries: ['best practices for creating a solutions design document for data fabric architecture', 'data ingestion strategies and tools for cloud-based solutions 2025', 'modern encryption methods and data security techniques for cloud systems', 'DevOps integration and rollback strategies for Azure-based deployments', '\nYou are to create a design document that describes, in detail, a solutions design for Assured Partners Broker in a box.\n\nUse the sections provided to create a detailed report.  Each section should be expanded to include as much detail as possible.  Use the internet to expand on recommended practices.\n\nBe verbose and expand and topics as much as possible.  If the vecttor_store provided doesn\'t have informmation, state would should be in each sub topic provided.\n\nDo not mention anything about Amynta, but assume the Broker in the box data fabric implmentation is similar.\n\nUse the internet to

[32m# Solution Design Document for Assured Partners Broker in a Box

[0m
[32m---

[0m
[32m## **Project Summary**

[0m
[32mThe "Broker in a Box" project is a transformative initiative aimed at modernizing and streamlining insurance brokerage operations for Assured Partners (AP). The solution leverages cutting-edge technologies such as Microsoft Fabric, Azure cloud services, and AI-driven insights to create a scalable, secure, and efficient platform. The primary objective is to provide brokers with a unified platform for policy management, rule-based decision-making, and enhanced customer interactions. By integrating advanced data ingestion, analytics, and reporting capabilities, the solution ensures operational excellence and compliance with industry standards.

[0m
[32mThe project is structured into multiple sprints, each focusing on iterative development, testing, and deployment of features. Key deliverables include user authentication via Okta, policy data integration, AI-po

INFO:     [12:16:32] 📝 Report written for '
You are to create a design document that describes, in detail, a solutions design for Assured Partners Broker in a box.

Use the sections provided to create a detailed report.  Each section should be expanded to include as much detail as possible.  Use the internet to expand on recommended practices.

Be verbose and expand and topics as much as possible.  If the vecttor_store provided doesn't have informmation, state would should be in each sub topic provided.

Do not mention anything about Amynta, but assume the Broker in the box data fabric implmentation is similar.

Use the internet to expand on recommended practices.

The report sections should be as follows:

"Project Summary", "Scope of the Solution", "Stakeholders and Audiences", "Solution Design Overview", "User Roles and Security Considerations", "Definition of User Roles", "Data Sources and Ingestion", "Data Ingestion Strategy", "Data Storage and Management", "Storage Solutions", "D

[32m- Source: ./my-docs/Broker In A Box/Delivery/Status updates/Assured Partners - Broker in a Box Sprint 10 Mid-Sprint - 022125.pptx[0m


In [24]:
# Disabled call, kept for reference:
#await researcher.write_introduction()
# Print the report text produced by the research cell.
print(report)

# Solution Design Document for Assured Partners Broker in a Box

---

## **Project Summary**

The "Broker in a Box" project is a transformative initiative aimed at modernizing and streamlining insurance brokerage operations for Assured Partners (AP). The solution leverages cutting-edge technologies such as Microsoft Fabric, Azure cloud services, and AI-driven insights to create a scalable, secure, and efficient platform. The primary objective is to provide brokers with a unified platform for policy management, rule-based decision-making, and enhanced customer interactions. By integrating advanced data ingestion, analytics, and reporting capabilities, the solution ensures operational excellence and compliance with industry standards.

The project is structured into multiple sprints, each focusing on iterative development, testing, and deployment of features. Key deliverables include user authentication via Okta, policy data integration, AI-powered recommendations, and robust reporting c

Create a LangChain retriever.

In [None]:
# Reload the persisted index and expose it through the retriever interface.
new_vector_store = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
retriever = new_vector_store.as_retriever()
query = "What is broker in a box?"

# Run the retrieval; without this call `results` is undefined on a fresh kernel.
results = retriever.invoke(query)

# Process and use the results as needed
for result in results:
    print(result.page_content)

systems, offering features like policy project management, document uploads, and tailored natural language queries.  • Broker in a Box provides actionable insights that inform strategic decisions, helping AssuredPartners identify trends, optimize offerings, and enhance customer retention.
systems, offering features like policy project management, document uploads, and tailored natural language queries.  • Broker in a Box provides actionable insights that inform strategic decisions, helping AssuredPartners identify trends, optimize offerings, and enhance customer retention.
With 7,000 brokers managing 9,000 policies per year, 'Broker in a Box' is a breakthrough. It’s cutting review time by days and will save thousands of hours a year—unlocking speed & precision we’ve never seen before. This is the future of client service at AP & Gallagher!

Rob Roth, EVP – Wholesale BrokerageAccretive Insurance Solutions

Challenge: AP had a time consuming & inconsistent process for reviewing terms d

Code to generate by section:

In [None]:
from docx import Document

from langchain_openai import AzureOpenAI, AzureChatOpenAI
import os

def print_headings(docx_path):
    """Print the text of each paragraph whose style name starts with 'Heading'.

    Args:
        docx_path: Path of the .docx file to open.
    """
    heading_texts = (
        para.text
        for para in Document(docx_path).paragraphs
        if para.style.name.startswith('Heading')
    )
    for heading in heading_texts:
        print(heading)

# Print the heading outline of the reference solution-design document.
print_headings('./my-docs/Amynta Group - Solution Design.docx')


# Initialize the Azure OpenAI chat model

# The API key is read from the environment; os.getenv returns None if it is unset.
api_key = os.getenv("AZURE_OPENAI_API_KEY")
# Deployment and model are pinned to 'gpt-4o'; the environment lookups are disabled.
deployment_name = 'gpt-4o' #os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME")
# NOTE(review): this rebinds `model_name`, which the first cell set to the embedding model id.
model_name = 'gpt-4o' #os.getenv("AZURE_OPENAI_MODEL_NAME")

llm = AzureChatOpenAI(
    api_key=api_key,
    deployment_name=deployment_name,
    model_name=model_name,
    # The service base URL also comes from the environment.
    base_url=os.getenv("AZURE_OPENAI_BASE_URL"),
)




Table of Contents
1. Project Summary
Scope of the Solution
Stakeholders and Audiences
2. Solution Design Overview
2.1 Solution Objective
2.2 Current State Solution Diagram
2.3 Future State Solution Architecture
3. User Roles and Security Considerations
3.1 Definition of User Roles
4. Data Sources and Ingestion
4.1 Identification of Data Sources
4.2 Data Ingestion Strategy
4.3 Data Ingestion Metadata Framework
Ingestion Metadata 
4.3.1.1 Source Table  
4.3.1.2 Source Object Table
4.3.1.3 Object Fields Table
4.3.2 Data Quality Metadata
5. Data Storage and Management
5.1 Storage Solutions 
5.2 Data Organization and Structuring
5.3 Common Data Model
6. Data Encryption and Security
Encryption Methods 
6.2 Data Security Techniques
7. Analytics and Reporting 
7.1 Analytics Platform
7.3 Fabric Domains
7.4 Fabric Workspaces
7.5 Reporting
7.5.1 Legacy Production Reports
7.5.2 New Product Dashboard Reporting
7.5.3 Operational Data Platform Health
7.6 Analytics Services 
8. DevOps Integration
8.1 

In [None]:
# Summarize the docx content
def summarize_docx(docx_path):
    """Ask the chat model for a one-paragraph summary of a .docx file.

    Args:
        docx_path: Path of the .docx file whose paragraph text is summarized.

    Returns:
        The value returned by ``llm.invoke``; the cell below reads its
        ``.content`` attribute for the summary text.
    """
    doc = Document(docx_path)
    document_text = '\n'.join(paragraph.text for paragraph in doc.paragraphs)

    summary_query = f"Summarize the following document and return a paragraph that can be used as context for other prompts to help produce a document just like it.  Remove any company names.  Do not format in sections.  You MUST return only an overall summary of what the document is:\n\n{document_text}"
    return llm.invoke(summary_query)


# The summary must be computed in this cell: the lines below read `summary`,
# which is otherwise undefined after a kernel restart.
summary = summarize_docx('./my-docs/Amynta Group - Solution Design.docx')

print(summary.content)
context = summary.content

# NOTE(review): `context` is not interpolated into this prompt yet — confirm intent.
prompt = f"""
    You are to create a design document that describes, in detail, a solutions design for Assured Partners Broker in a box.
    
    """
    


The document outlines a comprehensive solution design for a scalable enterprise data platform leveraging Microsoft Fabric to centralize and unify data management, reporting, and analytics across an organization. It details the implementation of a Lakehouse architecture using a medallion model (Bronze, Silver, Gold layers) for data ingestion, transformation, and storage, while employing metadata-driven frameworks to streamline data processing and ensure scalability. The design integrates various data sources, implements secure data handling with Azure Key Vault, and emphasizes governance through role-based access controls (RBAC) and DevOps best practices. Reporting capabilities are powered by Power BI, supported by semantic models and calculation groups to enable efficient report development and operational monitoring. The solution incorporates cost optimization strategies, modular pipelines for processing, and recommendations for future enhancements, such as Azure Landing Zone deployme