In [7]:
import os

from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings

In [8]:
# Define the persistent directory
current_dir = os.getcwd()
db_dir = os.path.join(current_dir, "db")
persistent_directory = os.path.join(db_dir, "ideagen_products_db")

In [9]:
urls = [
    "https://www.ideagen.com/solutions/audit-and-risk/external-audit/ideagen-audit-analytics",
    "https://www.complispace.com.au/",
    "https://www.ideagen.com/solutions/quality/coruson",
    "https://www.devonway.com/",
    "https://www.ideagen.com/solutions/document-collaboration/huddle",
    "https://ideagen.ideagenacademy.com/",
    "https://www.ideagen.com/solutions/environmental-health-and-safety/ehs",
    "https://www.mailmanager.com/",
    "https://www.ideagen.com/solutions/quality/quality-control",
    "https://www.medforcetech.com/about/interoperability/",
    "https://opcentral.com.au/",
    "https://www.oneplacesolutions.com/",
    "https://www.ideagen.com/solutions/audit-and-risk/internal-audit",
    "https://www.ideagen.com/solutions/audit-and-risk/external-audit",
    "https://www.ideagen.com/solutions/audit-and-risk/risk-management",
    "https://www.assessor.com.au/",
    "https://www.ideagen.com/solutions/document-collaboration/pleasereview",
    "https://www.ideagen.com/solutions/quality/quality-management",
    "https://www.ideagen.com/solutions/elearning-content/workrite",
    "https://www.ideagen.com/solutions/quality/smartforms",
    "https://www.ideagen.com/solutions/environmental-health-and-safety/maritime-safety",
]

loader = WebBaseLoader(urls)
documents = loader.load()

In [10]:
# Step 2: Split the scraped content into chunks
# CharacterTextSplitter splits the text into smaller chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

# Display information about the split documents
print("\n--- Document Chunks Information ---")
print(f"Number of document chunks: {len(docs)}")
print(f"Sample chunk:\n{docs[0].page_content}\n")


--- Document Chunks Information ---
Number of document chunks: 246
Sample chunk:
Advanced audit insights software for comprehensive analysis

 

Company

Our company
Our leadership
Our values
Events
News
Contact Us

Careers

Careers overview
Current vacancieshiring
Early Careers
Benefits

Resources

Blogs

Case studies

Webinars
White papers


Contact sales


Back

Your priorities

by industry


AEC


Aerospace and defense


Audit, accounting and advisory


Aviation


Education


Energy


Financial services


Food and beverage


Government


Healthcare


Laboratories


Life sciences


Manufacturing


Pharmaceuticals


Latest Gartner® Market Guide for EHS Software

Download now


by regulation


Cal/OSHA Workplace Violence


CAPA


COSO


ECCAIRS


ESG


GFSI


HACCP


HASAWA


IASA


IATF 16949


Natasha's Law


OSHA General Duty Clause


RIDDOR


SOX

Latest Gartner® Market Guide for EHS Software

Download now


by standard


AS 9100


AS 9102


AS 9145


AS 13100


Global Internal A

In [None]:
embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key='<api-key>', model_name="sentence-transformers/all-MiniLM-l6-v2"
)

In [12]:
# Step 4: Create and persist the vector store with the embeddings
# Chroma stores the embeddings for efficient searching
if not os.path.exists(persistent_directory):
    print(f"\n--- Creating vector store in {persistent_directory} ---")
    db = Chroma.from_documents(docs, embeddings, persist_directory=persistent_directory)
    print(f"--- Finished creating vector store in {persistent_directory} ---")
else:
    print(f"Vector store {persistent_directory} already exists. No need to initialize.")
    db = Chroma(persist_directory=persistent_directory, embedding_function=embeddings)


--- Creating vector store in c:\Users\azeem.rom\OneDrive - Ideagen plc\Desktop\product-recommendation\rag-notebooks\db\ideagen_products_db ---
--- Finished creating vector store in c:\Users\azeem.rom\OneDrive - Ideagen plc\Desktop\product-recommendation\rag-notebooks\db\ideagen_products_db ---


In [26]:
# Step 5: Query the vector store
# Create a retriever for querying the vector store
retriever = db.as_retriever(
    search_type="mmr",  # Maximal Marginal Relevance for balancing relevance and diversity
    search_kwargs={
        "k": 5,  # Increase k to retrieve more candidates for ranking
        "lambda_mult": 0.8,  # Control relevance vs. diversity (closer to 1 is more relevance-focused)
    },
)


# Define the user's question
query = "a user with jobTitle: Auditor, productUsed: Ideagen Audit & Risk, and searchQueries: audit workflow, evidence management, recommend 3 products"

# Retrieve relevant documents based on the query
relevant_docs = retriever.invoke(query)

In [27]:
# Extract text content from relevant_docs
context = "\n".join(doc.page_content for doc in relevant_docs)

In [28]:
context

"50+ databases of standardized disclosure data (dependent on subscription type).\nSearch by company, auditor, industry, location and more.\nComprehensive company profiles.\nAccounting Quality Risk Matrix that helps monitor, compare and evaluate “red flag” disclosures.\nSaved searches and alerts for every time new data related to your queries is available.\nComplementary thought leadership ranging in topics from cybersecurity, to restatements, to audit fees, to internal controls and more.\n\n\nIdeagen Audit Analytics Demos\nSee Ideagen Audit Analytics in action\n Take part in a tailored demo experience instantly or reach out to connect with one of our experts \n\n\nTailor your demo by selecting the pain points and business needs that matter most to you\n\r\n                                            Instant personalized demo\nComply with all the standards and regulations that impact your business, including SOX, ISO, ESG and COSO.\nStep-by-step workflows for audit methodology and audit

In [None]:
import requests
import json

def generate_payload(jobTitle, productUsed, searchQueries):
    instantPrompt = f"based on the products and product descriptions, as a user with jobTitle: {jobTitle}, productUsed: {productUsed}, and searchQueries: {', '.join(searchQueries)}, recommend 3 products, rank them based on the highest relevancy with percentage. Provide the answer in json array format. For the recommended products ensure that it is different from the product owned"
    
    text = """Audit Analytics: Provides unique data and insights to empower accounting, financial, and academic professionals with informative content critical for their work.\nCompliancePath: Validates your QMS to 21 CFR Part 11 and harmonizes your policy and privacy frameworks to HIPAA, HITRUST, IEC62304, 21 CFR Part 820, and any area of Health IT or Life Science convergence.\nCompliSpace: Built on the four pillars of policy, learning, assurance, and reporting, helping you achieve 'policy to culture.'\nCoruson: Enables transport companies to gain complete control, visibility, and real-time reporting of their safety and operational risks.\nDevonWay: Provides a fully integrated, configurable product suite across environmental, health and safety, quality management, enterprise asset management, and workforce management.\nHuddle: Offers secure file sharing and document collaboration for internal and external teams, enabling content collaboration, activity tracking, and project communication in a shared environment.\nIdeagen Academy: Offers interactive demos, online learning, and assessments to help your workforce become proficient with Ideagen software.\nIdeagen EHS: Powered by ProcessMAP, provides an integrated incident reporting and safety management solution, giving safety managers a 'single source of truth.'\nIdeagen Mail Manager: Eliminates email headaches and ensures control of sensitive information and project correspondence.\nInspectionXpert: Facilitates faster and accurate creation of ballooned part drawings and inspection sheets. Now integrated as Ideagen Quality Control.\nMedforce: Offers process and document management solutions tailored for US healthcare. Acquired by Ideagen in 2018.\nOpsbase: A user-friendly, paperless checklist and inspection platform acquired by Ideagen in 2021.\nOp Central: An AI-powered global software platform specializing in managing SOPs, training, audits, communications, and incidents for franchises and multi-site organizations.\nOnePlace Solutions: Integrates Office 365 tools like Outlook, SharePoint, Teams, and OneDrive for intelligent information management, compliance, and collaboration.\nPentana Audit: Provides an integrated audit lifecycle tool, supporting internal audit teams with confidence. Now known as Ideagen Internal Audit.\nPentana Disclose: An automated accounts disclosure checklist tool for efficient tailoring, reviewing, and approval. Now called Ideagen Disclose.\nPentana Risk: Offers complete visibility and control over organizational risks, helping build resilience and compliance. Now Ideagen Risk Management.\nPlant Assessor: The world's largest platform for plant and equipment safety and information sharing, tailored for machinery users.\nPleaseReview: A secure collaborative platform for document review, co-authoring, and redaction, enabling efficient control of document workflows.\nProquis: A quality management solution now replaced by Ideagen Quality Management.\nProcessMAP: Acquired by Ideagen in 2022, a leading solution for digitalizing and transforming environmental, health, and safety management initiatives. Now Ideagen EHS.\nQADEX: [No description provided.]\nQualsys: Acquired by Ideagen in 2020, it provides a primary quality management solution now known as Ideagen Quality Management.\nQualtrax: Combines customizable process and document management, especially for laboratories and government sectors in the US.\nQ-Pulse EHS: Formerly the Scannell EHS management solution, it is now Ideagen EHS, a workplace safety solution.\nQ-Pulse Law: A regulatory content service acquired by Ideagen in 2019, formerly Scannell Solutions, now called Q-Pulse Law.\nQ-Pulse OSHENS: [No description provided.]\nQ-Pulse PM: Automates First Article Inspection (FAI), Production Part Approval Process (PPAP), and New Product Introduction (NPI) for quality documentation and customer relationships.\nQ-Pulse QMS: Acquired by Ideagen in 2015, a leading name in quality management digitalization. Now Ideagen Quality Management.\nQ-Pulse Risk: A tool leveraging standardized risk models to manage risks effectively, aiding decision-making through impact analysis and risk modeling.\nQ-Pulse SP: Enables suppliers to submit quality documentation via a web portal, with approval tools for submissions and part shipments.\nQ-Pulse WorkRite: Transforms workplace training with an e-Learning LMS designed to meet legal regulations and protect staff. Now Ideagen WorkRite.\nRisk Management: A next-generation enterprise risk management solution delivering integration of assurance, risk, and compliance with excellent user experience.\nSmartforms: Simplifies high-volume data collection with a low-code mobile solution that saves time and enhances productivity, integrating with enterprise systems.\nTritan Software: Helps maritime operators maintain health and safety best practices across global fleets, improving compliance, uptime, and operations.\nWorkbench: A quality management solution now replaced by Ideagen Quality Management.\nIdeagen WorkRite: An e-Learning LMS designed to meet legal regulations and ensure staff well-being."""

    retriever = db.as_retriever(
    search_type="mmr",  # Maximal Marginal Relevance for balancing relevance and diversity
    search_kwargs={
        "k": 5,  # Increase k to retrieve more candidates for ranking
        "lambda_mult": 0.8,  # Control relevance vs. diversity (closer to 1 is more relevance-focused)
    },
    )


    # Define the user's question
    query = f"a user with jobTitle: {jobTitle}, productUsed: {productUsed}, and searchQueries: {', '.join(searchQueries)}, recommend 3 products"

    # Retrieve relevant documents based on the query
    relevant_docs = retriever.invoke(query)
    context = f"{text}\n".join(doc.page_content for doc in relevant_docs)

    payload = {
        'data': json.dumps({
            "instantPrompt": instantPrompt,
            "text": context,
            "platform": 5
        })
    }
    return payload

def recommend(payload):
    url = "https://idea-gen-ai-igh.ideagendevai.com/api/idea-gen-ai-service/v2/prompts/15a57fc3-cd0c-4aac-87af-6db3ac1ee0ad"

    headers = {
        'x-api-key': '<api-key>',
        'productInstanceId': '987e6543-e21b-23d4-a789-426614173999',
        'tenantId': 'f97df110-f4de-492e-8849-4a6af68026b0'
    }

    response = requests.post(url, headers=headers, data=payload)

    print(response.text)
    # Parse the JSON response
    response_data = json.loads(response.text)

    # Extract the answer text containing product names and descriptions
    answer_text = response_data["data"]["answer"]["answerText"]

    # Remove the surrounding markdown code block
    answer_text = answer_text.strip("```json\n").strip("\n```")

    # Parse the JSON content inside the answer text
    products_data = json.loads(answer_text)

    # Create a dictionary to store the products and their descriptions
    products = {item["product"]: item["description"] for item in products_data}
    return products

In [49]:
payload = generate_payload("Restaurant owner", "Coruson", ["safety", "health", "document collaboration"])
payload

{'data': '{"instantPrompt": "based on the products and product descriptions, as a user with jobTitle: Restaurant owner, productUsed: Coruson, and searchQueries: safety, health, document collaboration, recommend 3 products, rank them based on the highest relevancy with percentage. Provide the answer in json array format. For the recommended products ensure that it is different from the product owned", "text": "Service & Maintenance ManagementKeep your machines in top conditionwith a preventative maintenance program\\n\\n\\u00a0\\n\\n\\nSafe Operating Procedures(SOPs)Access easy-to-read, comprehensiveSOPs specific to your machines\\n\\n\\u00a0\\n\\n\\nMySiteAll the tools to manage machinerycompliance obligations and upholdsafety on-site\\n\\n\\n\\u00a0\\n\\n\\nView All Features \\n\\n\\nINDUSTRIES\\nConstructionDealersHireClearing SalesAuctionsLocal GovernmentUtilities\\nAll Industries \\u00a0\\n\\nROLES\\nFleet ManagementMachinery OperatorsProcurementEngineersProject ManagementSafety\\n

In [50]:
products = recommend(payload)
products

{"data":{"answer":{"answerText":"```json\n[\n  {\n    \"product\": \"Ideagen EHS\",\n    \"description\": \"Powered by ProcessMAP, provides an integrated incident reporting and safety management solution, giving safety managers a 'single source of truth.'\",\n    \"relevancy\": 95\n  },\n  {\n    \"product\": \"PleaseReview\",\n    \"description\": \"A secure collaborative platform for document review, co-authoring, and redaction, enabling efficient control of document workflows.\",\n    \"relevancy\": 90\n  },\n  {\n    \"product\": \"Huddle\",\n    \"description\": \"Offers secure file sharing and document collaboration for internal and external teams, enabling content collaboration, activity tracking, and project communication in a shared environment.\",\n    \"relevancy\": 85\n  }\n]\n```","answerType":"json","isAnswerPositive":null}}}



{'Ideagen EHS': "Powered by ProcessMAP, provides an integrated incident reporting and safety management solution, giving safety managers a 'single source of truth.'",
 'PleaseReview': 'A secure collaborative platform for document review, co-authoring, and redaction, enabling efficient control of document workflows.',
 'Huddle': 'Offers secure file sharing and document collaboration for internal and external teams, enabling content collaboration, activity tracking, and project communication in a shared environment.'}

In [19]:
print(relevant_docs)

[Document(metadata={'description': 'DevonWay provides SaaS software for quality, safety, asset management, and workforce management for organizations in regulated, high-risk industries', 'language': 'en', 'source': 'https://www.devonway.com/', 'title': 'DevonWay Software for QMS, EHS, EAM, and WFM'}, page_content="Kevin L.\nQuality Engineering Lead | Viking Air\nDevonWay is the perfect vendor that supports our long-term vision of having a unified quality, safety, and asset management platform.\n\nMark C.\nDirector of Operations - Global Service | GE Healthcare\nIt's been an excellent experience with Devonway. We need to track multi-million $ productivity improvement projects across the globe and Devonway has made that seamless.\n\nKyle P.\nSenior Systems Analyst | Nebraska Public Power District\nI use\xa0the\xa0relationship we have with\xa0DevonWay\xa0as our gold standard when we work with outside entities.\xa0I don’t\xa0know\xa0anyone I work with outside of NPPD that we have such a go