In [34]:
import os
import shutil
import glob
from dotenv import load_dotenv
import gradio as gr

In [86]:
from langchain.document_loaders import DirectoryLoader, TextLoader, PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_chroma import Chroma
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
import numpy as np
import plotly.graph_objects as go
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import SystemMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
import gradio as gr
from langchain_groq import ChatGroq


In [98]:
MODEL = "llama-3.3-70b-versatile"
db_name = "vector_db"

In [82]:
load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')

In [99]:
import glob
import os
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

folders = glob.glob("knowledge-base/*")

def add_metadata(doc, doc_type):
    doc.metadata["doc_type"] = doc_type
    return doc

def load_documents_from_folders(folders):
    documents = []
    for folder in folders:
        doc_type = os.path.basename(folder)
        loader = DirectoryLoader(
            folder,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader 
        )
        folder_docs = loader.load()
        documents.extend([add_metadata(doc, doc_type) for doc in folder_docs])
    return documents


In [113]:
from langchain.schema import Document
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
import json

def create_profiles(documents):
    profiles = []
    for doc in documents:
        text = doc.page_content
        llm = ChatGroq(temperature=1, model=MODEL)
        prompt_template = ChatPromptTemplate.from_template(
            """
            Extract each profile from the following text.
            Return a JSON object, without any text surrounding it, Not even ```json..```,, 
            where each item has: "name", "title", "company", "description", etc.
            Text:
            ---
            {text}
            ---
            """
        )
        chain = LLMChain(llm=llm, prompt=prompt_template)
        result = chain.run({"text": text})
        # print(f"Result1 : {result}")
        items = json.loads(result)
        print(items)
        print(type(items))
        # for item in result:
        #     print(item)
        #     profiles.append(item)
        for item in items:
            profiles.append(str(item))
    return profiles


In [101]:
def create_vectorstore_from_profiles(profiles):
    agent_chunks = []
    for profile in profiles:
        profile_doc = Document(
                    page_content=profile,
                )
        agent_chunks.append(profile_doc)

    embeddings = OpenAIEmbeddings()
    if os.path.exists(db_name):
        Chroma(persist_directory=db_name, embedding_function=embeddings).delete_collection()
    vectorstore = Chroma.from_documents(
        documents=agent_chunks,
        embedding=embeddings,
        persist_directory=db_name
    )
    print(f"✅ Vectorstore created with {vectorstore._collection.count()} agent profiles.")
    return vectorstore

In [121]:
# ─────────────────────────────────────────────────────────────────────────────
# Globals & Setup
# ─────────────────────────────────────────────────────────────────────────────
UPLOAD_DIR = "C:/Users/nisar/Desktop/Applications/Processes/Sociosquares/Projects/projects/llm_engineering/Provisor_project/knowledge-base/uploads"
vectorstore = None
user_business = None  
referral_types = None 

llm = ChatGroq(temperature=1, model=MODEL)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

extract_prompt = ChatPromptTemplate.from_template("""
From the conversation below, extract two things:
1) business: What business the user is in (if they've told you yet).
2) referral_types: The kinds of referral partners the user wants (as a comma-separated string).

If the user hasn’t provided one or both, set its value to an empty string.

Return a JSON object exactly like without any text surrounding it, Not even ```json..```:
{{  
  "business": "...",  
  "referral_types": "..."  
}}

Conversation:
---
{history}
---  
""")
extract_chain = LLMChain(llm=llm, prompt=extract_prompt)

def handle_pdf_upload(files):
    global vectorstore
    if not files:
        return "No files uploaded."
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    for f in files:
        dest = os.path.join(UPLOAD_DIR, os.path.basename(f.name))
        shutil.copy(f.name, dest)
    docs = load_documents_from_folders([UPLOAD_DIR])
    profiles = create_profiles(docs)
    vectorstore = create_vectorstore_from_profiles(profiles)
    return f"{len(files)} PDF(s) processed and added to the vectorstore."


def qna_chat(user_message, history):
    global user_business, referral_types, vectorstore

    if vectorstore is None:
        return "📂 Please upload and process your PDFs before chatting."

    convo = []
    for u, a in history:
        convo.append(f"User: {u}")
        convo.append(f"Assistant: {a}")
    convo.append(f"User: {user_message}")
    history_str = "\n".join(convo)

    extracted = extract_chain.run({"history": history_str})
    print(f"Extracted: {extracted}")
    data = json.loads(extracted)
    if not user_business and data.get("business"):
        user_business = data["business"].strip()
    if not referral_types and data.get("referral_types"):
        referral_types = data["referral_types"].strip()

    if not user_business or not referral_types:
        return llm.invoke([{"role":"system","content":
            "You are an expert assistant who guides a user to find referral partners. "
            "First ask what business they are in, then ask who good referral partners are. "
            "Wait until both are given."
        }, 
        {"role":"user","content": history_str}]).content

    semantic_query = (
        f"I am in the {user_business} business and want referral partners who are {referral_types}."
    )
    retriever = vectorstore.as_retriever()
    system_prompt = SystemMessagePromptTemplate.from_template(
        "You are an expert assistant matching users to business referral contacts only from the retrieved documents. "
    )
    human_prompt = HumanMessagePromptTemplate.from_template(
        "Context:\n{context}\n\nQuestion: {question}"
    )
    chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_prompt])
    convo_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
        combine_docs_chain_kwargs={"prompt": chat_prompt}
    )
    result = convo_chain({"question": semantic_query})
    return result["answer"]


def reset_state():
    global user_business, referral_types, memory
    user_business = None
    referral_types = None
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return "🔄 Search reset! Please tell me what business you are in."

# ─────────────────────────────────────────────────────────────────────────────
# Gradio App
# ─────────────────────────────────────────────────────────────────────────────
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Referral Partner Finder")
    with gr.Row():
        uploader = gr.File(
            label="Upload Agent Profile PDFs",
            file_types=[".pdf"],
            file_count="multiple"
        )
        up_out = gr.Textbox(label="Status")
        reset_btn   = gr.Button("🔄 Restart Search")
        reset_status = gr.Textbox(label="Reset Status")
    gr.Button("Process PDFs").click(
        fn=handle_pdf_upload, inputs=uploader, outputs=up_out
    )
    reset_btn.click(fn=reset_state, inputs=[], outputs=reset_status)

    gr.ChatInterface(
        fn=qna_chat,
        title="Referral Partner Q&A",
        description="Bot will guide you through a few questions, then find matching profiles.",
        chatbot=gr.Chatbot()
    )

demo.launch(inbrowser=True)


  chatbot=gr.Chatbot()


* Running on local URL:  http://127.0.0.1:7906

To create a public link, set `share=True` in `launch()`.




{'profiles': [{'name': 'Angela Cearns', 'title': 'Consultant & Service', 'company': 'Ask Kevin', 'description': '', 'location': 'San Francisco, CA United States', 'phone': '650.733.9898', 'email': 'angela@askevin.com', 'website': 'https://www.askevin.com/aboutus'}, {'name': 'Jonathan Bello', 'title': 'Founder/CEO & Principal Accountant', 'company': 'One 8 Solutions, LLC', 'description': 'Numbers tell half the story. Let us show you the rest. We are a Client Accounting Services (CAS) firm acting as an outsourced accounting department where bookkeeping is a component of those services.', 'location': 'W Newton, MA 02465 United States', 'phone': '617-332-8522', 'email': 'jbello@one-8.com', 'website': 'www.one8solutions.com'}, {'name': 'Carrie Beam', 'title': 'Data Scientist', 'company': 'Carrie Beam Consulting', 'description': 'Fractional data scientist: user requirements, data architecture, algorithm selection and design. Analytics.', 'location': 'Walnut Creek, CA 94596 United States', 'p

In [None]:
#Tool is user specific.
