In [1]:
!pip install -q langchain-google-genai langchain-huggingface langchain-community faiss-cpu python-dotenv

In [2]:
!pip uninstall keras -y

Found existing installation: keras 3.11.3
Uninstalling keras-3.11.3:
  Successfully uninstalled keras-3.11.3


In [3]:
!pip install tf-keras

Collecting tf-keras
  Using cached tf_keras-2.20.1-py3-none-any.whl.metadata (1.8 kB)
Collecting tensorflow<2.21,>=2.20 (from tf-keras)
  Using cached tensorflow-2.20.0-cp311-cp311-win_amd64.whl.metadata (4.6 kB)
Collecting keras>=3.10.0 (from tensorflow<2.21,>=2.20->tf-keras)
  Using cached keras-3.11.3-py3-none-any.whl.metadata (5.9 kB)
Using cached tf_keras-2.20.1-py3-none-any.whl (1.7 MB)
Using cached tensorflow-2.20.0-cp311-cp311-win_amd64.whl (331.8 MB)
Using cached keras-3.11.3-py3-none-any.whl (1.4 MB)
Installing collected packages: keras, tensorflow, tf-keras
  Attempting uninstall: tensorflow
    Found existing installation: tensorflow 2.16.2
    Uninstalling tensorflow-2.16.2:
      Successfully uninstalled tensorflow-2.16.2
Successfully installed keras-3.11.3 tensorflow-2.20.0 tf-keras-2.20.1


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-intel 2.16.2 requires ml-dtypes~=0.3.1, but you have ml-dtypes 0.5.3 which is incompatible.
tensorflow-intel 2.16.2 requires protobuf!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<5.0.0dev,>=3.20.3, but you have protobuf 5.29.5 which is incompatible.
tensorflow-intel 2.16.2 requires tensorboard<2.17,>=2.16, but you have tensorboard 2.20.0 which is incompatible.


In [4]:
import json
import os
from getpass import getpass

from dotenv import load_dotenv
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document # for creating langChain documents
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [20]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Only prompt when the key was not already supplied via the environment/.env,
# and use getpass so the secret is not echoed into the saved notebook output.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Gemini API key: ")

Gemini API key: ....


In [6]:
# Load the scraped pages from disk. The encoding is given explicitly because the
# platform default (e.g. a Windows code page) may not decode UTF-8 JSON correctly.
with open('occams_scraped_data.json', 'r', encoding='utf-8') as f:
    scraped_data = json.load(f)

In [7]:
# Number of records loaded from the scraped JSON file.
len(scraped_data)

156

In [8]:
# Peek at the first scraped record to see what a single entry looks like.
for record in scraped_data[:1]:
    print(record)

{'url': 'https://www.occamsadvisory.com/', 'content': 'Occams Advisory | Global Financing Advisory & Professional Services\nCareer\nBlog\nAwards\nAbout\nServices\nServices\nBSGI\nBusiness Services & Growth Incubation\nFTPS\nFinancial Technology & Payment\n                                                                    Solutions\nCMIB\nCapital Market & Investment Banking\nTC\nTax Advisory and Tax Credits\nEmployer of\n                                                                    Record (EOR)\nWe handle payroll, contracts, taxes, and benefits, so\n                                                                    you can focus on growth.\nStructuring,\n                                                                    Incorporation & Accounting Advisory\nExpert guidance for business formation, legal\n                                                                    structuring, and financial setup.\nProcess\n                                                                  

In [9]:
# Split every scraped page into overlapping text chunks and wrap each chunk in a
# Document whose metadata records the URL the chunk came from.
# The splitter's settings are the same for every page, so it is built once
# instead of on each loop iteration.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

documents = [
    Document(page_content=chunk, metadata={"url": data['url']})
    for data in scraped_data
    for chunk in splitter.split_text(data['content'])
]

print(len(documents))

3940


In [10]:
# Embedding model used to vectorise the chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Build the FAISS index from the chunked documents, then write it to the
# "faiss_index" folder.
vectorstore = FAISS.from_documents(documents=documents, embedding=embeddings)
vectorstore.save_local(folder_path="faiss_index")




In [11]:
# Chat model used to generate the answers.
# NOTE(review): confirm "gemini-1.5-flash" is still a served model id for this API key.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash", temperature=0.3)

# Prompt with two placeholders: {context} (retrieved text) and {question} (user query).
# The template is delimited by ''' only; it must not contain a stray """ or that
# text is sent to the model verbatim after "Answer:".
prompt_template = '''You are a helpful assistant for Occams Advisory. Use only the provided context to answer the question.
If the information is not in the context, say "I don't have enough information to answer that." Personalize with the user's name if provided.

Context: {context}
Question: {question}
Answer:'''

prompt = PromptTemplate.from_template(prompt_template)

In [12]:
# Retriever over the FAISS index returning the 3 most similar chunks per query.
# The keyword must be spelled `search_kwargs`; the previous misspelling meant the
# k=3 setting was never passed under the name the retriever reads.
retriver = vectorstore.as_retriever(search_kwargs={"k": 3})

In [14]:
def combine_retrieved_parts(parts):
    """Merge retrieved documents into a single context string.

    Args:
        parts: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values joined by newlines, in the given order
        (an empty string when ``parts`` is empty).
    """
    texts = [doc.page_content for doc in parts]
    return "\n".join(texts)

In [15]:
# Context branch: retrieve chunks for the question, then join them into one string.
retrieve_context = retriver | combine_retrieved_parts

# Full pipeline: build the prompt inputs, fill the prompt, call the LLM,
# and parse the model output to a plain string.
rag_chain = (
    {"context": retrieve_context, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [16]:
# Sample question used to try out the RAG chain.
query = "What services does Occams Advisory offer?"

In [17]:
# Run the sample question through the chain; the bare name on the last line
# makes the notebook display the returned answer.
answer = rag_chain.invoke(query)
answer

'Occams Advisory offers services in three verticals: Business Services & Growth Incubation (BSGI), Capital Markets & Investment Banking (CMIB), and Financial Technology & Payment Solutions (FTPS).  They also handle tax filing,  help with process efficiency, compliance, and tax planning, and offer services related to structuring and incorporation, brand building, digital marketing, and enhancing online presence.'

In [19]:
# Interactive question/answer loop over the RAG chain. Type "exit" to stop.
while True:
    try:
        # Strip surrounding whitespace so " exit " and accidental blanks are handled.
        query = input("Question: ").strip()
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupted prompt ends the session without a traceback.
        break
    if not query:
        # Nothing was asked: prompt again instead of sending an empty query to the LLM.
        continue
    if query.lower() == "exit":
        break
    answer = rag_chain.invoke(query)
    print(f'Answer: {answer}\n')

Question: What is this company?
Answer: This is Occams Advisory.  They provide independent services.

Question: what kind of services?
Answer: Occams Advisory offers services in three areas:

* **Streamlining operations, ensuring regulatory adherence, and optimizing tax strategies.**
* **Information Technology Services:** Delivering innovative tech solutions to enhance operations and drive business growth.
* **Digital:** Crafting strong brand identity, engaging customers and fostering active community engagement.

Question: exit
