## KnowledgeAgent Project

In [1]:
from langchain_community.utilities import WikipediaAPIWrapper, ArxivAPIWrapper
from langchain_community.tools import WikipediaQueryRun, ArxivQueryRun
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import Ollama
from langchain.agents import initialize_agent, AgentType, AgentOutputParser
from langchain.tools import Tool
import re

# Custom Output Parser to handle responses
class CustomOutputParser(AgentOutputParser):
    def parse(self, text):
        print("LLM Output:", text)  # Debugging: To see what the LLM returns
        if "Final Answer:" in text:
            final_answer = text.split("Final Answer:")[-1].strip()
            return {"output": final_answer}  # Return final answer directly
        raise ValueError(f"Could not parse LLM output: `{text}`")  # Raise error if format doesn't match

# Initialize Wikipedia tool
wiki_api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
wiki = WikipediaQueryRun(api_wrapper=wiki_api_wrapper)

# Initialize Arxiv tool
arxiv_api_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv = ArxivQueryRun(api_wrapper=arxiv_api_wrapper)

# Initialize the Ollama LLM
llm = Ollama(model="deepseek-r1")

# Define a custom prompt template
prompt_template = """
You are an AI assistant. Use the following format to answer the question:
Thought: Explain your reasoning.
Action: Specify the tool you will use (if any).
Action Input: Provide input for the tool (if any).
Final Answer: Provide the final response.

Question: {input}
"""

# Create a PromptTemplate object
prompt = PromptTemplate(template=prompt_template, input_variables=["input"])

# Define tools for the agent
tools = [
    Tool(
        name="Wikipedia",
        func=wiki.run,
        description="Use this tool to search information on Wikipedia."
    ),
    Tool(
        name="Arxiv",
        func=arxiv.run,
        description="Use this tool to search academic papers on Arxiv."
    )
]

# Initialize the agent with tools and LLM using the custom prompt and output parser
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    agent_kwargs={"output_parser": CustomOutputParser()},  # Now passing as valid instance
    prompt=prompt
)

# Define the query
query = "What is deep learning?"

# Run the agent with only the query
try:
    response = agent.run(query)  # Pass only the query string, not a dictionary
    print("Agent Response:", response)
except AttributeError as e:
    print(f"AttributeError occurred: {e}")
except ValueError as e:
    print(f"ValueError occurred: {e}")


  llm = Ollama(model="deepseek-r1")
  agent = initialize_agent(
  response = agent.run(query)  # Pass only the query string, not a dictionary




[1m> Entering new AgentExecutor chain...[0m
LLM Output: <think>
Okay, so I need to figure out what deep learning is. Hmm, I've heard the term before in tech and machine learning contexts, but I'm not entirely sure about the details. Let me start by breaking down the word itself. "Deep" usually implies something complex or multi-layered, and "learning" suggests some kind of training or process.

I remember that deep learning is a subset of machine learning, which is part of artificial intelligence. So maybe it's a type of AI that uses algorithms inspired by neural networks. Neural networks in the human brain have layers—like neurons connected in multiple layers—and perhaps deep learning refers to having many such layers in machines as well.

Wait, I think "deep" here refers to having several layers of neurons or artificial neurons. Traditional machine learning models might use a few layers, but deep learning uses many more, allowing them to learn more complex patterns from data. So 

In [2]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Wikipedia tool setup
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=200)
wiki = WikipediaQueryRun(api_wrapper=api_wrapper)

from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Web scraping and vectorization setup
loader = WebBaseLoader("https://docs.smith.langchain.com/")
docs = loader.load()
documents = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)
vectordb = FAISS.from_documents(documents, OllamaEmbeddings(model="deepseek-r1"))  # Use deepseek-r1 here
retriever = vectordb.as_retriever()

# Create retriever tool
from langchain.tools.retriever import create_retriever_tool
retriever_tool = create_retriever_tool(retriever, "langsmith_search",
                                      "Search for information about LangSmith. For any questions about LangSmith, you must use this tool!")

# Arxiv Tool setup
from langchain_community.utilities import ArxivAPIWrapper
from langchain_community.tools import ArxivQueryRun

arxiv_wrapper = ArxivAPIWrapper(top_k_results=1, doc_content_chars_max=200)
arxiv = ArxivQueryRun(api_wrapper=arxiv_wrapper)

tools = [wiki, arxiv, retriever_tool]

# Load the deepseek-r1 model using Ollama
from langchain_community.llms import Ollama
llm = Ollama(model="deepseek-r1")  # Change to deepseek-r1

# LangChain LLMChain setup with custom prompt template
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Define a custom prompt template
prompt_template = PromptTemplate(
    input_variables=["question"],
    template="Answer the following question: {question}"
)

# Create LangChain LLMChain using the deepseek-r1 model
chain = LLMChain(llm=llm, prompt=prompt_template)

# Run the chain with a question
response = chain.run("What is deep learning?")
#print(response)

# Initialize the agent with the deepseek-r1 model
from langchain.agents import initialize_agent, AgentType
from langchain.tools import Tool
from langchain.utilities import WikipediaAPIWrapper, ArxivAPIWrapper

# Define your tools
wiki = Tool(
    name="Wikipedia",
    func=WikipediaAPIWrapper().run,
    description="Useful for searching information on Wikipedia."
)

arxiv = Tool(
    name="Arxiv",
    func=ArxivAPIWrapper().run,
    description="Useful for searching academic papers on Arxiv."
)

# Custom retriever tool
def retriever_tool(query: str) -> str:
    return f"Retrieved information for: {query}"

retriever = Tool(
    name="Retriever",
    func=retriever_tool,
    description="Useful for retrieving custom information."
)

# List of tools
tools = [wiki, arxiv, retriever]

# Initialize the agent with the deepseek-r1 model
agent = initialize_agent(
    tools,
    llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,  # Specify the agent type
    verbose=True  # Optional: Set to True to see intermediate steps
)

# Run the agent with a query
query = "What is the math?"
response = agent.run(query)

# Print the response
print(response)


USER_AGENT environment variable not set, consider setting it to identify your requests.
  vectordb = FAISS.from_documents(documents, OllamaEmbeddings(model="deepseek-r1"))  # Use deepseek-r1 here
  chain = LLMChain(llm=llm, prompt=prompt_template)




[1m> Entering new AgentExecutor chain...[0m


ValueError: An output parsing error occurred. In order to pass this error back to the agent and have it try again, pass `handle_parsing_errors=True` to the AgentExecutor. This is the error: Could not parse LLM output: `<think>
Okay, so I need to figure out what "math" refers to in this context. The user mentioned that I have access to Wikipedia, Arxiv, and a Retrieval tool. Hmm, first off, when someone says "the math," it's a bit vague because it can refer to mathematics in general or specific instances like mathematical formulas, mathematicians, or even something like the "math" used in everyday language, maybe referencing numerals.

Since I don't have prior knowledge of what they're asking for beyond this specific question, I should probably start by looking up some basic information about mathematics. That makes sense because if it's a general term, knowing its basics would help clarify any confusion.

So, the first step is to use Wikipedia to get an overview. I'll query "Mathematics" on Wikipedia. The summary says math is the study of quantity, structure, space, and change. It's used as a tool in various fields like natural science, engineering, medicine, and economics. That gives me a basic understanding.

Now that I know it's about numbers, structures, and patterns, maybe "the math" refers to something specific, but without more context, it's hard to tell. Perhaps the user is asking for an introduction or definitions of different areas within mathematics, like algebra or calculus? Or maybe they're referring to mathematical concepts in a particular context, such as statistics?

I should check if there are any other relevant sources available through Arxiv. Maybe there are recent papers on foundational math topics. But given that I don't have access to the internet, I can only use these tools as provided.

Alternatively, perhaps using the Retrieval tool could help me get more precise information based on some keywords related to "math." But without knowing exactly what they need, it's tricky. Maybe they just want a summary or an overview of mathematics.

Given that, I think providing the general definition from Wikipedia is a good start. If more details are needed, further actions can be taken, but for now, this seems like the best initial step to understand "the math."
</think>

The term "math" generally refers to mathematics, which is the study of numbers, structures, patterns, and change. It serves as a fundamental tool across various disciplines including natural sciences, engineering, medicine, and economics. This broad field encompasses different branches such as algebra, calculus, geometry, and statistics.

**Answer:**  
Mathematics is the systematic study of quantity, structure, space, and change, providing a foundational framework for understanding patterns and relationships in various scientific and practical contexts.`
For troubleshooting, visit: https://python.langchain.com/docs/troubleshooting/errors/OUTPUT_PARSING_FAILURE 

In [None]:
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# List of websites to scrape
urls = [
    "https://www.zoopla.co.uk/discover/buying/answers-to-your-most-common-mortgage-questions/",
    "https://moneysavingguru.co.uk/info/what-will-a-mortgage-advisor-ask-me/"
]

# Web scraping setup
docs = []
for url in urls:
    loader = WebBaseLoader(url)
    docs.extend(loader.load())

# Text splitting
documents = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(docs)

# Embedding and vector store
vectordb = FAISS.from_documents(documents, OllamaEmbeddings(model="deepseek-r1"))
retriever = vectordb.as_retriever()

# Now, you can use retriever to search for mortgage-related information


USER_AGENT environment variable not set, consider setting it to identify your requests.
  vectordb = FAISS.from_documents(documents, OllamaEmbeddings(model="deepseek-r1"))


In [None]:
# Ask a question to retrieve information related to mortgages
query = "What are the main types of mortgages available in the UK?"

# Use the retriever to search for information
results = retriever.get_relevant_documents(query)

# Print only the page_content
for doc in results:
    print(doc.page_content)


An experienced mortgage advisor will guide you through the pros and cons of each major product type to help identify your best fit. 
Their role is to find you a mortgage that’s not just affordable today but over the full loan term.
7. Do You Have Children or Plans to Start a Family?
Your family situations or plans matter.
If you already have kids, be honest about any childcare, education or maintenance costs.
And if you’re looking to have children soon, then your lender will factor the potential loss of income (e.g. paternity/maternity leave) and higher family spending into their calculations.
While starting a family is an exciting time, mortgage lenders must ensure your loan remains affordable even with the added costs that children bring. 
Openly discussing your family plans allows them to properly “stress test” affordability over the long mortgage term.
9. What Are Your Potential Long-Term Housing Plans?
Credit cards
Personal loans
Hire purchase agreements
Court judgements/decrees
D

In [None]:
from langchain_community.document_loaders import WebBaseLoader, PDFLoader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI
from langchain.agents import initialize_agent, AgentType
from langchain.tools import Tool

# Step 1: Define the web scraping and PDF data loading (Mortgage-specific content)
urls = [
    "https://www.zoopla.co.uk/discover/buying/answers-to-your-most-common-mortgage-questions/",
    "https://moneysavingguru.co.uk/info/what-will-a-mortgage-advisor-ask-me/"
]

# Load PDF file data
pdf_loader = PDFLoader("/mnt/data/AI Mortgage Advisor Project.pdf")
pdf_docs = pdf_loader.load()

# Scraping data from the websites
docs = []
for url in urls:
    loader = WebBaseLoader(url)
    docs.extend(loader.load())

# Combine web scraped data and PDF data
all_docs = docs + pdf_docs

# Step 2: Split the documents into chunks using RecursiveCharacterTextSplitter
documents = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200).split_documents(all_docs)

# Step 3: Use Ollama Embeddings to convert documents into vectors
vectordb = FAISS.from_documents(documents, OllamaEmbeddings(model="deepseek-r1"))
retriever = vectordb.as_retriever()

# Define a custom retriever tool to fetch mortgage-related info
def retriever_tool(query: str) -> str:
    return f"Retrieved mortgage-related information for: {query}"

retriever_tool = Tool(
    name="Mortgage Information Retriever",
    func=retriever_tool,
    description="Useful for retrieving mortgage-related information."
)

# Step 4: Set up the LLM chain for personalized responses based on user inputs
llm = OpenAI(model="gpt-4")

# Define the prompt template for personalized mortgage responses
summary_prompt = PromptTemplate(
    input_variables=["user_name", "purpose", "credit_score", "loan_amount", "loan_term", "interest_rate", "down_payment"],
    template="""
    Here is a summary for {user_name} regarding their mortgage inquiry:
    - Purpose: {purpose}
    - Credit Score: {credit_score}
    - Estimated Loan Amount: {loan_amount}
    - Loan Term: {loan_term} years
    - Interest Rate: {interest_rate}
    - Down Payment: {down_payment}
    
    Based on this information, please provide a personalized recommendation from the available mortgage options.
    """
)

# Create LLMChain using GPT-4 and the defined summary prompt
llm_summary_chain = LLMChain(llm=llm, prompt=summary_prompt)

# Step 5: Initialize the agent for interactive conversation
agent = initialize_agent(
    tools=[retriever_tool],
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Step 6: Define a function to handle mortgage-related user queries
def handle_mortgage_query(user_query):
    # Check if the query contains mortgage-related keywords
    mortgage_keywords = ['mortgage', 'loan', 'interest rate', 'down payment', 'credit score', 'repayment', 'refinance']
    
    # If the question is mortgage-related, use the retriever to fetch information
    if any(keyword in user_query.lower() for keyword in mortgage_keywords):
        return agent.run(user_query)
    else:
        # If the question is not mortgage-related, return a polite response
        return "I can only assist with mortgage-related queries."

# Step 7: Example use case - Mortgage-related user interaction
print("Welcome to our mortgage assistant service! I’m here to help with your mortgage questions.")
user_name = input("May I know your name to make this conversation more personal? ")

# Example questions to user
print(f"Hello {user_name}, let’s get started! What is the purpose of your mortgage?")
purpose = input("Your response: ")

print("Got it! Now, could you share your credit score range? (e.g., Excellent, Good, Fair)")
credit_score = input("Your response: ")

print("What’s your estimated budget or loan amount?")
loan_amount = input("Your response: ")

print("How long do you plan to take the loan for? (15, 20, 30 years)")
loan_term = input("Your response: ")

print("Do you prefer a fixed or adjustable interest rate?")
interest_rate = input("Your response: ")

print("How much down payment can you afford?")
down_payment = input("Your response: ")

# Step 8: Generate the personalized summary using LLMChain
generated_summary = llm_summary_chain.run(
    user_name=user_name,
    purpose=purpose,
    credit_score=credit_score,
    loan_amount=loan_amount,
    loan_term=loan_term,
    interest_rate=interest_rate,
    down_payment=down_payment
)

print("\nGenerated Mortgage Summary:")
print(generated_summary)

# Step 9: Ask if the user is interested in this mortgage option
interest = input("\nAre you interested in this mortgage option? (Yes/No) ")

if interest.lower() == "yes":
    # Send information to human mortgage advisor
    print("Thank you! I will now send your details to a human mortgage advisor.")
    # Send the email to the advisor (you will need to implement this functionality as needed)
else:
    print("Thank you for your time. Feel free to reach out if you have more questions!")
