In [1]:
!pip install -qU langchain langchain-community langchain-text-splitters
!pip install -qU langchain-openai langchain-chroma pypdf

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters  import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda

In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
# os.environ["OPENAI_API_KEY"] = os.getenv('OPENAI_API_KEY')
os.environ["GEMINI_KEY"] = os.getenv('GEMINI_KEY')

In [3]:
# Download the PDF file
import requests

pdf_url = 'https://nibmehub.com/opac-service/pdf/read/An%20Introduction%20to%20Mathematics%20for%20Economics.pdf'
response = requests.get(pdf_url)

pdf_path = 'Mathematics_for_Economics.pdf'
with open(pdf_path, 'wb') as file:
    file.write(response.content)

In [4]:
from typing import List
from langchain.schema import Document

def pdf_extract(pdf_path: str) -> List[Document]:
    print("PDF file text is extracted...")
    loader = PyPDFLoader(pdf_path)
    pdf_text = loader.load()
    return pdf_text
     

In [5]:
# Extract text from the PDF file
pdf_text = pdf_extract(pdf_path)

PDF file text is extracted...


In [6]:
print(pdf_text)



In [7]:
print(pdf_text[0].page_content[:1000])  # Print the first 1000 characters of the first page

AN INTRODUCTION TO  
MATHEMATICS  
FOR ECONOMICS
AKIHITO ASANO
CONTENTS
1  Demand and supply in 
 competitive markets
2  Basic mathematics
3  Financial mathematics
4  Differential calculus 1
5  Differential calculus 2
6  Multivariate calculus
7  Integral calculus
Appendix A Matrix algebra
Appendix B An introduction 
to difference and differential 
equations
ASANOAN INTRODUCTION TO MATHEMATICS FOR ECONOMICS
A concise, accessible introduction to quantitative methods 
for economics and ﬁnance students, this textbook con-
tains lots of practical applications to show why maths is 
necessary and relevant to economics, as well as worked 
examples and exercises to help students learn and prepare 
for exams.
 Introduces mathematical techniques in the context of  
 introductory economics, bridging the gap between the  
 two subjects 
 Written in a friendly conversational style, but with  
 precise presentation of mathematics 
 Explains applications in detail, enabling students to  
 learn 

In [10]:
def pdf_chunk(pdf_text: List[Document]) -> List[Document]:
    
    # Splits extracted PDF text into smaller chunks using RecursiveCharacterTextSplitter.
    print("PDF file text is chunked....")
    text_splitter = RecursiveCharacterTextSplitter(
        # Set a really small chunk size, just to show.
        chunk_size=1000,
        chunk_overlap=100,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = text_splitter.split_documents(pdf_text)

    return chunks
     

In [11]:
chunks = pdf_chunk(pdf_text)


PDF file text is chunked....


In [12]:
print(f"Number of chunks = {len(chunks)}")

Number of chunks = 670


In [13]:
print(chunks[100].page_content[:1000])

Much of economic analysis relies on the use of diagrams on which various schedules,
such as demand and supply schedules, are drawn. In the following subsections (and in
Chapter 3), we will introduce various types of function and describe them on diagrams.
As a preparation, we will ﬁrst establish the coordinate plane and related ideas.
Recall the real line we discussed previously. If we place another real line on top of it
and pivot one of them anti-clockwise around zero, then we have two real lines intersecting
perpendicularly at the zeros as in Figure 2.2. The plane made by these two lines (just
imagine placing a piece of paper on the two lines) is called thecoordinate plane. The
horizontal line is called thehorizontal axis and the other one is called thevertical axis.
The intersection of the two axes is called theorigin.
Let us think of the case wherex is taken on the horizontal axis andy is taken on the


In [14]:
import os
from langchain.embeddings import HuggingFaceInstructEmbeddings
# Set the chroma DB path
current_dir = os.getcwd()

persistent_directory = os.path.join(current_dir, "db", "chroma_db_pdf")

In [15]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings


def create_vector_store(chunks: List[Document], db_path: str) -> Chroma:
    # Creates a Chroma vector store from chunked documents.

    print("Chrome vector store is created...\n")
    # embedding_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0, max_tokens=1000)
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key= os.getenv('GEMINI_KEY'))

    # embedding_model = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large", model_kwargs={"device": "cuda"})
    # embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
    db = Chroma.from_documents(documents=chunks, embedding=embeddings, persist_directory=db_path)

    return db

In [16]:
db = create_vector_store(chunks, persistent_directory)

Chrome vector store is created...



In [17]:
def retrieve_context(db: Chroma, query: str) -> List[Document]:
    
    # Retrieves relevant document chunks from the Chroma vector store based on a query.

    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
    print("Relevant chunks are retrieved...\n")
    relevant_chunks = retriever.invoke(query)

    return relevant_chunks

In [18]:
query = "Hướng dẫn giải bài tập: Một dự án đầu tư đòi hỏi chi phí hiện tại 100 triệu đồng và sẽ đem lại 150 triệu đồng sau 3 năm. Với lãi suất thịnh hành 8% một năm, đánh giá xem có nên thực hiện dự án không?"

relevant_chunks = retrieve_context(db, query)

Relevant chunks are retrieved...



In [19]:
print(f"Number of relevant chunks = {len(relevant_chunks)}")

Number of relevant chunks = 2


In [28]:
for i, chunk in enumerate(relevant_chunks):
  print(f"Chunk-{i}")
  print(chunk)
  print("=======================================")
  print("\n")

Chunk-0
page_content='79 3.6 Logarithms: how many years will it take for my money to double?
1
1
y y = ex
y = x
y = ln x
x
0
Figure 3.6 A graph of the natural logarithmic function.
Rearranging this equation yields:
1.05t = 2. (3.45)
Taking (the natural) logarithms of the both sides of this equation yields:
ln 1.05t = ln 2. (3.46)
Now, using one of the logarithm properties, we rearrange the left hand side of this
equation in order to solve for t:
t ln 1.05 = ln 2. (3.47)
Hence:
t = ln 2
ln 1.05 (years). (3.48)
This is the answer and you can stop here. If you have a scientiﬁc calculator, you can
check that the value of t in (3.48) is roughly 14.21. Remember this value in reference to
the next exercise.
Question How many years will it take for our money to double if interest is compounded
quarterly at a nominal rate of 5 per cent?
Solution
2P = P
⎞
1 + 0.05
4
⎠4t
ln 2 = 4t ln 1.0125
t = ln 2
4l n1.0125
≈ 13.95.' metadata={'page': 95, 'creationdate': '2013-02-22T13:32:35+04:00', 'total_pag

In [21]:
def build_context(relevant_chunks: List[Document]) -> str:
    
   # Builds a context string from retrieved relevant document chunks.

    print("Context is built from relevant chunks")
    context = "\n\n".join([chunk.page_content for chunk in relevant_chunks])

    return context

In [22]:
context = build_context(relevant_chunks)

Context is built from relevant chunks


In [23]:
print(context)

79 3.6 Logarithms: how many years will it take for my money to double?
1
1
y y = ex
y = x
y = ln x
x
0
Figure 3.6 A graph of the natural logarithmic function.
Rearranging this equation yields:
1.05t = 2. (3.45)
Taking (the natural) logarithms of the both sides of this equation yields:
ln 1.05t = ln 2. (3.46)
Now, using one of the logarithm properties, we rearrange the left hand side of this
equation in order to solve for t:
t ln 1.05 = ln 2. (3.47)
Hence:
t = ln 2
ln 1.05 (years). (3.48)
This is the answer and you can stop here. If you have a scientiﬁc calculator, you can
check that the value of t in (3.48) is roughly 14.21. Remember this value in reference to
the next exercise.
Question How many years will it take for our money to double if interest is compounded
quarterly at a nominal rate of 5 per cent?
Solution
2P = P
⎞
1 + 0.05
4
⎠4t
ln 2 = 4t ln 1.0125
t = ln 2
4l n1.0125
≈ 13.95.

193 7.3 Application of integration to ﬁnance
value of a continuous annuity over three years is:
A =

In [24]:
import os
from typing import Dict
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.vectorstores import Chroma

def get_context(inputs: Dict[str, str]) -> Dict[str, str]:
    
    # Creates or loads a vector store for a given PDF file and extracts relevant chunks based on a query.

    pdf_path, query, db_path  = inputs['pdf_path'], inputs['query'], inputs['db_path']

    # Create new vector store if it does not exist
    if not os.path.exists(db_path):
        print("Creating a new vector store...\n")
        pdf_text = pdf_extract(pdf_path)
        chunks = pdf_chunk(pdf_text)
        db = create_vector_store(chunks, db_path)

    # Load the existing vector store
    else:
        print("Loading the existing vector store\n")
        embedding_model = GoogleGenerativeAIEmbeddings(model="text-embedding-3-small", google_api_key=os.getenv('GEMINI_KEY'))
        db = Chroma(persist_directory=db_path, embedding_function=embedding_model)

    relevant_chunks = retrieve_context(db, query)
    context = build_context(relevant_chunks)

    return {'context': context, 'query': query}

In [25]:

template = """ Bạn là mô hình AI được đào tạo để trả lời câu hỏi. Bạn chỉ nên trả lời
câu hỏi đã cho dựa trên ngữ cảnh đã cho.
Câu hỏi: {query}
\n
Ngữ cảnh: {context}
\n
Nếu câu trả lời không có trong ngữ cảnh đã cho, hãy trả lời như sau: Câu trả lời cho câu hỏi này không có sẵn
trong nội dung đã cung cấp.
  """

rag_prompt = ChatPromptTemplate.from_template(template)

llm = ChatGoogleGenerativeAI( model="gemini-2.0-flash", temperature=0, max_tokens=None, timeout=None,max_retries=2)

str_parser = StrOutputParser()

rag_chain = (
    RunnableLambda(get_context)
    | rag_prompt
    | llm
    | str_parser
)
     

In [26]:
answer = rag_chain.invoke({'pdf_path':pdf_path, 'query':query, 'db_path':persistent_directory})

Loading the existing vector store

Relevant chunks are retrieved...



  db = Chroma(persist_directory=db_path, embedding_function=embedding_model)


GoogleGenerativeAIError: Error embedding content: 400 * EmbedContentRequest.model: unexpected model name format
