In [2]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [3]:
# Load every PDF found directly under ./Data with PyPDFLoader, displaying a
# progress bar while the files are parsed.
loader = DirectoryLoader(
    './Data',
    glob='*.pdf',
    loader_cls=PyPDFLoader,
    show_progress=True,
)
documents = loader.load()

  0%|          | 0/2 [00:00<?, ?it/s]parsing for Object Streams
100%|██████████| 2/2 [00:05<00:00,  2.54s/it]


In [16]:
# Sanity-check the load: container type, number of loaded documents, then the
# text and metadata of the entry at index 143.
print(
    type(documents),
    len(documents),
    documents[143].page_content,
    documents[143].metadata,
    sep='\n',
)

<class 'list'>
144
 
 265 
BEHAVIORAL 
QUESTIONS 
"Roll with the Punches" 
Mike Tyson once said, “Everybody’s got a plan until they get punched in the face.” That’s what a 
confrontational question feels like the first time you get it, and you'll almost certainly get one. 
The most important part of handling these tough questions is not to appear rattled and 
remain composed. Many interviewees fall into a spiral after a tough question and never 
recover for the rest of the interview, which is exactly what your interviewer is trying to screen for. This advice 
is similar to our recommendation on navigating questions when you don't know the answer; the difference is 
that these questions are intentionally phrased to assess how you respond to being provoked with negative 
criticism. 
A few examples of these “Mike Tyson Questions” are: 
 Why did you not land an internship offer last summer? 
 Do you not have any other offers on the table right now? Would I be correct in assuming we are one

In [17]:
# Split the loaded documents into smaller chunks (chunk_size=1000,
# chunk_overlap=20) and inspect how many chunks came out plus one sample chunk.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=20,
)
docs = splitter.split_documents(documents)
print(len(docs), docs[143].page_content, docs[143].metadata, sep='\n')

396
http://breakingintowallstreet.com 
http://www.mergersandinquisitions.com 
 
61 
 
 
 
Merger Model Questions & Answers – Basic 
 
You don’t need to understand merger models as well as an M&A banker does, but you 
do need to more than just the basics, especially if you’ve had a finance internship or full-
time job before. 
 
It’s important to know the effects of an acquisition, and understand concepts such as 
synergies and why Goodwill & Other Intangibles actually get created. 
 
One thing that’s not important?  Walking through how all 3 statements are affected by 
an acquisition.  In 99% of cases, you only care about the Income Statement in a merger 
model (despite rumors to the contrary). 
 
1. Walk me through a basic merger model. 
 
“A merger model is used to analyze the financial profiles of 2 companies, the purchase 
price and how the purchase is made, and determines whether the buyer’s EPS increases 
or decreases.
{'source': 'Data\\400 Questions & Technicals.pdf', 'page': 60

In [32]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS

In [33]:
import warnings
# Installs a blanket "ignore" filter. No category is given, so it applies to
# every Warning subclass: nothing will be reported for the rest of the kernel
# session, including deprecation notices from the libraries used here.
warnings.filterwarnings('ignore')

In [34]:
# all-MiniLM-L6-v2 is a generic sentence-transformers model, so it is wrapped
# with the generic HuggingFaceEmbeddings class. The BGE-specific wrapper used
# before prepends a BGE retrieval instruction to every query, which this model
# was not trained with and which skews query vectors relative to the indexed
# document vectors.
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
# Backward-compatible alias: the original cell exposed the model under this
# (misspelled) name, so keep it in case other cells still refer to it.
embeddigs = embeddings

# Embed every chunk and build the in-memory FAISS index used for retrieval.
db = FAISS.from_documents(docs, embeddings)

In [45]:
import os
from getpass import getpass

from langchain.llms import HuggingFaceHub

# SECURITY: the access token must never be stored in the notebook. It is read
# from the HUGGINGFACEHUB_API_TOKEN environment variable; when that is not set
# the user is prompted for it without echoing. The token that was previously
# committed in this cell should be treated as leaked and revoked.
api_key = os.environ.get('HUGGINGFACEHUB_API_TOKEN') or getpass('Hugging Face API token: ')

# Hosted flan-t5-large served through the Hugging Face Hub inference endpoint.
llm = HuggingFaceHub(
    huggingfacehub_api_token=api_key,
    repo_id="google/flan-t5-large",
    model_kwargs={
        "temperature": 0.5,
        "top_p": 0.85,
        "max_length": 150  # Increase max_length for longer outputs
    }
)

In [46]:
# Smoke test: send a trivial prompt straight to the model to confirm that the
# configured endpoint answers before wiring it into the retrieval pipeline.
llm('Hello, how are you?')

"I'm fine."

In [47]:
# Retrieval check on its own: ask the vector store for the 3 chunks it ranks
# closest to the query so the retrieved text can be inspected by eye.
db.similarity_search('What is a Relative Valuation?',k=3)

[Document(metadata={'source': 'Data\\WSP_RedBook_Sample.pdf', 'page': 19}, page_content='49 \nVALUATION \nQUESTIONS \nFor example, an analyst valuing an acquisition target may look at the past premiums and values paid on \ncomparable transactions to determine what the acquirer must realistically expect to pay. The analyst may also \nvalue the company using a DCF to help show how far market prices are from intrinsic value estimates.  \nAnother example of when the DCF and comps approaches can be used together is when an investor considers \ninvesting in a business – the analyst may identify investing opportunities where comps-derived market values \nfor companies are significantly lower than valuations derived using a DCF (although it bears repeating that the \nDCF’s sensitivity to assumptions is a frequent criticism). \nWould you agree with the statement that relative valuation relies less on the discretionary \nassumptions of individuals? \nThat could be argued as an inaccurate stateme

In [52]:
class RAG_Chatbot:
    """Minimal retrieval-augmented question answering helper.

    Pairs a vector store (anything exposing ``similarity_search(query, k=...)``
    that returns objects with ``page_content`` and ``metadata``) with a callable
    language model that maps a prompt string to a response.
    """

    def __init__(self, db, llm):
        """Store the vector store ``db`` and the callable model ``llm``."""
        self.db=db
        self.llm=llm

    def generate(self, question, k=3):
        """Answer ``question`` from the ``k`` most similar stored chunks.

        Args:
            question: The user's question; used both as the retrieval query and
                inside the prompt.
            k: Number of chunks to retrieve as context. Defaults to 3, the
                value that used to be hard-coded.

        Returns:
            A ``(response, context_metadata)`` tuple: the model's reply and a
            newline-separated string with the ``source`` of each retrieved
            chunk, in retrieval order (duplicates are kept).
        """
        docs=self.db.similarity_search(question,k=k)
        context='\n'.join(doc.page_content for doc in docs)
        # Not every document carries a 'source' entry (or any metadata at all);
        # fall back to a placeholder instead of raising KeyError mid-answer.
        context_metadata='\n'.join(
            str((doc.metadata or {}).get('source', 'unknown')) for doc in docs
        )
        # The prompt text, including the leading spaces on its continuation
        # lines, is kept exactly as before so the model sees the same input.
        prompt=f'''Use the following pieces of information to answer the user's question.
                  If you don't know the answer, just say that you don't know, don't try to make up an answer.
                  Context: {context}
                  Question: {question}
                  Only return the helpful answer below and nothing else'''
        response=self.llm(prompt)
        return response, context_metadata

In [53]:
# End-to-end check: wire the vector store and the model together, then ask one
# question. The cell displays the (response, sources) tuple that generate() returns.
rag=RAG_Chatbot(db,llm)
rag.generate('What is a Relative Valuation?')

('an analyst valuing an acquisition target may look at the past premiums and values paid on comparable transactions to determine what the acquirer must realistically expect to pay',
 'Data\\WSP_RedBook_Sample.pdf\nData\\400 Questions & Technicals.pdf\nData\\WSP_RedBook_Sample.pdf')

In [55]:
import pickle

# Serialize the whole vector-store object to db.pkl in the working directory.
# NOTE(review): unpickling executes arbitrary code, so db.pkl must only ever be
# loaded from a trusted location. LangChain's FAISS wrapper also provides
# save_local()/load_local(); consider whether that is a better fit — confirm
# against whatever consumes db.pkl before switching.
with open('db.pkl','wb') as f:
    pickle.dump(db,f)