In [9]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS

from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA


#Read the PDF'sfrom the folder
loader=PyPDFDirectoryLoader('G:\\Python coding\\Metropolia\\Gen AI Projects')

documents=loader.load()
text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)

final_doc=text_splitter.split_documents(documents)
final_doc[0]

Document(metadata={'source': 'G:\\Python coding\\Metropolia\\Gen AI Projects\\Market_Segmentation_Analysis.pdf', 'page': 0}, page_content='Management for Professionals\nMarket\nSegmentation\nAnalysis\nSara Dolnicar\nBettina Grün\nFriedrich Leisch\nUnderstanding It, Doing It,\nand Making It Useful')

In [11]:
len(final_doc)

1006

In [23]:
# Embedding technique using HuggingFace

hugging_face_embeddings= HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",   #or use sentence-transformers/all-MiniLM-16-v2
    model_kwargs={'device':'cpu'},
    encode_kwargs={'normalize_embeddings':True}
)





modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [37]:
import numpy as np

np.array(hugging_face_embeddings.embed_query(final_doc[0].page_content))

#print(np.array(huggingface_embeddings.embed_query(final_documents[0].page_content)))
print(np.array(hugging_face_embeddings.embed_query(final_doc[0].page_content)).shape)

(384,)


In [39]:
## VectorStore Creation
vectorstore=FAISS.from_documents(final_doc[:384],hugging_face_embeddings)

In [41]:
## Query using Similarity Search
query="WHAT IS Step2 in Part 2 of document?"
relevant_docments=vectorstore.similarity_search(query)


print(relevant_docments[0].page_content)

Chapter 4
Step 2: Specifying the Ideal Target
Segment
4.1 Segment Evaluation Criteria
The third layer of market segmentation analysis (illustrated in Fig. 2.1) depends
primarily on user input. It is important to understand that – for a market segmenta-
tion analysis to produce results that are useful to an organisation – user input cannot
be limited to either a brieﬁng at the start of the process, or the development of a
marketing mix at the end. Rather, the user needs to be involved in most stages,
literally wrapping around the technical aspects of market segmentation analysis.
After having committed to investigating the value of a segmentation strategy in
Step 1, the organisation has to make a major contribution to market segmentation
analysis in Step 2. While this contribution is conceptual in nature, it guides many of
the following steps, most critically Step 3 (data collection) and Step 8 (selecting
one or more target segments). In Step 2 the organisation must determine two


In [43]:
retriever=vectorstore.as_retriever(search_type="similarity",search_kwargs={"k":3})
print(retriever)

tags=['FAISS', 'HuggingFaceBgeEmbeddings'] vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000028A2D69AD80> search_kwargs={'k': 3}


In [47]:
import os
os.environ['HUGGINGFACEHUB_API_TOKEN']="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"

In [49]:
from langchain_community.llms import HuggingFaceHub

hf=HuggingFaceHub(
    repo_id="mistralai/Mistral-7B-v0.1",
    model_kwargs={"temperature":0.1,"max_length":500}

)
query="WHAT IS Step2 in Part 2 of document?"
hf.invoke(query)

'WHAT IS Step2 in Part 2 of document?\n\nStep 2 is the second step in the process of documenting a project. It is the step where the project is documented in a way that is understandable to the reader.\n\nWhat is the purpose of Step 2 in Part 2 of document?\n\nThe purpose of Step 2 in Part 2 of document is to provide a clear and concise description of the project. This description should be written in a way that is easy to understand and follow.\n\n'

In [55]:
prompt_template="""
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

{context}
Question:{question}

Helpful Answers:
 """

prompt=PromptTemplate(template=prompt_template,input_variables=["context","question"])

In [57]:
retrievalQA=RetrievalQA.from_chain_type(
    llm=hf,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt":prompt}
)

In [59]:
query="""what is the step9 talks about?"""
# Call the QA chain with our query.
result = retrievalQA.invoke({"query": query})
print(result['result'])


Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context

28 3 Step 1: Deciding (not) to Segment
Task
Who is
responsible? Completed?
Ask if the organisation’s culture is market-oriented. If yes, proceed. If 
no, seriously consider not to proceed.
Ask if the organisation is genuinely willing to change. If yes, proceed. 
If no, seriously consider not to proceed.
Ask if the organisation takes a long-term perspective. If yes, proceed. 
If no, seriously consider not to proceed.
Ask if the organisation is open to new ideas. If yes, proceed. If no, 
seriously consider not to proceed.
Ask if communication across organisational units is good. If yes, 
proceed. If no, seriously consider not to proceed.
Ask if the organisation is in the position to make significant
(structural) changes. If yes, proceed. If no, seriously consider not to 
proceed.
Ask if the organisation has sufficient financial resources to support a 
market segme