In [1]:
import os

In [2]:
from dotenv import load_dotenv

In [3]:
# Load environment variables from a local .env file (presumably the source of
# OPENAI_API_KEY, which a later cell reads from os.environ — confirm).
# NOTE(review): machine-specific absolute path; a relative path or a configurable
# location would let this notebook run on other machines.
load_dotenv('/home/santhosh/Projects/courses/Pinnacle/.env')

True

In [4]:
# Alternate .env location on a Windows drive. In the recorded run this call
# returned False, i.e. nothing was loaded from this path on that machine.
load_dotenv('D:/projects/env')

False

In [5]:
# Fail fast: indexing os.environ raises KeyError here if the key was not loaded.
# The variable itself is not referenced again in the visible cells.
OPENAI_API_KEY = os.environ['OPENAI_API_KEY']

In [6]:
import langchain
import langchain_community

In [7]:
langchain.__version__, langchain_community.__version__

('0.2.14', '0.2.12')

In [8]:
import tiktoken

# Data

In [9]:
from langchain_community.document_loaders import UnstructuredWordDocumentLoader, UnstructuredPDFLoader

In [10]:
from langchain_community.document_loaders import PyMuPDFLoader

In [11]:
from langchain_text_splitters import CharacterTextSplitter

Source of the PDF used below (HBS Online e-book): https://info.email.online.hbs.edu/strategy-formulation-ebook

In [12]:
loader = PyMuPDFLoader(file_path='how-to-formulate-successful-business-strategy.pdf', extract_images=False)

In [13]:
data = loader.load()

In [14]:
len(data)

23

In [15]:
# Splitter that breaks text on newlines into chunks of at most ~500 units, with no overlap.
# NOTE(review): chunk_size is presumably measured in characters (the splitter's
# default length function) — confirm.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=500,
    chunk_overlap=0,
    is_separator_regex=False,  # treat "\n" as a literal separator, not a regex
)

In [16]:
texts = text_splitter.split_documents(data)

In [17]:
len(texts)

69

In [18]:
texts[0]

Document(metadata={'source': 'how-to-formulate-successful-business-strategy.pdf', 'file_path': 'how-to-formulate-successful-business-strategy.pdf', 'page': 0, 'total_pages': 23, 'format': 'PDF 1.7', 'title': 'How to Formulate a Successful Business Strategy', 'author': 'Harvard Business School Online', 'subject': '', 'keywords': '', 'creator': 'Adobe InDesign 17.4 (Macintosh)', 'producer': 'Adobe PDF Library 16.0.7', 'creationDate': "D:20221018124609-04'00'", 'modDate': "D:20221018124635-04'00'", 'trapped': ''}, page_content='How to Formulate \na\xa0Successful \nBusiness\xa0Strategy')

In [19]:
# Tokenizer used below to estimate how many tokens the loaded document contains.
# NOTE(review): this is the tokenizer for gpt-4o-mini, whereas the embeddings are
# later produced with text-embedding-3-small; the two models may tokenize
# differently, so the resulting count should be treated as an estimate — confirm.
encoding = tiktoken.encoding_for_model(model_name='gpt-4o-mini')

In [20]:
# Total token count across every loaded page (the unsplit `data`, not `texts`),
# measured with the `encoding` tokenizer defined above.
total_embedding_tokens = sum(
    len(encoding.encode(page.page_content)) for page in data
)

In [21]:
total_embedding_tokens

5956

# Embeddings

In [22]:
from langchain_openai import OpenAIEmbeddings

In [23]:
embedding_model = OpenAIEmbeddings(model='text-embedding-3-small', show_progress_bar=True)

In [29]:
from langchain.indexes import SQLRecordManager, index

In [24]:
from langchain_chroma import Chroma

In [25]:
# Chroma collection 'strategy', persisted under ./HBS_db and embedding with `embedding_model`.
# "hnsw:space": "cosine" asks the collection to use cosine as its distance metric.
db = Chroma(persist_directory='./HBS_db', collection_name='strategy', 
            embedding_function=embedding_model, collection_metadata={"hnsw:space": "cosine"})

In [26]:
len(db.get()['ids'])

0

In [27]:
# Namespace passed to the SQL record manager to scope its records.
# Plain literal: the original f-string had no placeholders to interpolate.
namespace = "hbs/strategy"

In [30]:
record_manager = SQLRecordManager(namespace, db_url="sqlite:///hbs_strategy.sql")

In [31]:
record_manager.create_schema()

In [32]:
record_manager.list_keys()

[]

In [33]:
# Index the split chunks into `db`, passing `record_manager` for bookkeeping.
# The call is the cell's last expression, so its returned dict of
# added/updated/skipped/deleted counts is displayed as the output.
index(
    docs_source=texts,
    record_manager=record_manager,
    vector_store=db,
    cleanup='incremental',
    source_id_key='source',
)

  0%|          | 0/1 [00:00<?, ?it/s]

{'num_added': 69, 'num_updated': 0, 'num_skipped': 0, 'num_deleted': 0}

In [34]:
len(db.get()['ids'])

69

# Retriever

Reference on Maximal Marginal Relevance (MMR), the search type used below: http://www.cs.bilkent.edu.tr/~canf/CS533/hwSpring14/eightMinPresentations/handoutMMR.pdf

In [36]:
# MMR retriever: fetch the 10 closest chunks (fetch_k), then return 3 of them (k).
# lambda_mult=0.9 weights the selection heavily toward relevance over diversity.
# No score_threshold here: that option belongs to
# search_type="similarity_score_threshold" and does nothing under "mmr".
retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "fetch_k": 10, "lambda_mult": 0.9},
)

In [37]:
docs = retriever.invoke("wat role do employees play?")

  0%|          | 0/1 [00:00<?, ?it/s]

In [38]:
# Print each retrieved chunk prefixed with its rank, separated by a blank line.
for i, doc in enumerate(docs):
    # Only the text is shown here; doc.metadata is also available per chunk.
    print(i, doc.page_content, end='\n\n')

0 aware of your company’s goals, how and why you chose those goals, and what they can 
do to help reach them, you can create an increased sense of responsibility throughout 
your organization.
This can also have trickle-down effects. For instance, if a manager isn’t clear on your 
organization’s strategy or the reasoning used to craft it, they could make decisions that 
counteract it. With one vision to unite around, everyone at your organization can act 
with a broader strategy in mind.

1 How can my business create value for employees? 
And how can my business create value by 
collaborating with suppliers? Think of a company’s 
strategy as an answer to these three questions.”
HBS Professor Felix Oberholzer-Gee 
in Business Strategy

2 generates for its 
shareholders. 
PEOPLE
refers to a company’s 
commitment to making 
a societal impact in 
communities locally and 
abroad. 
PLANET
refers to the impact 
an organization 
makes on the 
environment.



# With score

In [39]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import chain

In [40]:
@chain
def retriever(query: str) -> List[Document]:
    """Return the top-3 chunks for ``query`` with each hit's score attached.

    Calls ``db.similarity_search_with_score`` and stores the score of every hit
    under ``doc.metadata["score"]`` so callers can inspect it alongside the text.

    NOTE(review): the collection is configured with cosine space, so the score
    is presumably a distance (lower means closer) — confirm.

    Args:
        query: Natural-language search string.

    Returns:
        The matching documents in the order the vector store returned them;
        an empty list when the search yields no hits.
    """
    scored_hits = db.similarity_search_with_score(query, k=3)

    # Walk the (document, score) pairs directly. Unpacking via ``zip(*...)``
    # raises ValueError on an empty result and yields a tuple, not a list.
    docs: List[Document] = []
    for doc, score in scored_hits:
        doc.metadata["score"] = score
        docs.append(doc)

    return docs

In [41]:
docs = retriever.invoke("wat role do employees play?")

  0%|          | 0/1 [00:00<?, ?it/s]

In [42]:
# For each hit, print the score stored in its metadata, then its rank and text.
for i, doc in enumerate(docs):
    print(doc.metadata['score'])
    print(i, doc.page_content, end='\n\n')

0.5587440674862967
0 aware of your company’s goals, how and why you chose those goals, and what they can 
do to help reach them, you can create an increased sense of responsibility throughout 
your organization.
This can also have trickle-down effects. For instance, if a manager isn’t clear on your 
organization’s strategy or the reasoning used to craft it, they could make decisions that 
counteract it. With one vision to unite around, everyone at your organization can act 
with a broader strategy in mind.

0.5931501501426615
1 How can my business create value for employees? 
And how can my business create value by 
collaborating with suppliers? Think of a company’s 
strategy as an answer to these three questions.”
HBS Professor Felix Oberholzer-Gee 
in Business Strategy

0.6429791127071117
2 execution requires organization-wide effort.
Crafting a strategy is crucial, but it can’t be successful unless 
it’s effectively and artfully communicated to all employees 
so they feel empowered 