### Loading PDF and Removing /n

In [43]:
file_path = "../data/ug_student_handbook.pdf"

In [None]:
from langchain_community.document_loaders import PyPDFLoader
import pprint
loader = PyPDFLoader(file_path)


In [8]:
pages_pdf = loader.load()

In [9]:
#Removing \n in pages
for page in pages_pdf:
    page.page_content = ' '.join(page.page_content.split())

### Document Splitting with Recusrive CharacterTextSplitter

In [10]:
import uuid
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [11]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap =50
)

In [12]:
texts = text_splitter.split_documents(pages_pdf)

In [13]:
#metadata list only
metadata_list = [text.metadata for text in texts]

In [14]:
#id_list
id_list = [str(uuid.uuid4()) for _ in texts]


In [15]:
id_list

['356ea3db-e2e4-4475-ae04-6ba18c1ad5f3',
 'b0e84474-3640-4704-888e-ada433e56572',
 'a7b1261a-41ba-4cd1-8d8a-596ae9f61ffd',
 '42c32110-b5dc-4f07-8f48-930ef49b39cf',
 'd6f2baac-198d-4a81-9f62-b113f53e5fa9',
 '17409e67-72c5-461b-af96-25ea17ddcc42',
 '65323618-9f5f-40c5-a837-c5fbbcb87959',
 '25505394-ec0c-4e48-a99e-2eff4683eeee',
 '4b6bee28-2c09-43f6-b05f-35293b267391',
 '935d5383-204e-45bf-8bb3-297d128a83b7',
 '21ce8945-c069-44d7-96d7-fbad1b84d9ec',
 'b7318ee1-8833-4e36-b5ea-bd1ed7656b20',
 '81355d01-efb1-46a0-9664-b4e046f51c5d',
 '5535679b-3e29-477e-9905-baa9fd909cf8',
 '8864efe9-89fe-4c41-b276-eab1c90588ec',
 '907a3b9d-525d-4c78-ad04-7ce67e20a8d7',
 '13cd7d9b-8e46-4b86-8029-36ce9843db21',
 'fc170a58-1acd-4fa0-828a-f0c3bc69d12e',
 '8d5e0a3f-a108-467f-b4da-5f5936854d30',
 '5cfecda9-4bff-4e71-9e5b-55c27ca53fa8',
 '688e1fe7-9187-4d67-bc7a-344f2ed657bb',
 'ba94bd4e-8de0-48e9-92ba-3cdae40020b1',
 '7013404d-a6bd-49f4-92ac-394548c34d72',
 '07be42b3-e06c-4128-bbea-088719890115',
 '357dcb8b-8a70-

### Embedding

changing text -> vectors

In [16]:
from sentence_transformers import SentenceTransformer

In [17]:
embedding = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [18]:
#Storing all the content in a list
page_contents = []
for text in texts:
    page_contents.append(text.page_content)

In [19]:
#Encode all the content into list of vectors
page_contents_vectors = [embedding.encode(content) for content in page_contents]

In [20]:
(len(page_contents_vectors),len(id_list), len(metadata_list))

(229, 229, 229)

### Chroma DB

In [21]:
import chromadb

chroma_client = chromadb.PersistentClient(path="./chroma_langchain_db")

In [22]:
collection = chroma_client.get_or_create_collection(name="handbook_embeddings_collections")

In [23]:
collection.add(
    documents=page_contents,
    embeddings=page_contents_vectors,
    ids=id_list,
    metadatas=metadata_list
)

### Retrieving Query

In [51]:
query_text = "School Uniform?"

In [52]:
query_vector = embedding.encode(query_text)

In [53]:
result = collection.query(
    query_embeddings=query_vector,
    n_results=3
)

In [54]:
result['documents']

[['7. Shoes Black covered shoes Student Uniform and Dress Code for male 1. Blazer Gray color 2. Shirt White shirt, short sleeves, button down 3. Trousers Plain black according to the university pattern 4. Necktie University pattern 5. Pin University emblem, on the necktie 6. Belt University emblem buckle, black leather belt 7. Socks Formal black color 8. Shoes Black covered shoes',
  'you are and the character you possess. Stamford International University has a formal uniform. It exists in two versions, one for men and one for women (applicable for undergraduate): Student Uniform and Dress Code for female 1. Blazer Gray color 2. Shirt White shirt, short sleeves, button down 3. Button University emblem 4. Skirt Gray color, knee length 5. Pin University emblem, right side 6. Belt University emblem buckle, black leather belt 7. Shoes Black covered shoes Student Uniform and',
  'students can expect to go on many company visits and to give presentations in front of senior members of compan

In [55]:
pprint.pp(result)

{'ids': [['3390f3a6-8d5e-4dc7-8761-7b17b0109ebb',
          '78f7a13b-1313-4f03-9ab9-4a822c414a39',
          'f2aeb4d5-0ce0-4363-8a35-2a3b570c7d74']],
 'embeddings': None,
 'documents': [['7. Shoes Black covered shoes Student Uniform and Dress Code '
                'for male 1. Blazer Gray color 2. Shirt White shirt, short '
                'sleeves, button down 3. Trousers Plain black according to the '
                'university pattern 4. Necktie University pattern 5. Pin '
                'University emblem, on the necktie 6. Belt University emblem '
                'buckle, black leather belt 7. Socks Formal black color 8. '
                'Shoes Black covered shoes',
                'you are and the character you possess. Stamford International '
                'University has a formal uniform. It exists in two versions, '
                'one for men and one for women (applicable for undergraduate): '
                'Student Uniform and Dress Code for female 1. Blazer Gray

validate context

In [56]:
for i, doc in enumerate(result["documents"][0]):
    print(f"\n--- Result {i+1} ---\n")
    print(doc[:1000])



--- Result 1 ---

7. Shoes Black covered shoes Student Uniform and Dress Code for male 1. Blazer Gray color 2. Shirt White shirt, short sleeves, button down 3. Trousers Plain black according to the university pattern 4. Necktie University pattern 5. Pin University emblem, on the necktie 6. Belt University emblem buckle, black leather belt 7. Socks Formal black color 8. Shoes Black covered shoes

--- Result 2 ---

you are and the character you possess. Stamford International University has a formal uniform. It exists in two versions, one for men and one for women (applicable for undergraduate): Student Uniform and Dress Code for female 1. Blazer Gray color 2. Shirt White shirt, short sleeves, button down 3. Button University emblem 4. Skirt Gray color, knee length 5. Pin University emblem, right side 6. Belt University emblem buckle, black leather belt 7. Shoes Black covered shoes Student Uniform and

--- Result 3 ---

students can expect to go on many company visits and to give presen

### Ollama

In [1]:
from ollama import chat
from ollama import ChatResponse

In [None]:
response = chat(
    model='mistral',
    messages=[{'role': 'user', 'content': 'Hello!'}],
)
print(response.message.content)

 Hello there! How can I help you today? Is there something specific you'd like to know or talk about? I'm here to assist with a wide range of topics, from answering questions and providing explanations to engaging in conversation and even helping with writing tasks. What do you need help with?


: 