### Loading PDF and Removing /n

In [1]:
file_path = "../data/ug_student_handbook.pdf"

In [2]:
from langchain_community.document_loaders import PyPDFLoader
import pprint
loader = PyPDFLoader(file_path)


In [3]:
pages_pdf = loader.load()

In [4]:
#Removing \n in pages
for page in pages_pdf:
    page.page_content = ' '.join(page.page_content.split())

### Document Splitting with Recusrive CharacterTextSplitter

In [5]:
import uuid
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap =50
)

In [7]:
texts = text_splitter.split_documents(pages_pdf)

In [8]:
#metadata list only
metadata_list = [text.metadata for text in texts]

In [9]:
#id_list
id_list = [str(uuid.uuid4()) for _ in texts]


In [10]:
id_list

['2c389d7e-eeea-422d-9413-fec47b3791a5',
 '8aacb2b2-46ab-42ce-b15b-0683cfb722b6',
 'afc64b9b-faf4-40cf-9c52-6a848ae242e6',
 'c32a4885-53cc-4e07-8b52-0e9caf0ffd00',
 '6641e562-3e34-4894-b3a7-9a8f7b064313',
 'e15159ea-024e-4f79-8529-dfe7935c44d7',
 '47e10b98-2994-448e-a8ca-5a8b1e22835f',
 '024635e0-ca26-4501-9233-3d3547ea11d7',
 '0fdce225-64b6-4012-9b0c-9929ebfa8788',
 '3b6c0adc-0fa9-4c18-9811-0fdb80af7a22',
 'c6a2efd2-1e5e-486c-812a-ef28d7c938d0',
 '6a656a78-4409-4396-bc4b-3e3b35839a0a',
 '7b3beca1-df98-41b0-9995-9264c3c1288b',
 '48998dab-61f4-4434-aed1-e42919bc2bae',
 '36589cf9-e4e7-44ad-bc0d-04cb5d6da83b',
 '2e46fe49-74b3-4398-852f-6be467ab076c',
 '8ad744e3-de31-48a7-a743-3562aaa790b9',
 'cb615347-fe05-463f-a124-66f3037fa033',
 '01a2777f-5c24-430a-a678-b63fbef8d6fa',
 'e3f89d87-8f0d-4260-89e3-fecb3411d392',
 '761ce9d0-cc67-4810-a998-97de0aba1181',
 '7bd37449-7548-435d-b281-bd288c06f5a9',
 'd468e9b0-c7bb-4a43-b19a-7ff1c4e1ac92',
 '9339c12d-75af-494b-bb93-db2a2048f166',
 'a749aa2e-992e-

### Embedding

changing text -> vectors

In [11]:
from sentence_transformers import SentenceTransformer

In [12]:
embedding = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

In [13]:
#Storing all the content in a list
page_contents = []
for text in texts:
    page_contents.append(text.page_content)

In [14]:
#Encode all the content into list of vectors
page_contents_vectors = [embedding.encode(content) for content in page_contents]

In [15]:
(len(page_contents_vectors),len(id_list), len(metadata_list))

(229, 229, 229)

### Chroma DB

In [16]:
import chromadb

chroma_client = chromadb.PersistentClient(path="./chroma_langchain_db")

In [17]:
collection = chroma_client.get_or_create_collection(name="handbook_embeddings_collections")

In [18]:
collection.add(
    documents=page_contents,
    embeddings=page_contents_vectors,
    ids=id_list,
    metadatas=metadata_list
)

### Retrieving Query

In [19]:
query_text = "School Uniform?"

In [20]:
query_vector = embedding.encode(query_text)

In [21]:
result = collection.query(
    query_embeddings=query_vector,
    n_results=3
)

validate context

In [24]:
for i, doc in enumerate(result["documents"][0]):
    print(f"\n--- Result {i+1} ---\n")
    print(doc[:1000])



--- Result 1 ---

7. Shoes Black covered shoes Student Uniform and Dress Code for male 1. Blazer Gray color 2. Shirt White shirt, short sleeves, button down 3. Trousers Plain black according to the university pattern 4. Necktie University pattern 5. Pin University emblem, on the necktie 6. Belt University emblem buckle, black leather belt 7. Socks Formal black color 8. Shoes Black covered shoes

--- Result 2 ---

you are and the character you possess. Stamford International University has a formal uniform. It exists in two versions, one for men and one for women (applicable for undergraduate): Student Uniform and Dress Code for female 1. Blazer Gray color 2. Shirt White shirt, short sleeves, button down 3. Button University emblem 4. Skirt Gray color, knee length 5. Pin University emblem, right side 6. Belt University emblem buckle, black leather belt 7. Shoes Black covered shoes Student Uniform and

--- Result 3 ---

students can expect to go on many company visits and to give presen

In [39]:
result['documents']

[['7. Shoes Black covered shoes Student Uniform and Dress Code for male 1. Blazer Gray color 2. Shirt White shirt, short sleeves, button down 3. Trousers Plain black according to the university pattern 4. Necktie University pattern 5. Pin University emblem, on the necktie 6. Belt University emblem buckle, black leather belt 7. Socks Formal black color 8. Shoes Black covered shoes',
  'you are and the character you possess. Stamford International University has a formal uniform. It exists in two versions, one for men and one for women (applicable for undergraduate): Student Uniform and Dress Code for female 1. Blazer Gray color 2. Shirt White shirt, short sleeves, button down 3. Button University emblem 4. Skirt Gray color, knee length 5. Pin University emblem, right side 6. Belt University emblem buckle, black leather belt 7. Shoes Black covered shoes Student Uniform and',
  'students can expect to go on many company visits and to give presentations in front of senior members of compan

### Ollama

In [27]:
from ollama import chat
from ollama import ChatResponse

In [40]:
from langchain_core.prompts import ChatPromptTemplate

In [None]:
response : ChatResponse = chat(
    model="mistral",
    messages = [
        {
            'role' : 'system',
            'content' : "You are a chatbot for assisting university student using a handbook as your resource."
        },
        {
            'role' : 'user',
            'content' : "Location of a campus",
        },
        {
            'role' : 'assistant',
            'content' : "{handbook}"
        }
    ]
    )   

print(response['message']['content'])
print(response.message.content)

 The sky appears blue due to a process called Rayleigh scattering. When sunlight, which is white light made up of different colors such as red, orange, yellow, green, blue, indigo, and violet, travels through Earth's atmosphere, the smaller molecules of gases like nitrogen and oxygen scatter sunlight in all directions.

Blue light has a shorter wavelength and is scattered more than other colors because it interacts more frequently with the gas molecules. This scattered blue light gets scattered in every direction, reaching our eyes from all angles, making the sky appear blue to us. At sunrise and sunset, the sky can appear red or orange due to a different type of scattering called scattering by dust and water droplets (known as Mie scattering), which preferentially scatters longer wavelengths like red and orange.
 The sky appears blue due to a process called Rayleigh scattering. When sunlight, which is white light made up of different colors such as red, orange, yellow, green, blue, in