In [6]:
import os
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.vectorstores import FAISS

MODEL = "mistral"
model = Ollama(model=MODEL)
embeddings = OllamaEmbeddings(model=MODEL)


In [7]:

from langchain.text_splitter import RecursiveCharacterTextSplitter

# loader = PyPDFLoader("acsbr-016.pdf")
loader = PyPDFDirectoryLoader("./data/")

docs_before_split = loader.load_and_split()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 700,
    chunk_overlap  = 50,
)
docs_after_split = text_splitter.split_documents(docs_before_split)

# pages


In [8]:
avg_doc_length = lambda docs: sum([len(doc.page_content) for doc in docs])//len(docs)
avg_char_before_split = avg_doc_length(docs_before_split)
avg_char_after_split = avg_doc_length(docs_after_split)

print(f'Before split, there were {len(docs_before_split)} documents loaded, with average characters equal to {avg_char_before_split}.')
print(f'After split, there were {len(docs_after_split)} documents (chunks), with average characters equal to {avg_char_after_split} (average chunk length).')

Before split, there were 653 documents loaded, with average characters equal to 1998.
After split, there were 2291 documents (chunks), with average characters equal to 575 (average chunk length).


In [9]:
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",  # alternatively use "sentence-transformers/all-MiniLM-l6-v2" for a light and faster experience.
    model_kwargs={'device':'cpu'}, 
    encode_kwargs={'normalize_embeddings': True}
)
vectorstore = FAISS.from_documents(docs_after_split, huggingface_embeddings)


In [10]:

template = """
Answer the question based on the context below. If you can't 
answer the question, reply "I don't know".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate(input_variables=["context", "question"], template=template)



In [11]:
query = """Explain linked list in cpp""" 

In [13]:
 
         # Sample question, change to other questions you are interested in.
relevant_documents = vectorstore.similarity_search(query)
print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query. Display the first one:\n')
print(relevant_documents[0].page_content)
NoOfRelaventDoc=5
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": NoOfRelaventDoc})
# relevant_documents

There are 4 documents retrieved which are relevant to the query. Display the first one:

(rather than a primitive array) as the underlying array structure.
3.34 A linked list contains a cycle if, starting from some node p, following a sufﬁcient
number of next links brings us back to node p.pdoes not have to be the ﬁrst node


[Document(page_content='(rather than a primitive array) as the underlying array structure.\n3.34 A linked list contains a cycle if, starting from some node p, following a sufﬁcient\nnumber of next links brings us back to node p.pdoes not have to be the ﬁrst node', metadata={'source': 'data/DataStructures.pdf', 'page': 137}),
 Document(page_content='The linked list consists of a series of nodes, which are not necessarily adjacent in\nmemory . Each node contains the element and a link to a node containing its successor. We\ncall this the next link. The last cell’s next link points to nullptr .\nTo execute printList() orfind(x) , we merely start at the ﬁrst node in the list and\nthen traverse the list by following the next links. This operation is clearly linear-time, as\nin the array implementation; although, the constant is likely to be larger than if an arrayimplementation were used. The\nfindKth operation is no longer quite as efﬁcient as an\narray implementation; findKth(i) takes O(i

In [14]:
context = ""
for doc in relevant_documents[:NoOfRelaventDoc]:
    context += doc.page_content + "\n"  # Add newline for better readability
formatted_prompt = prompt.format(context=context, question=query)
# response = model.run(
#     inputs=prompt.apply(context=context, question=query),
#     temperature=0.7,  # Adjust temperature for desired balance between informativeness and creativity
# )

# print(response)


In [15]:
# formatted_prompt

'\nAnswer the question based on the context below. If you can\'t \nanswer the question, reply "I don\'t know".\n\nContext: (rather than a primitive array) as the underlying array structure.\n3.34 A linked list contains a cycle if, starting from some node p, following a sufﬁcient\nnumber of next links brings us back to node p.pdoes not have to be the ﬁrst node\nThe linked list consists of a series of nodes, which are not necessarily adjacent in\nmemory . Each node contains the element and a link to a node containing its successor. We\ncall this the next link. The last cell’s next link points to nullptr .\nTo execute printList() orfind(x) , we merely start at the ﬁrst node in the list and\nthen traverse the list by following the next links. This operation is clearly linear-time, as\nin the array implementation; although, the constant is likely to be larger than if an arrayimplementation were used. The\nfindKth operation is no longer quite as efﬁcient as an\narray implementation; findKth(

In [16]:
response = model.invoke(formatted_prompt)
print(response)

 A linked list in C++ is a linear data structure, composed of nodes that contain an element and a reference (pointer) to the next node in the sequence. The first node is called the head or the beginning, and the last node's next pointer points to `nullptr`. Unlike arrays, where elements are adjacent in memory, linked lists allow for greater flexibility as they can be dynamically allocated during runtime, enabling efficient insertion into the middle of the list without having to relocate many items.

In C++, you typically define a struct or class to represent each node in the linked list, with pointers pointing to the data element and the next node. Here's an example:

```cpp
struct Node {
    int data;
    Node* next;
};
```

The `next` pointer can be initialized as `nullptr` or to point to another node in the list during the creation of each new node. You can traverse the linked list by starting from the first (or head) node and following the next pointers. The time complexity for sea

A linked list in C++ is a linear data structure, composed of nodes that contain an element and a reference (pointer) to the next node in the sequence. The first node is called the head or the beginning, and the last node's next pointer points to `nullptr`. Unlike arrays, where elements are adjacent in memory, linked lists allow for greater flexibility as they can be dynamically allocated during runtime, enabling efficient insertion into the middle of the list without having to relocate many items.

In C++, you typically define a struct or class to represent each node in the linked list, with pointers pointing to the data element and the next node. Here's an example:

```cpp
struct Node {
    int data;
    Node* next;
};
```

The `next` pointer can be initialized as `nullptr` or to point to another node in the list during the creation of each new node. You can traverse the linked list by starting from the first (or head) node and following the next pointers. The time complexity for search operations is linear, O(N), since you have to examine at most N nodes during a search.

To create a cycle in the linked list, you can set the `next` pointer of a node to another node that has already been allocated. If you follow this next link from any given node and end up returning to the starting point, then the linked list contains a cycle. This can be useful for solving certain problems, such as detecting cycles in the list or finding its length.

The linked list is an efficient data structure when dealing with dynamic situations where the size of the collection is uncertain or frequently changing. It's particularly beneficial when dealing with large amounts of data or insertions and deletions at arbitrary positions within the sequence.
```cpp
struct Node {
    int data;
    Node* next;
};
```
xqsxsq The `next` pointer can be initialized as `nullptr` or to point to another node in the list during the creation of each new node. You can traverse the linked list by starting from the first (or head) node and following the next pointers. The time complexity for search operations is linear, O(N), since you have to examine at most N nodes during a search.


In [17]:
query = """write about indian political system""" 
 
         # Sample question, change to other questions you are interested in.
relevant_documents = vectorstore.similarity_search(query)
# print(f'There are {len(relevant_documents)} documents retrieved which are relevant to the query. Display the first one:\n')
# print(relevant_documents[0].page_content)
NoOfRelaventDoc=5
# retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": NoOfRelaventDoc})
context = ""
for doc in relevant_documents[:NoOfRelaventDoc]:
    context += doc.page_content + "\n"  # Add newline for better readability
formatted_prompt = prompt.format(context=context, question=query)
response = model.invoke(formatted_prompt)
print(response)

There are 4 documents retrieved which are relevant to the query. Display the first one:

into its postﬁx form. First, the symbol
ais read, so it is passed through to the output.
 I don't know. The provided context does not pertain to the Indian political system. It discusses concepts related to computer programming, algorithms, and data structures.


In [18]:
query = """Ok write a full fledge code implement it""" 
relevant_documents = vectorstore.similarity_search(query)
NoOfRelaventDoc=5
context = ""
for doc in relevant_documents[:NoOfRelaventDoc]:
    context += doc.page_content + "\n"  # Add newline for better readability
formatted_prompt = prompt.format(context=context, question=query)
response = model.invoke(formatted_prompt)
print(response)

 I'm happy to help you get started on writing a postfix expression evaluator and infix to postfix converter, as per your exercise 3.23a. Here's a simple implementation in C++.

```cpp
#include <iostream>
#include <stack>
#include <string>
#include <vector>

class MemoryCell {
public:
    Object get() const { return storedValue; }
    void set(const Object & x) { storedValue = x; }
private:
    Object storedValue;
};

void infixToPostfix(const std::string& expression, std::vector<std::string>& postfix) {
    // A stack to store operators and parentheses
    std::stack<std::string> s;

    int i = 0;
    int len = expression.length();
    while (i < len) {
        char c = expression[i];
        if (c == '(') {
            // Left parenthesis, push onto stack
            s.push(c);
        } else if (c == ')') {
            // Right parenthesis, pop and apply operator until left parenthesis
            while (s.top() != '(') {
                postfix.push_back(s.top());
                s