In [2]:
#!pip install langchain
#!pip install langchain-openai
#!pip install langchain-community

In [1]:
#!pip install langchain-chroma

In [1]:
# Enter OpenAI API key
from getpass import getpass
OPENAI_API_KEY = getpass("Enter your OpenAI API Key here: ")

Enter your OpenAI API Key here: ··········


In [2]:
# Setup Environment Variables
import os
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

In [3]:
# OpenAI Embedding Model
from langchain_openai import OpenAIEmbeddings
openai_embed_model = OpenAIEmbeddings(model ="text-embedding-3-small")

In [4]:
# OpenAI Generating Model
from langchain_openai import ChatOpenAI
chatgpt = ChatOpenAI(model_name="gpt-4o-mini")

### Loading and Processing the data

#### if you can't download using the following code
#### go to https://drive.google.com/file/d/1aZxZejfteVuofISodUrY2CDoyuPLYDGZ download it
#### manually upload it on colab
!gdown 1aZxZejfteVuofISodUrY2CDoyuPLYDGZ

In [5]:
!gdown 1aZxZejfteVuofISodUrY2CDoyuPLYDGZ

Downloading...
From: https://drive.google.com/uc?id=1aZxZejfteVuofISodUrY2CDoyuPLYDGZ
To: /content/rag_docs.zip
  0% 0.00/5.92M [00:00<?, ?B/s]100% 5.92M/5.92M [00:00<00:00, 78.7MB/s]


In [6]:
!unzip /content/rag_docs.zip

Archive:  /content/rag_docs.zip
   creating: rag_docs/
  inflating: rag_docs/attention_paper.pdf  
  inflating: rag_docs/cnn_paper.pdf  
  inflating: rag_docs/resnet_paper.pdf  
  inflating: rag_docs/vision_transformer.pdf  
  inflating: rag_docs/wikidata_rag_demo.jsonl  


In [9]:
#!pip install jq
#!pip install pymupdf

### Load and Process JSON Documents(wikidata)

In [10]:
from langchain.document_loaders import JSONLoader

loader = JSONLoader(file_path='/content/rag_docs/wikidata_rag_demo.jsonl',
                    jq_schema=".", text_content=False,
                   json_lines=True )

wiki_docs = loader.load()
print(len(wiki_docs))

1801


In [11]:
wiki_docs[3]

Document(metadata={'source': '/content/rag_docs/wikidata_rag_demo.jsonl', 'seq_num': 4}, page_content='{"id": "71548", "title": "Chi-square distribution", "paragraphs": ["In probability theory and statistics, the chi-square distribution (also chi-squared or formula_1\\u00a0 distribution) is one of the most widely used theoretical probability distributions. Chi-square distribution with formula_2 degrees of freedom is written as formula_3. It is a special case of gamma distribution.", "Chi-square distribution is primarily used in statistical significance tests and confidence intervals. It is useful, because it is relatively easy to show that certain probability distributions come close to it, under certain conditions. One of these conditions is that the null hypothesis must be true. Another one is that the different random variables (or observations) must be independent of each other."]}')

In [12]:
wiki_docs[300]

Document(metadata={'source': '/content/rag_docs/wikidata_rag_demo.jsonl', 'seq_num': 301}, page_content='{"id": "34097", "title": "Florence Nightingale", "paragraphs": ["Florence Nightingale, OM (12 May 1820 \\u2013 13 August 1910), was an English nurse. She helped create the modern techniques of nursing. She became a leader of the team of nurses who helped wounded soldiers during the Crimean War.", "She was the first female to receive the Order of Merit, one of the highest honours awarded by the British monarch. As a nurse she was given the name \'The Lady with the Lamp\' because at night, she checked on the wounded soldiers and always carried \'The Lamp\' with her. Florence Nightingale was a wonderful woman who fought the odds of not living a life expected by her family. She helped make modern nursing possible. Nightingale was a prodigious and versatile writer, and lived to be 90 years old.", "In her lifetime she was concerned with spreading medical knowledge. Some of her books were 

In [13]:
wiki_docs[1300]

Document(metadata={'source': '/content/rag_docs/wikidata_rag_demo.jsonl', 'seq_num': 1301}, page_content='{"id": "471921", "title": "CamelCase", "paragraphs": ["CamelCase (camel case, camel caps or medial capitals) is the practice of writing compound words or phrases so that each next word or abbreviation begins with a capital letter. CamelCase usually starts with a capital. When used in a programming language, it usually starts with a lowercase letter. Common examples are PowerPoint or iPhone.", "There are many variations of CamelCase. A few important ones are:", "The first use of medial capitals was the notation for chemical formulae invented by the Swedish chemist Berzelius in 1813. He did this to replace the large amount of naming and symbol systems used by chemists at that time. He suggested to show each chemical element by a symbol of one or two letters, the first one being capitalized. The capitalization allowed formulae like \'NaCl\' to be written without spaces and still be re

In [15]:
import json
from langchain.docstore.document import Document
wiki_docs_processed = []
for doc in wiki_docs:
  doc = json.loads(doc.page_content)
  metadata = {
              "title":doc['title'],
              "id": doc['id'],
              "source": 'Wikipedia'}
  data = ' '.join(doc['paragraphs'])
  wiki_docs_processed.append(Document(page_content=data, metadata=metadata))

In [17]:
wiki_docs_processed[1]

Document(metadata={'title': 'Dattatreya', 'id': '86394', 'source': 'Wikipedia'}, page_content='Dattatreya is the God who is an incarnation of the Divine Trinity Brahma, Vishnu and Siva. The word Datta means "Given", Datta is called so because the divine trinity have "given" themselves in the form of a son to the sage couple Guru Atri and Mata Anusuya. He is the son of Guru Atri, hence the name "Atreya." In the Nath tradition, Dattatreya is seen as an Avatar or incarnation of the Lord Shiva and as the Adi-Guru (First Teacher) of the Adi-Nath sampradaya of the Nathas. Although Dattatreya was at first a "Lord of Yoga" with Tantric traits, he was adapted and assimilated into the more devotional cults; while still worshiped by millions of Hindus, he is approached more as a benevolent God than as a teacher of the highest essence of Indian thought. Though the Dattatreya of the Natha tradition coexisted and intermingled with the Puranic, Brahmanical tradition of the Datta sampradaya, here we s

# Loading and processing pdf file

In [19]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def create_simple_chunks(file_path, chunk_size=3000, chunk_overlap=300):
  print("Loading pages: ", file_path)
  loader = PyMuPDFLoader(file_path)
  doc_pages = loader.load()

  print("Chunking pages :", file_path)
  splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  doc_chunks = splitter.split_documents(doc_pages)

  print("Finished Processing :", file_path)
  print()
  return doc_chunks

In [20]:
from glob import glob
pdf_files = glob("/content/rag_docs/*.pdf")
pdf_files

['/content/rag_docs/attention_paper.pdf',
 '/content/rag_docs/vision_transformer.pdf',
 '/content/rag_docs/resnet_paper.pdf',
 '/content/rag_docs/cnn_paper.pdf']

In [21]:
paper_docs = []

for fp in pdf_files:
  paper_docs.extend(create_simple_chunks(file_path=fp, chunk_size=3000, chunk_overlap=300))

Loading pages:  /content/rag_docs/attention_paper.pdf
Chunking pages : /content/rag_docs/attention_paper.pdf
Finished Processing : /content/rag_docs/attention_paper.pdf

Loading pages:  /content/rag_docs/vision_transformer.pdf
Chunking pages : /content/rag_docs/vision_transformer.pdf
Finished Processing : /content/rag_docs/vision_transformer.pdf

Loading pages:  /content/rag_docs/resnet_paper.pdf
Chunking pages : /content/rag_docs/resnet_paper.pdf
Finished Processing : /content/rag_docs/resnet_paper.pdf

Loading pages:  /content/rag_docs/cnn_paper.pdf
Chunking pages : /content/rag_docs/cnn_paper.pdf
Finished Processing : /content/rag_docs/cnn_paper.pdf



In [25]:
print(len(paper_docs))
print(len(wiki_docs_processed))

93
1801


In [23]:
paper_docs[2]

Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-08-03T00:07:29+00:00', 'source': '/content/rag_docs/attention_paper.pdf', 'file_path': '/content/rag_docs/attention_paper.pdf', 'total_pages': 15, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2023-08-03T00:07:29+00:00', 'trapped': '', 'modDate': 'D:20230803000729Z', 'creationDate': 'D:20230803000729Z', 'page': 1}, page_content='described in section 3.2.\nSelf-attention, sometimes called intra-attention is an attention mechanism relating different positions\nof a single sequence in order to compute a representation of the sequence. Self-attention has been\nused successfully in a variety of tasks including reading comprehension, abstractive summarization,\ntextual entailment and learning task-independent sentence representations [4, 27, 28, 22].\nEnd-to-end memory networks are based on a recurrent attention mechanism instead of sequence-\n

In [24]:
paper_docs[40]

Document(metadata={'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'creationdate': '2021-06-04T00:19:58+00:00', 'source': '/content/rag_docs/vision_transformer.pdf', 'file_path': '/content/rag_docs/vision_transformer.pdf', 'total_pages': 22, 'format': 'PDF 1.5', 'title': '', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2021-06-04T00:19:58+00:00', 'trapped': '', 'modDate': 'D:20210604001958Z', 'creationDate': 'D:20210604001958Z', 'page': 9}, page_content='In ICCV, 2019.\nZilong Huang, Xinggang Wang, Yunchao Wei, Lichao Huang, Humphrey Shi, Wenyu Liu, and\nThomas S. Huang. Ccnet: Criss-cross attention for semantic segmentation. In ICCV, 2020.\nOlivier J. H´enaff, Aravind Srinivas, Jeffrey De Fauw, Ali Razavi, Carl Doersch, S. M. Ali Eslami,\nand Aaron van den Oord. Data-efﬁcient image recognition with contrastive predictive coding. In\nICML, 2020.\n10')

### Combine all documents in one list for embedding and storing into vector database

In [26]:
total_docs = wiki_docs_processed + paper_docs
print(len(total_docs))

1894


In [28]:
# Chromadb with embedding
from langchain_chroma import Chroma
# please execute only once if you are using your local machine or even using colab
chroma_db = Chroma.from_documents(documents=total_docs,
                                  collection_name="my_db",
                                  embedding = openai_embed_model,
                                  collection_metadata = {"hnsw:space":"cosine"},
                                  persist_directory="/content/my_chromadb")

In [29]:
## Load Vector DB from disk

chroma_db = Chroma(persist_directory="/content/my_chromadb",
                   embedding_function=openai_embed_model,
                   collection_name="my_db")
chroma_db

<langchain_chroma.vectorstores.Chroma at 0x7a54556fd310>

### Semantic Similarity based Retrieval

In [30]:
similarity_retriever = chroma_db.as_retriever(search_type="similarity", search_kwargs={"k":5})

In [35]:
from IPython.display import display, Markdown
def display_docs(docs):
  for doc in docs:
    print("Metadata :", doc.metadata)
    display(Markdown(doc.page_content[:1000]))
    print()

In [36]:
query ="What is Machine Learning?"
top_docs = similarity_retriever.invoke(query)
display_docs(top_docs)

Metadata : {'title': 'Machine learning', 'id': '564928', 'source': 'Wikipedia'}


Machine learning gives computers the ability to learn without being explicitly programmed (Arthur Samuel, 1959). It is a subfield of computer science. The idea came from work in artificial intelligence. Machine learning explores the study and construction of algorithms which can learn and make predictions on data. Such algorithms follow programmed instructions, but can also make predictions or decisions based on data. They build a model from sample inputs. Machine learning is done where designing and programming explicit algorithms cannot be done. Examples include spam filtering, detection of network intruders or malicious insiders working towards a data breach, optical character recognition (OCR), search engines and computer vision.


Metadata : {'source': 'Wikipedia', 'id': '359370', 'title': 'Supervised learning'}


In machine learning, supervised learning is the task of inferring a function from labelled training data. The results of the training are known beforehand, the system simply learns how to get to these results correctly. Usually, such systems work with vectors. They get the training data and the result of the training as two vectors and produce a "classifier". Usually, the system uses inductive reasoning to generalize the training data.


Metadata : {'id': '663523', 'title': 'Deep learning', 'source': 'Wikipedia'}


Deep learning (also called deep structured learning or hierarchical learning) is a kind of machine learning, which is mostly used with certain kinds of neural networks. As with other kinds of machine-learning, learning sessions can be unsupervised, semi-supervised, or supervised. In many cases, structures are organised so that there is at least one intermediate layer (or hidden layer), between the input layer and the output layer. Certain tasks, such as as recognizing and understanding speech, images or handwriting, is easy to do for humans. However, for a computer, these tasks are very difficult to do. In a multi-layer neural network (having more than two layers), the information processed will become more abstract with each added layer. Deep learning models are inspired by information processing and communication patterns in biological nervous systems; they are different from the structural and functional properties of biological brains (especially the human brain) in many ways, whic


Metadata : {'title': 'Artificial intelligence', 'source': 'Wikipedia', 'id': '6360'}


Artificial intelligence (AI) is the ability of a computer program or a machine to think and learn. It is also a field of study which tries to make computers "smart". They work on their own without being encoded with commands. John McCarthy came up with the name "Artificial Intelligence" in 1955. In general use, the term "artificial intelligence" means a programme which mimics human cognition. At least some of the things we associate with other minds, such as learning and problem solving can be done by computers, though not in the same way as we do. Andreas Kaplan and Michael Haenlein define AI as a system’s ability to correctly interpret external data, to learn from such data, and to use those learnings to achieve specific goals and tasks through flexible adaptation. An ideal (perfect) intelligent machine is a flexible agent which perceives its environment and takes actions to maximize its chance of success at some goal or objective. As machines become increasingly capable, mental facu


Metadata : {'source': 'Wikipedia', 'id': '44742', 'title': 'Artificial neural network'}


A neural network (also called an ANN or an artificial neural network) is a sort of computer software, inspired by biological neurons. Biological brains are capable of solving difficult problems, but each neuron is only responsible for solving a very small part of the problem. Similarly, a neural network is made up of cells that work together to produce a desired result, although each individual cell is only responsible for solving a small part of the problem. This is one method for creating artificially intelligent programs. Neural networks are an example of machine learning, where a program can change as it learns to solve a problem. A neural network can be trained and improved with each example, but the larger the neural network, the more examples it needs to perform well—often needing millions or billions of examples in the case of deep learning. There are two ways to think of a neural network. First is like a human brain. Second is like a mathematical equation.




In [37]:
query ="What is the difference between Transformers (paper - attention all you need) and Vision-Transformers?"
top_docs = similarity_retriever.invoke(query)
display_docs(top_docs)

Metadata : {'total_pages': 22, 'producer': 'pdfTeX-1.40.21', 'creationdate': '2021-06-04T00:19:58+00:00', 'title': '', 'keywords': '', 'author': '', 'page': 7, 'creator': 'LaTeX with hyperref', 'moddate': '2021-06-04T00:19:58+00:00', 'creationDate': 'D:20210604001958Z', 'format': 'PDF 1.5', 'modDate': 'D:20210604001958Z', 'source': '/content/rag_docs/vision_transformer.pdf', 'file_path': '/content/rag_docs/vision_transformer.pdf', 'subject': '', 'trapped': ''}


Published as a conference paper at ICLR 2021
4.4
SCALING STUDY
We perform a controlled scaling study of different models by evaluating transfer performance from
JFT-300M. In this setting data size does not bottleneck the models’ performances, and we assess
performance versus pre-training cost of each model. The model set includes: 7 ResNets, R50x1,
R50x2 R101x1, R152x1, R152x2, pre-trained for 7 epochs, plus R152x2 and R200x3 pre-trained
for 14 epochs; 6 Vision Transformers, ViT-B/32, B/16, L/32, L/16, pre-trained for 7 epochs, plus
L/16 and H/14 pre-trained for 14 epochs; and 5 hybrids, R50+ViT-B/32, B/16, L/32, L/16 pre-
trained for 7 epochs, plus R50+ViT-L/16 pre-trained for 14 epochs (for hybrids, the number at the
end of the model name stands not for the patch size, but for the total dowsampling ratio in the ResNet
backbone).
Figure 5 contains the transfer performance versus total pre-training compute (see Appendix D.5
for details on computational costs). Detailed results per mode


Metadata : {'title': '', 'author': '', 'trapped': '', 'moddate': '2021-06-04T00:19:58+00:00', 'subject': '', 'keywords': '', 'total_pages': 22, 'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'modDate': 'D:20210604001958Z', 'page': 1, 'format': 'PDF 1.5', 'source': '/content/rag_docs/vision_transformer.pdf', 'creationdate': '2021-06-04T00:19:58+00:00', 'creationDate': 'D:20210604001958Z', 'file_path': '/content/rag_docs/vision_transformer.pdf'}


Published as a conference paper at ICLR 2021
inherent to CNNs, such as translation equivariance and locality, and therefore do not generalize well
when trained on insufﬁcient amounts of data.
However, the picture changes if the models are trained on larger datasets (14M-300M images). We
ﬁnd that large scale training trumps inductive bias. Our Vision Transformer (ViT) attains excellent
results when pre-trained at sufﬁcient scale and transferred to tasks with fewer datapoints. When
pre-trained on the public ImageNet-21k dataset or the in-house JFT-300M dataset, ViT approaches
or beats state of the art on multiple image recognition benchmarks. In particular, the best model
reaches the accuracy of 88.55% on ImageNet, 90.72% on ImageNet-ReaL, 94.55% on CIFAR-100,
and 77.63% on the VTAB suite of 19 tasks.
2
RELATED WORK
Transformers were proposed by Vaswani et al. (2017) for machine translation, and have since be-
come the state of the art method in many NLP tasks. Large Transformer-based mo


Metadata : {'producer': 'pdfTeX-1.40.21', 'creator': 'LaTeX with hyperref', 'page': 0, 'total_pages': 22, 'moddate': '2021-06-04T00:19:58+00:00', 'author': '', 'trapped': '', 'keywords': '', 'title': '', 'file_path': '/content/rag_docs/vision_transformer.pdf', 'subject': '', 'source': '/content/rag_docs/vision_transformer.pdf', 'creationDate': 'D:20210604001958Z', 'creationdate': '2021-06-04T00:19:58+00:00', 'modDate': 'D:20210604001958Z', 'format': 'PDF 1.5'}


Published as a conference paper at ICLR 2021
AN IMAGE IS WORTH 16X16 WORDS:
TRANSFORMERS FOR IMAGE RECOGNITION AT SCALE
Alexey Dosovitskiy∗,†, Lucas Beyer∗, Alexander Kolesnikov∗, Dirk Weissenborn∗,
Xiaohua Zhai∗, Thomas Unterthiner, Mostafa Dehghani, Matthias Minderer,
Georg Heigold, Sylvain Gelly, Jakob Uszkoreit, Neil Houlsby∗,†
∗equal technical contribution, †equal advising
Google Research, Brain Team
{adosovitskiy, neilhoulsby}@google.com
ABSTRACT
While the Transformer architecture has become the de-facto standard for natural
language processing tasks, its applications to computer vision remain limited. In
vision, attention is either applied in conjunction with convolutional networks, or
used to replace certain components of convolutional networks while keeping their
overall structure in place. We show that this reliance on CNNs is not necessary
and a pure transformer applied directly to sequences of image patches can perform
very well on image classiﬁcation tasks. When pre-traine


Metadata : {'subject': '', 'producer': 'pdfTeX-1.40.21', 'moddate': '2021-06-04T00:19:58+00:00', 'format': 'PDF 1.5', 'total_pages': 22, 'creationDate': 'D:20210604001958Z', 'creator': 'LaTeX with hyperref', 'creationdate': '2021-06-04T00:19:58+00:00', 'file_path': '/content/rag_docs/vision_transformer.pdf', 'trapped': '', 'page': 3, 'keywords': '', 'modDate': 'D:20210604001958Z', 'author': '', 'title': '', 'source': '/content/rag_docs/vision_transformer.pdf'}


Published as a conference paper at ICLR 2021
The MLP contains two layers with a GELU non-linearity.
z0 = [xclass; x1
pE; x2
pE; · · · ; xN
p E] + Epos,
E ∈R(P 2·C)×D, Epos ∈R(N+1)×D
(1)
z′
ℓ= MSA(LN(zℓ−1)) + zℓ−1,
ℓ= 1 . . . L
(2)
zℓ= MLP(LN(z′
ℓ)) + z′
ℓ,
ℓ= 1 . . . L
(3)
y = LN(z0
L)
(4)
Inductive bias.
We note that Vision Transformer has much less image-speciﬁc inductive bias than
CNNs. In CNNs, locality, two-dimensional neighborhood structure, and translation equivariance are
baked into each layer throughout the whole model. In ViT, only MLP layers are local and transla-
tionally equivariant, while the self-attention layers are global. The two-dimensional neighborhood
structure is used very sparingly: in the beginning of the model by cutting the image into patches and
at ﬁne-tuning time for adjusting the position embeddings for images of different resolution (as de-
scribed below). Other than that, the position embeddings at initialization time carry no information
about the 2D pos


Metadata : {'moddate': '2021-06-04T00:19:58+00:00', 'creationDate': 'D:20210604001958Z', 'subject': '', 'total_pages': 22, 'modDate': 'D:20210604001958Z', 'creationdate': '2021-06-04T00:19:58+00:00', 'keywords': '', 'creator': 'LaTeX with hyperref', 'source': '/content/rag_docs/vision_transformer.pdf', 'page': 4, 'file_path': '/content/rag_docs/vision_transformer.pdf', 'trapped': '', 'producer': 'pdfTeX-1.40.21', 'author': '', 'title': '', 'format': 'PDF 1.5'}


Published as a conference paper at ICLR 2021
Model
Layers
Hidden size D
MLP size
Heads
Params
ViT-Base
12
768
3072
12
86M
ViT-Large
24
1024
4096
16
307M
ViT-Huge
32
1280
5120
16
632M
Table 1: Details of Vision Transformer model variants.
We also evaluate on the 19-task VTAB classiﬁcation suite (Zhai et al., 2019b). VTAB evaluates
low-data transfer to diverse tasks, using 1 000 training examples per task. The tasks are divided into
three groups: Natural – tasks like the above, Pets, CIFAR, etc. Specialized – medical and satellite
imagery, and Structured – tasks that require geometric understanding like localization.
Model Variants. We base ViT conﬁgurations on those used for BERT (Devlin et al., 2019), as
summarized in Table 1. The “Base” and “Large” models are directly adopted from BERT and we
add the larger “Huge” model. In what follows we use brief notation to indicate the model size and
the input patch size: for instance, ViT-L/16 means the “Large” variant with 16×16 input patch siz




In [38]:
query ="Who is Nilotpal?"
top_docs = similarity_retriever.invoke(query)
display_docs(top_docs)

Metadata : {'source': 'Wikipedia', 'id': '623001', 'title': 'Pratidaan'}


Pratidaan (); is a Bengali television Popular Soap Opera that premiered on August 21, 2017 and airs on STAR Jalsha. Produced by Boyhood Productions, it stars Sandipta Sen and Sheikh Rezwan Rabbani in lead roles and Tanuka Chatterjee in a Negative role. It replaced Star Jalsha's popular show "Milon Tithi". Shimul is a very well educated girl and she wants to pursue her study further. She wants to get married in a family where she gets her due respect as a highly educated girl. But on the other hand her would be mother-in-law wants to get a bride who is altoegther illiterate. As the destiny had it her son meets Shimul, a girl of diametrically opposite character. They get married and she will have to teach life lessons to her arrogant husband, Neel, was not the kind of marriage Shimul was hoping for. Will they ever fall in love? What happens next? http://www.newstechcafe.com/2017/07/protidan-serial-on-star-jalsha-tv-plot.html


Metadata : {'title': 'Tapas Paul', 'id': '739002', 'source': 'Wikipedia'}


Tapas Paul (29 September 1958 – 18 February 2020) was an Indian actor and politician. He is known for acting in "Dadar Kirti", "Bhalobasa Bhalobasa", "Anurager Choyan", "Amar Bandhan", "Guru Dakshina", "Uttara" and "Mondo Meyer Upakhyan". Outside of acting, he served as an MLA (2001–2009) and an MP (2009–2019). Paul was born in Chandannagar, West Bengal. On 18 February 2020, he died due to cardiac arrest in Mumbai, Maharastra. He was 61.


Metadata : {'source': 'Wikipedia', 'title': 'Nils John Nilsson', 'id': '692744'}


Nils John Nilsson (February 6, 1933 – April 23, 2019) was an American computer scientist. He was one of the founding researchers in the ideas of artificial intelligence. He was born in Saginaw, Michigan. He was the first Kumagai Professor of Engineering in computer science at Stanford University from 1991 until his retirement. His best known work was Shakey the robot. Nilsson died on April 23, 2019, at his home in Medford, Oregon, at the age of 86.


Metadata : {'source': 'Wikipedia', 'id': '798128', 'title': 'Mimlu Sen'}


Mimlu Sen (born 1949) is an Indian author, translator, musician, composer and producer. Sen was born in Shillong, Meghalaya, India. During the 1960s and 1970s she studying in Kolkata and participating in street protests demanding an end of Vietnam War. She has been jailed for Naxalite movement. Sen was published her first book "Baulsphere" in 2009. The following year it was published as "The Honey Gatherers". Piers Moore Ede stated that "The Honey Gatherers" recounts Sen's adventures in rural Bengal.


Metadata : {'title': 'Amloki', 'source': 'Wikipedia', 'id': '620477'}


Amloki (); is a Bengali television soap opera that premiered on 12 February 2018 and airs on Zee Bangla. It Replaced Zee Bangla's Popular Show Stree. The Show Stars Aishwarya Roy and Antara as The Main Female Protagonists and Indrajit Bose as The Main Male Antagonist and bhaswar chatterjee as the male lead.The show will go off air on 30th November 2018 and will get replaced byRanu Pelo Lottery. It narrates the story of a six-year-old girl named Amloki, who was born deaf and dumb. Amloki hails from a poor family. Her father,Ratan is a gambler and steals jewelry while her mother runs the family by preparing jaggery at daytime and acting in Jatra at night. The sole reason for her mother’s existence is Amloki. She believes that someday her daughter will be cured. However, destiny has its own plans. Amloki loses her mother suddenly.




# Building Augmentation Part (Enhanced prompt)

In [41]:
from langchain_core.prompts import ChatPromptTemplate


rag_prompt = """
You are an assistant specialized in question answering and translation.
Your task is to respond to the given question using only the information provided in the retrieved context.

Instructions:
- If the answer is not present in the context, clearly state: "I don’t know based on the given context."
- Do not invent or assume any information.
- Write the answer in clear, simple language with correct grammar.
- Make the response detailed, structured, and easy to understand.

Question:
{question}

Context:
{context}

Answer:
"""

In [42]:
rag_prompt_template = ChatPromptTemplate.from_template(rag_prompt)

# last part - LLM and Generation part

In [47]:
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
  return "\n\n".join(doc.page_content for doc in docs)

qa_rag_chain = (
    {"context":(similarity_retriever | format_docs),
     "question": RunnablePassthrough()} | rag_prompt_template | chatgpt
)

In [49]:
query ="What is Machine Learning?, please explain in a simple language so that any non-tech background can understand?"
result = qa_rag_chain.invoke(query)
display(Markdown(result.content))

Machine learning is a part of computer science that allows computers to learn from data without being specifically programmed for each task. This means computers can improve their performance on a task by learning from previous experiences and data inputs. 

Here's a simple breakdown of how it works:

1. **Learning from Data**: Instead of giving the computer explicit instructions, we provide it with examples or data. The computer then analyzes this data to find patterns and make predictions.

2. **Algorithms**: These are the instructions that guide the computer in learning from the data. They can create a model that helps the computer understand the patterns in the data it has received.

3. **Applications**: Machine learning is used in various real-world situations, like filtering spam emails, recognizing images or characters, and improving search engines. 

4. **Deep Learning**: This is a more advanced type of machine learning that uses complex structures called neural networks. It helps computers perform challenging tasks, like understanding speech or recognizing images, by processing information through multiple layers that gradually abstract the data.

5. **Types of Learning**: There are different ways machine learning can occur, including supervised learning where the computer is taught with examples that have known outcomes, and unsupervised learning where the computer has to find patterns in data without any prior knowledge.

Overall, machine learning helps computers to "learn" from data and make better decisions or predictions based on that learning.

In [50]:
query ="What is Machine Learning?, please explain in a simple hindi language so that any non-tech background can understand?"
result = qa_rag_chain.invoke(query)
display(Markdown(result.content))

Machine Learning (मशीन लर्निंग) एक ऐसा क्षेत्र है जो कंप्यूटरों को बिना अधिक निर्देश दिए सीखने की क्षमता देता है। इसे कंप्यूटर विज्ञान (computer science) का एक उपक्षेत्र (subfield) माना जाता है। मशीन लर्निंग का विचार आर्टिफिशियल इंटेलिजेंस (Artificial Intelligence) से आया है। इसमें ऐसे एल्गोरिदम (algorithms) का अध्ययन और निर्माण किया जाता है, जो डेटा पर सीख सकते हैं और भविष्यवाणियाँ (predictions) कर सकते हैं।

मशीन लर्निंग का मतलब है कि कंप्यूटर कार्यक्रमित निर्देशों का पालन करते हुए, डेटा के आधार पर नई भविष्यवाणियाँ या निर्णय ले सकते हैं। उदाहरण के लिए, ई-मेल में स्पैम फ़िल्टरिंग, नेटवर्क में अनधिकृत व्यक्तियों का पता लगाना, और तस्वीरों को पहचानना जैसी गतिविधियाँ मशीन लर्निंग का हिस्सा हैं। इसका उपयोग तब किया जाता है जब पारंपरिक तरीकों से स्पष्ट एल्गोरिदम बनाना संभव नहीं होता।

मशीन लर्निंग के कई प्रकार होते हैं, जिनमें से एक है "डीप लर्निंग" (Deep Learning)। यह विशेष प्रकार के न्यूरल नेटवर्क (Neural Networks) के साथ काम करता है। न्यूरल नेटवर्क एक तरह का सॉफ्टवेयर है, जो जैविक न्यूरॉन्स से प्रेरित है और जो एक साथ मिलकर किसी समस्या का समाधान करता है। 

सामान्यत: मशीन लर्निंग में दो प्रकार की तकनीकें होती हैं: "सुपरवाइज्ड लर्निंग" (Supervised Learning) और "अनसुपरवाइज्ड लर्निंग" (Unsupervised Learning)। सुपरवाइज्ड लर्निंग में, सिस्टम को पहले से दिए गए लेबल डेटा से सीखना होता है। 

इस तरह, मशीन लर्निंग वह प्रक्रिया है जो कंप्यूटरों को खुद से सुधारने और सीखने की क्षमता देती है, ताकि वे अधिक सटीकता के साथ काम कर सकें।

In [51]:
query ="Who is Nilotpal?"
result = qa_rag_chain.invoke(query)
display(Markdown(result.content))

I don’t know based on the given context.

In [52]:
query ="what is CNN and ComputerVision? please explain in a simple language with example so that layman can also understand and also provide response in Tamil, Telugu, Hindi, Bengali and Gujarati Language?"
result = qa_rag_chain.invoke(query)
display(Markdown(result.content))

**What is CNN and Computer Vision?**

**CNN (Convolutional Neural Network):**
CNN is a type of artificial intelligence model that is particularly good at recognizing patterns in images. It mimics the way our brain works to understand visual data. For example, if you show a CNN a picture of a cat, it can learn to identify the features that make it look like a cat (like ears, whiskers, and fur texture) and then tell you if a new picture is also a cat or not. 

To put it simply, think of CNN as a smart computer program that learns to see images just like humans do. It processes images using layers of artificial neurons that each focus on different details of the image, which helps it understand what the image is about.

**Computer Vision:**
Computer Vision is the field that focuses on enabling machines to interpret and understand visual information from the world. It involves using algorithms to process images and videos, allowing computers to "see" and analyze visual data. 

For instance, when you use a smartphone to unlock it with your face, that's computer vision at work. The phone's camera takes a picture of your face, and the software recognizes and matches it against stored images to confirm your identity.

### Translations:

**Tamil:**
**CNN (கோவுன்லூஷனல் நர்வல் நெட்‌வர்க்):**
CNN என்பது புகைப்படங்களில் உள்ள மாதிரிகளை அடையாளம் காண மிகவும் சிறந்த செயற்கை நுண்ணறிவு மாதிரியே ஆகும். இது பார்வை தரவுகளை புரிந்து கொள்ள எங்களின் மூளை செயல்படும் வழியை நகலெடுக்கிறது. எடுத்துக்காட்டு, நீங்கள் CNN க்கு ஒரு பூனைக்கான புகைப்படத்தை காட்டின், அது அந்த பூனை போல உள்ள அம்சங்களை (சேப்புகள், வெள்ளி மற்றும் உறையும் உருக்) அடையாளம் கண்டுதான், புதிய புகைப்படம் பூனை தான் என்று உனக்கு சொல்கிறது.

**கணினி பார்வை:**
கணினி பார்வை என்பது இயந்திரங்கள் உலகில் உள்ள பார்வை தகவல்களை விளக்க மற்றும் புரிந்து கொள்ள உதவுகிறது. இப்போது, கணினிகள் "பார்க்க" மற்றும் பார்வை தரவுகளை பகுப்பாய்வு செய்ய ஆல்கோரதங்களைப் பயன்படுத்துகிறது. எடுத்துக்காட்டு, நீங்கள் உங்கள் முகத்துடன் ஸ்மார்ட்போனை கண்காணிக்க பயன்படுகின்றீர்கள்.

---

**Telugu:**
**CNN (కన్వోల్యూషనల్ న్యూరల్ నెట్‌వర్క్):**
CNN అనేది చిత్రం ప్యాటర్న్‌లను గుర్తించడానికి చాలా మంచి యంత్రం. ఇది మన నాళికల వలె పనిచేస్తుంది. ఉదాహరణకు, మీరు CNN కి కుక్క యొక్క చిత్రాన్ని చూపిస్తే, అది కుక్కకు సంబంధించిన ఆకారాలు (చేవి, ముక్కు, తోలు) గుర్తించగలదు.

**కంప్యూటర్ విజన్:**
కంప్యూటర్ విజన్ అనేది యంత్రాలకు ప్రపంచంలో ఉన్న దృశ్య సమాచారాన్ని అర్థం చేసుకోవటానికి ఉపయోగించే పద్ధతులు. ఉదాహరణకు, మీరు మీ ముఖాన్ని చూపించి మీ ఫోన్‌ను మొదలి గ్రీన్ చేస్తే, అది కంప్యూటర్ విజన్ వలన ఏర్పడుతోంది.

---

**Hindi:**
**CNN (कन्वोल्यूशनल न्यूरल नेटवर्क):**
CNN एक तरह का कृत्रिम बुद्धिमत्ता मॉडल है जो चित्रों में पैटर्न पहचानने में बहुत अच्छा है। यह हमारे मस्तिष्क की तरह काम करता है। उदाहरण के लिए, यदि आप CNN को एक बिल्ली की तस्वीर दिखाते हैं, तो यह पहचान सकता है कि इसमें क्या विशेषताएं हैं (जैसे कान, मूंछें, और फर की बनावट) और फिर बताना कि क्या एक नई तस्वीर भी बिल्ली है या नहीं।

**कंप्यूटर विज़न:**
कंप्यूटर विज़न वह क्षेत्र है जो मशीनों को दृश्य जानकारी की व्याख्या और समझने में सक्षम बनाता है। उदाहरण के लिए, जब आप अपने चेहरे के साथ स्मार्टफोन को अनलॉक करते हैं, तो यह कंप्यूटर विज़न का उपयोग कर रहा है।

---

**Bengali:**
**CNN (কনভলিউশাল নিউরাল নেটওয়ার্ক):**
CNN হল একটি কৃত্রিম বুদ্ধিমত্তার পদ্ধতি যা চিত্রে নিদর্শন চিহ্নিত করতে খুব দক্ষ। এটি আমাদের মস্তিষ্কের কাজ করার পদ্ধতির অনুকরণ করে। উদাহরণস্বরূপ, আপনি যদি CNN-কে একটি বিড়ালের ছবি দেখান তবে এটি ছবিটি বিড়াল কিনা তা বোঝার জন্য বৈশিষ্ট্যগুলি চিহ্নিত করতে পারে।

**কম্পিউটার ভিশন:**
কম্পিউটার ভিশন হল সেই ক্ষেত্র যা যন্ত্রগুলিকে দৃষ্টি তথ্য ব্যাখ্যা এবং বুঝতে সক্ষম করে। উদাহরণস্বরূপ, যখন আপনি আপনার মুখ দিয়ে স্মার্টফোন আনলক করেন, তখন এটি কম্পিউটার ভিশনের কাজ করছে।

---

**Gujarati:**
**CNN (કન્વોલ્યુશનલ ન્યૂરલ નેટવર્ક):**
CNN એ એક પ્રકારની આર્ટિફિશ્યલ ઇન્ટેલિજન્સ મોડેલ છે જે ચિત્રોમાં પેટર્ન માન્ય કરવા માટે ખૂબ સારી છે. તે આપણા મગજની કામગીરીને અનુરૂપ છે. ઉદાહરણ તરીકે, જો તમે CNN ને એક બિલાડીનું છબી દર્શાવશો તો તે તેમાંથી બિલાડીની ખાસિયતો (કાન, મૂંછ અને વાળ) ઓળખી શકે છે.

**કમ્પ્યુટર વિઝન:**
કમ્પ્યુટર વિઝન એ ક્ષેત્ર છે જે મશીનોને વર્તમાન visual ડેટાને વ્યાખ્યાયિત કરવા અને સમજીને અસરો બનાવે છે. ઉદાહરણ તરીકે, જ્યારે તમે તમારા ચહેરા સાથે સ્માર્ટફોનને અનલોક કરતા છો, ત્યારે તે કમ્પ્યુટર વિઝન છે.

In [53]:
query ="Who is Narendra Modi in India?"
result = qa_rag_chain.invoke(query)
display(Markdown(result.content))

I don’t know based on the given context.