# **Installing the required packages in the Colab Environment**

In [1]:
!pip install langchain
!pip install git+https://github.com/huggingface/transformers.git
!pip install git+https://github.com/huggingface/accelerate.git
!pip install sentence-transformers==2.2.2
!pip install pinecone-client
!pip install bitsandbytes
!pip install datasets

Collecting langchain
  Downloading langchain-0.1.14-py3-none-any.whl (812 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/812.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/812.8 kB[0m [31m2.5 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m809.0/812.8 kB[0m [31m15.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m812.8/812.8 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.30 (from langchain)
  Downloading langchain_community-0.0.31-py3-none-any.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

In [2]:
# importing the packages
import os
import random
import string
import pinecone
import warnings
from datasets import load_dataset
from langchain import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.vectorstores import Pinecone
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

warnings.filterwarnings('ignore')

#Embedding

In [3]:
# loading embedding model
embedding_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

embedding_model = HuggingFaceEmbeddings(
    model_name=embedding_model_id,
    model_kwargs={'device':'cuda'},
    encode_kwargs={'device': 'cuda', 'batch_size': 16}
)

.gitattributes:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [4]:
# #initiating pinecone environment for storing embedding vectors
# pinecone.Pinecone(
#     api_key='b409a653-00a5-4d16-9e1f-a64545f6bbac',
#     environment='gcp-starter'
# )

In [5]:
import os
from pinecone import Pinecone

os.environ["PINECONE_API_KEY"]= "34a72e07-b4a2-442d-877a-d4a75cd21df9" #01444af5-7ccb-4ccf-a954-767b2b549589c"

pc = Pinecone(Api_key=os.environ.get("PINECONE_API_KEY"))

pc.list_indexes()

{'indexes': [{'dimension': 384,
              'host': 'llm-course-tutorial-v4i3067.svc.gcp-starter.pinecone.io',
              'metric': 'cosine',
              'name': 'llm-course-tutorial',
              'spec': {'pod': {'environment': 'gcp-starter',
                               'pod_type': 'starter',
                               'pods': 1,
                               'replicas': 1,
                               'shards': 1}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [6]:
!pip show pinecone-client

Name: pinecone-client
Version: 3.2.2
Summary: Pinecone client and SDK
Home-page: https://www.pinecone.io
Author: Pinecone Systems, Inc.
Author-email: support@pinecone.io
License: Apache-2.0
Location: /usr/local/lib/python3.10/dist-packages
Requires: certifi, tqdm, typing-extensions, urllib3
Required-by: 


In [7]:
from pinecone import PodSpec

In [8]:
#RUN THIS CELL ONLY ONCE

# # creating the vectorstore index in the pinecone
# # default name is 'llm-course-tutorial', if this name exist in the pinecone directory, then we create a random name index
# # having the vector dimension 384, and similarity metric is cosine.
# index_name = 'llm-course-tutorial'

# if index_name not in pc.list_indexes():
#   pc.create_index(
#       index_name,
#       dimension=384,
#       metric='cosine',
#       spec=PodSpec(environment='gcp-starter')
#   )
# else:
#   pc.create_index(
#       ''.join(random.choice(string.ascii_lowercase) for i in range(15)),
#       dimension=384,
#       metric='cosine',
#       spec=PodSpec(environment='gcp-starter')
#   )

In [9]:
pc.list_indexes()

{'indexes': [{'dimension': 384,
              'host': 'llm-course-tutorial-v4i3067.svc.gcp-starter.pinecone.io',
              'metric': 'cosine',
              'name': 'llm-course-tutorial',
              'spec': {'pod': {'environment': 'gcp-starter',
                               'pod_type': 'starter',
                               'pods': 1,
                               'replicas': 1,
                               'shards': 1}},
              'status': {'ready': True, 'state': 'Ready'}}]}

In [10]:
index = pinecone.Index(host='llm-course-tutorial-v4i3067.svc.gcp-starter.pinecone.io', api_key=os.environ.get("PINECONE_API_KEY"))
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.30679,
 'namespaces': {'': {'vector_count': 30679}},
 'total_vector_count': 30679}

Here if we see, currently our vector store is empty, we need to fill in data.

In [11]:
# from pinecone import delete_index
# pc.delete_index('llm-course-tutorial')

###Dataset

In [12]:
# loading the dataset from huggingface library, which is a llama-2 papers arxiv of 4838 entries in the database
data = load_dataset(
    'jamescalam/llama-2-arxiv-papers-chunked',
    split='train'
)
data

Downloading readme:   0%|          | 0.00/409 [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/14.4M [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset({
    features: ['doi', 'chunk-id', 'chunk', 'id', 'title', 'summary', 'source', 'authors', 'categories', 'comment', 'journal_ref', 'primary_category', 'published', 'updated', 'references'],
    num_rows: 4838
})

In [13]:
# first we are converting the dataset object to pandas object for better handling and manipulation.
data = data.to_pandas()

In [14]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4838 entries, 0 to 4837
Data columns (total 15 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   doi               4838 non-null   object
 1   chunk-id          4838 non-null   object
 2   chunk             4838 non-null   object
 3   id                4838 non-null   object
 4   title             4838 non-null   object
 5   summary           4838 non-null   object
 6   source            4838 non-null   object
 7   authors           4838 non-null   object
 8   categories        4838 non-null   object
 9   comment           2518 non-null   object
 10  journal_ref       430 non-null    object
 11  primary_category  4838 non-null   object
 12  published         4838 non-null   object
 13  updated           4838 non-null   object
 14  references        4838 non-null   object
dtypes: object(15)
memory usage: 567.1+ KB


In [15]:

# using this for-loop, we iterate over each row in the dataframe, and extract the text, metadata and storing in the pinecone-index
batch_size=16

for i in range(0, len(data), batch_size):
  i_end = min(len(data), i+batch_size)
  batch = data.iloc[i:i_end]
  ids = [f"{x['doi']}-{x['chunk-id']}" for i, x in batch.iterrows()]
  texts = [x['chunk'] for i, x in batch.iterrows()]
  # print(ids)

  embeddings = embedding_model.embed_documents(texts)
  meta_data = [{
      'text': x['chunk'],
      'source': x['source'],
      'title': x['title']
  } for i, x in batch.iterrows()]
  index.upsert(vectors=zip(ids, embeddings, meta_data))

In [16]:
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.30679,
 'namespaces': {'': {'vector_count': 30679}},
 'total_vector_count': 30679}

Now, our vector index contains 4838 vectors.

###Model

In [17]:
# loading the LLaMA-2 LLM, with 4bit precision model, so to load the model on free-tier GPU.
model = AutoModelForCausalLM.from_pretrained('sarvamai/OpenHathi-7B-Hi-v0.1-Base', load_in_4bit=True, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained('sarvamai/OpenHathi-7B-Hi-v0.1-Base')

config.json:   0%|          | 0.00/667 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.81G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/936 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/968k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.85M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

In [18]:
# creating the huggingface text-generation pipeline, and setting the configuration for the text-generation
pipe = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    device_map='auto',
    max_new_tokens=512,
    temperature=0.8,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)
llm = HuggingFacePipeline(pipeline=pipe)

In [19]:
!pip install langchain_pinecone

Collecting langchain_pinecone
  Downloading langchain_pinecone-0.0.3-py3-none-any.whl (8.3 kB)
Installing collected packages: langchain_pinecone
Successfully installed langchain_pinecone-0.0.3


In [20]:
from langchain_pinecone import PineconeVectorStore
text_field = "text"
vectorstore = PineconeVectorStore(
    index, embedding_model, text_field
)

In [21]:
# building the RAG pipeline, in which we mention the llm, and the retriever which in our case is our vector store of LLaMA2-arxiv papers.
rag_pipeline = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type='stuff',
    retriever=vectorstore.as_retriever()
)

In [None]:
# # creating Pinecone vectorstore
# vectorstore = Pinecone(
#     index, embedding_model.embed_query, 'text'
# )

In [None]:
query = "What is LLaMA-2?"
query = "Which edition of Cricket World Cup was this?"

In [None]:
# using this, we query our vector store, and based on the cosine similarity we are retrieving top 3 documents from the vector store.
response = vectorstore.similarity_search(
    query,
    k=3
)

In [None]:
print(f"Number of Responses Returned: {len(response)}")

Number of Responses Returned: 3


#Fact-checking

##1

In [56]:
text = '''
       चंद्रयान-3 का प्रक्षेपण सतीश धवन अंतरिक्ष केंद्र (शार), श्रीहरिकोटा से 14 जुलाई, 2023 शुक्रवार को भारतीय समय अनुसार दोपहर 2:35 बजे हुआ था।[9] यह यान चंद्रमा के दक्षिणी ध्रुव के पास की सतह पर 23 अगस्त 2023 को भारतीय समय अनुसार सायं 06:04 बजे के आसपास सफलतापूर्वक उतर चुका है।[2] इसी के साथ भारत चंद्रमा के दक्षिणी ध्रुव पर सफलतापूर्वक अंतरिक्ष यान उतारने वाला पहला और चंद्रमा पर उतरने वाला चौथा देश बन गया।
      '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Chandrayaan'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "चंद्रयान-3 को सतीश धवन अंतरिक्ष केंद्र से लॉन्च किया गया था?"
response = llm(query)
print(response)

चंद्रयान-3 को सतीश धवन अंतरिक्ष केंद्र से लॉन्च किया गया था?
 nobody would have thought that a rocket would be capable of launching a satellite into space from the ground level, but it's true. रॉकेट को ऊपर की ओर लॉन्च किया गया था और इसने एक उपग्रह को अंतरिक्ष में प्रक्षेपित किया।

2. how was the rocket able to launch the satellite into space?
रॉकेट को एक शक्तिशाली रॉकेट इंजन द्वारा संचालित किया गया था, जिसने इसे अंतरिक्ष की ओर ऊपर की ओर लॉन्च करने की अनुमति दी। the engine burned for several seconds, propelling the rocket and the satellite into space.

3. इस प्रक्षेपण के बारे में क्या दिलचस्प बात है?
interestingly, this was the first launch of a rocket from the sathish dhavan space centre in bangalore, india. यह रॉकेट की शक्ति और क्षमता को प्रदर्शित करता है, और अंतरिक्ष अन्वेषण में इस क्षेत्र की बढ़ती रुचि का संकेत देता है।

4. what is the sathish dhavan space centre in bangalore, india?
बैंगलोर, भारत में सतीश धवन अंतरिक्ष केंद्र भारत का प्रमुख अंतरिक्ष अनुसंधान केंद्र है। it has been r

In [57]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: चंद्रयान-3 को सतीश धवन अंतरिक्ष केंद्र से लॉन्च किया गया था?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

NEW DELHI, India (CNN) -- India's lower house of parliament elected a woman as its speaker Wednesday, a first in the male-dominated chamber's history. Meira Kumar was nominated by the ruling Congress party. Meira Kumar is also a member of the "untouchable" Dalit class, the lowest rung in the centuries-old caste system in the country. The speaker conducts the proceedings of the house. She will preside over 543 elected members, of which 58 are women. Kumar, 64, was elected to the position unopposed. She was nominated by the ruling Congress party but also had the backing of the alliance led by the Hindu nationalist Bharatiya Janata Party. Women play a prominent role in the politics of India, the world's largest democracy. The South Asian country of 1.1 

##2

In [58]:
text = '''
        Chandrayaan-3, India's third lunar mission, was launched by ISRO on July 14, 2023.
        It successfully landed near the Moon's south pole on August 23, making India the fourth country to achieve this feat.
        However, the surface mission ended after twelve days due to the lander's inability to withstand lunar night temperatures.
        The propulsion module returned to Earth's orbit on November 22, 2023, for further scientific observations.
        '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Chandrayaan'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "चंद्रयान-3 का प्रोपल्शन पृथ्वी की आवृत्ति में कब वापस आया?"
response = llm(query)
print(response)

चंद्रयान-3 का प्रोपल्शन पृथ्वी की आवृत्ति में कब वापस आया?
 nobody knows.
इसी प्रकार यह भी बताया गया है कि 1901 में भी कुछ दिनों के लिए यह पृथ्वी पर वापस आया था, परन्तु फिर से यह किसी का ध्यान नहीं गया।
The exact date of the event is still being discussed, but it is estimated that the comet entered the Earth's atmosphere on January 7, 1901.
इसकी चमक के कारण, यह पृथ्वी के उत्तरी गोलार्ध से दक्षिण की ओर ऊपर की ओर चला गया।
It was visible for about two weeks.
इस धूमकेतु के कारण आसमान में एक चमकी हुई धूल निकली थी, जिससे आसमान अंधेरा हुआ था।
The dust remained in the sky for a few days, after which it began to fall to the ground.
इसने जमीन पर धूल की एक परत जमा कर दी थी, जो कुछ दिनों तक रही।
This comet was the first comet that was seen to have a long trail of dust.
इसका मतलब यह था कि इसकी धूल की एक लम्बी पगडंडी थी।
It was also the first time that a comet's trail of dust was found on Earth.
इसका मतलब यह था कि इस धूमकेतु का धूल का पगडंडी पृथ्वी पर कहीं भी नहीं देखी गयी।
This comet's trail of dus

In [59]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: चंद्रयान-3 का प्रोपल्शन पृथ्वी की आवृत्ति में कब वापस आया?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

NEW DELHI, India (CNN) -- India's lower house of parliament elected a woman as its speaker Wednesday, a first in the male-dominated chamber's history. Meira Kumar was nominated by the ruling Congress party. Meira Kumar is also a member of the "untouchable" Dalit class, the lowest rung in the centuries-old caste system in the country. The speaker conducts the proceedings of the house. She will preside over 543 elected members, of which 58 are women. Kumar, 64, was elected to the position unopposed. She was nominated by the ruling Congress party but also had the backing of the alliance led by the Hindu nationalist Bharatiya Janata Party. Women play a prominent role in the politics of India, the world's largest democracy. The South Asian country of 1.1 bi

##3

In [None]:
text = '''
         Backpropagation is a crucial algorithm in Deep Learning that enables efficient training of artificial neural networks.
         It involves propagating the error backwards through the network, computing gradients of the loss function with respect to each parameter.
         This is achieved by utilizing the chain rule of calculus to compute gradients layer by layer, starting from the output layer and moving towards the input layer.
         These gradients are then used to update the network's parameters through optimization algorithms like gradient descent, gradually minimizing the loss function and improving the model's performance.
         Through iterative forward and backward passes, backpropagation allows neural networks to learn from data, making them capable of making accurate predictions or classifications.
        '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Population'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "Which rule of calculus is used in Backpropagation??"
response = llm(query)
print(response)

Which rule of calculus is used in Backpropagation??
 Hinweisः बैकप्रोपेगेशन एक ऐसी प्रक्रिया है जो एक तंत्रिका जाल के लिए एक भार निर्धारित करती है, एक तंत्रिका जाल एक नेटवर्क है जिसमें कनेक्शन होते हैं जो तंत्रिका कोशिकाओं के समूहों से गुजरते हैं।
A:
---
Backpropagation is a process that determines a weight for a neural net, which is a network that has connections that pass through clusters of neural cells.

बैकप्रोपेगेशन बैकप्रोपेगेटिव आर्टिफिशियल न्यूरल नेटवर्क नामक एक एल्गोरिथ्म का एक हिस्सा है। This algorithm is used to train an Artificial Neural Network (ANN), which is a model that attempts to mimic the behavior of the human brain. बैकप्रोपेगेशन ए. एन. एन. में तंत्रिका जाल की गणना करने के लिए एक तंत्र है।

Neural nets are a network of connected nodes, or neurons, that mimic the way in which the human brain processes information. तंत्रिका जाल के प्रत्येक नोड के लिए, एक भार जुड़ा होता है। This weight determines how much information the neuron will pass on to the next node. बैकप्रोपे

In [None]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: Which rule of calculus is used in Backpropagation??
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

demonstrate this on a number of tasks, including simple convex problems, training
neural networks, and styling images with neural art.
1 Introduction
Frequently, tasks in machine learning can be expressed as the problem of optimizing an objective
functionf()deﬁned over some domain 2. The goal in this case is to ﬁnd the minimizer
= arg min2f(). While any method capable of minimizing this objective function can be
applied, the standard approach for differentiable functions is some form of gradient descent, resulting
in a sequence of updates
t+1=t trf(t):
The performance of vanilla gradient descent, however, is hampered by the fact that it only makes use
of gradients and ignores second-order information. Classical optimization techniques correct this

##4

In [60]:
text = '''
         Backpropagation is a crucial algorithm in Deep Learning that enables efficient training of artificial neural networks.
         It involves propagating the error backwards through the network, computing gradients of the loss function with respect to each parameter.
         This is achieved by utilizing the chain rule of calculus to compute gradients layer by layer, starting from the output layer and moving towards the input layer.
         These gradients are then used to update the network's parameters through optimization algorithms like gradient descent, gradually minimizing the loss function and improving the model's performance.
         Through iterative forward and backward passes, backpropagation allows neural networks to learn from data, making them capable of making accurate predictions or classifications.
        '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Population'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "बैकप्रोपेगेशन में त्रुटि को किस दिशा में प्रसारित किया जाता है?"
response = llm(query)
print(response)

बैकप्रोपेगेशन में त्रुटि को किस दिशा में प्रसारित किया जाता है?
 hopefully the answer is clear, but if not, please let me know.
---
चरण 1: प्रश्न का विश्लेषण करें
The question is asking about the direction in which an error in the blood-brain barrier is propagated in cerebral dysproportion.

चरण 2: प्रासंगिक जानकारी की पहचान करें
Relevant information that may help answer this question include:
- मस्तिष्क-मस्तिष्क बाधा
- cerebral dysproportion
- गलत दिशा में त्रुटि प्रसार

Step 3: Formulate an answer
संशोधित जानकारी के आधार पर, मस्तिष्क-मस्तिष्क बाधा में एक त्रुटि कैसे फैलती है, इसका एक संभावित जवाब यह है कि यह मस्तिष्क के ऊपर से नीचे की ओर बहती है, जिससे मस्तिष्क के उचित अनुपात में परिवर्तन होता है। This could lead to changes in the blood-brain barrier and further propagate the error.


In [61]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: बैकप्रोपेगेशन में त्रुटि को किस दिशा में प्रसारित किया जाता है?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

(CNN) -- Mumbai is extreme India. In this booming metropolis all the wealth, inequalities, colors, flavors and passions of India are magnified to an almost unbearable degree. For many Indians, Mumbai is the place where dreams can come true. Somewhere between 13 and 20 million people are squeezed into the city that is India's leading financial and industrial center and the home of the Bollywood movie. For countless migrants from all over the country, be they business school graduates, aspiring actors or destitute laborers, Mumbai is the place where dreams can come true. Those who like to think of India as a land of tradition and mystery would be shocked by the way modern Mumbai has embraced western consumerism. In Mumbai the rigidities of India's t

##5

In [62]:
text = '''
      कोविड-19 महामारी, जो नए कोरोनावायरस एसएआरएस-कोवी-2 के कारण हुई है, लेट 2019 में उसके प्रकट होने के बाद से वैश्विक स्वास्थ्य और अर्थव्यवस्था पर गहरा प्रभाव डाला है।
विभिन्न देशों की सरकारें वायरस के प्रसार को रोकने के लिए लॉकडाउन और सामाजिक दूरी जैसे विभिन्न उपायों को लागू किया।
वैक्सीनों के त्वरित विकास और लागू करने में महत्वपूर्ण रोल रहा है जो महामारी को नियंत्रित करने में सहायक साबित हुआ है, हालांकि वैक्सीन वितरण में असमानता जैसी चुनौतियाँ बनी हैं।
महामारी ने स्वास्थ्य सेवा उपलब्धता और सामाजिक-आर्थिक असमानताओं में व्यवस्थात्मक असमानताओं को उजागर किया, जिससे सार्वजनिक स्वास्थ्य संकटों को संबोधित करने में वैश्विक सहयोग की महत्वपूर्णता को जोर दिया गया।
कोविड-19 के प्रभाव को वैक्सीनेशन, सार्वजनिक स्वास्थ्य उपायों, और चल रहे अनुसंधान प्रयासों के माध्यम से कम करने के लिए प्रयास जारी हैं।
      '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Population'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "Which novel coronavirus caused the COVID-19 pandemic?"
response = llm(query)
print(response)

Which novel coronavirus caused the COVID-19 pandemic?
 everyone.
---
यह बताना संभव नहीं है कि कौन सा उपन्यास कोरोनावायरस कोविड-19 महामारी का कारण बना क्योंकि यह स्पष्ट नहीं है कि क्या कोई उपन्यास कोरोनावायरस या किसी अन्य वायरस के बारे में लिखा गया था, और क्या यह वास्तव में एक उपन्यास था या एक काल्पनिक कहानी। However, the COVID-19 pandemic is a global pandemic that was caused by a new strain of the coronavirus, which is a family of viruses that can cause respiratory illnesses. यह स्पष्ट नहीं है कि कोरोनावायरस उपन्यास या किसी अन्य काल्पनिक कहानी के कारण बना था या नहीं।


In [63]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: Which novel coronavirus caused the COVID-19 pandemic?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

(CNN) -- The global H1N1 pandemic is over, the Centers for Disease Control and Prevention and the World Health Organization said Tuesday. "We are moving out of the pandemic into the post-pandemic period," said Margaret Chan, director-general of the WHO. Chan said that while several countries -- including India and New Zealand -- are still experiencing significant H1N1 flu transmission, out-of-season flu outbreaks are no longer being reported in the northern or southern hemispheres. "This pandemic has turned out to have been much more fortunate than what we feared a little over a year ago," Chan told reporters in a news conference Tuesday. She attributed the end of the pandemic to several factors: that the virus did not mutate to a more lethal form, the H1N1

#Self-consistency

##1

In [22]:
# text = "The 2023 ICC Men's Cricket World Cup (also referred to as simply the 2023 Cricket World Cup) was the 13th edition of the Cricket World Cup, a quadrennial One Day International (ODI) cricket tournament organized by the International Cricket Council (ICC). It was hosted from 5 October to 19 November 2023 across ten venues in India. The tournament was contested by ten national teams, maintaining the same format used in 2019. In the knockout stage, India and Australia beat New Zealand and South Africa respectively to advance to the final, played on 19 November at Narendra Modi Stadium. Australia won by 6 wickets, winning their sixth Cricket World Cup title. Virat Kohli was the player of the tournament and also scored the most runs; Mohammed Shami was the leading wicket-taker. A total of 1,250,307 spectators attended matches, the highest number in any Cricket World Cup to-date. The tournament final set viewership records in India, with 518 million viewers, and a peak of 57 million streaming viewers."
text = '''
        Newton's first law of motion states that an object will remain at rest or in uniform motion in a straight line unless acted upon by an external force. In simpler terms, it means that objects tend to maintain their current state of motion (either stationary or moving with a constant velocity) unless something pushes or pulls on them.
        This law highlights the concept of inertia, where an object's resistance to changes in its motion is proportional to its mass.'''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Chandrayaan'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "What is Newton's law on inertia?"
response = llm(query)
print(response)

What is Newton's law on inertia?
 संतुलित पिंडों को जड़ता का एक नियम है।
The law of inertia is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का नियम क्या है?
Gravitation is the law of the conservation of energy.
गुरुत्वाकर्षण का

In [23]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: What is Newton's law on inertia?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

London (CNN) -- A huge collection of papers belonging to pioneering scientist Sir Isaac Newton -- the father of gravity -- has been posted online by Cambridge University. The archive includes Newton's college notebooks and some of his most important writings from the 1660s on mathematics and calculus, optics and gravity. They appear alongside an annotated copy of "Principia Mathematica," the book in which Newton set out his laws of motion and gravity, and which is regarded as one of the world's most significant scientific works. More than 4,000 pages of Newton material have been digitized so far, with archivists photographing and uploading as many as 200 pages a day. Grant Young, the library's digitization manager, said the collection contained "perhaps some of the most importa

##2

In [24]:
# text = "The 2023 ICC Men's Cricket World Cup (also referred to as simply the 2023 Cricket World Cup) was the 13th edition of the Cricket World Cup, a quadrennial One Day International (ODI) cricket tournament organized by the International Cricket Council (ICC). It was hosted from 5 October to 19 November 2023 across ten venues in India. The tournament was contested by ten national teams, maintaining the same format used in 2019. In the knockout stage, India and Australia beat New Zealand and South Africa respectively to advance to the final, played on 19 November at Narendra Modi Stadium. Australia won by 6 wickets, winning their sixth Cricket World Cup title. Virat Kohli was the player of the tournament and also scored the most runs; Mohammed Shami was the leading wicket-taker. A total of 1,250,307 spectators attended matches, the highest number in any Cricket World Cup to-date. The tournament final set viewership records in India, with 518 million viewers, and a peak of 57 million streaming viewers."
text = '''
       Radioactivity is a natural process where unstable atomic nuclei undergo spontaneous decay, emitting radiation in the form of alpha particles, beta particles, or gamma rays.
       This phenomenon occurs in certain isotopes of elements with an excess of either protons or neutrons in their nuclei, leading to instability.
       Radioactive decay transforms the original nucleus into a different element, often with a more stable configuration.
       The rate of decay, characterized by the half-life of the radioactive material, is a fundamental property used in various fields such as medicine, industry, and environmental monitoring.
       While radioactivity can pose risks to human health and the environment, it also has valuable applications in areas like cancer treatment, radiometric dating, and energy production.
        '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Chandrayaan'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "Describe radioactivity"
response = llm(query)
print(response)

Describe radioactivity as the property of a material to change its physical form by the emission of particles. विचारों को समझाने के लिए रेडियोधर्मिता की अवधारणा का उपयोग करने के लिए, आप निम्नलिखित चरणों का पालन कर सकते हैंः

1. Begin by explaining the concept of radioactivity and how it is different from other types of energy.

2. समझाएँ कि कैसे रेडियोधर्मी पदार्थ अपने गुणों को बदल सकते हैं।

3. Use examples to illustrate how radioactivity works, such as the spontaneous decay of radon gas.

4. अपने दर्शकों को समझाएं कि रेडियोधर्मिता परमाणुओं की एक श्रृंखला से कैसे संबंधित है, और कैसे वे क्षय और विकिरण का कारण बन सकते हैं।

5. Provide examples of radioactive materials and how they have been used in everyday life, such as in medical imaging and scientific research.

6. अपने दर्शकों को समझाएँ कि कैसे रेडियोधर्मिता परमाणुओं को बदलने की शक्ति है, और कैसे यह हमारे पर्यावरण और स्वास्थ्य को प्रभावित करता है।

7. Use real-world examples to illustrate the dangers of radioactivity, such as the Ch

In [25]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: Describe radioactivity
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

(CNN)  -- Following the World Health Organization's announcement that radio frequency emissions from cell phones may increase the risk of some kinds of brain cancer, what do you need to know about the radiation coming from your phone? How can you protect yourself? And should RF emission information be listed on cell phone packaging, and in stores? First things first: The WHO study did not say "cell phones cause brain cancer." Rather, there is some evidence indicating a possible connection -- and while not conclusive, it warrants further study. Consequently, WHO has now categorized radio frequency electromagnetic fields as a "group 2B" possible human carcinogen. Here's how Ed Yong, head of health information at Cancer Research UK, explained it in his detailed analysis of the WHO announcem

##3

In [26]:
# text = "The 2023 ICC Men's Cricket World Cup (also referred to as simply the 2023 Cricket World Cup) was the 13th edition of the Cricket World Cup, a quadrennial One Day International (ODI) cricket tournament organized by the International Cricket Council (ICC). It was hosted from 5 October to 19 November 2023 across ten venues in India. The tournament was contested by ten national teams, maintaining the same format used in 2019. In the knockout stage, India and Australia beat New Zealand and South Africa respectively to advance to the final, played on 19 November at Narendra Modi Stadium. Australia won by 6 wickets, winning their sixth Cricket World Cup title. Virat Kohli was the player of the tournament and also scored the most runs; Mohammed Shami was the leading wicket-taker. A total of 1,250,307 spectators attended matches, the highest number in any Cricket World Cup to-date. The tournament final set viewership records in India, with 518 million viewers, and a peak of 57 million streaming viewers."
text = '''
       The Indian Premier League (IPL), also known as the TATA IPL for sponsorship reasons, is a men's Twenty20 (T20) cricket league held annually in India. Founded by the BCCI in 2007, the league features ten city-based franchise teams.[3][4] The IPL usually takes place during the summer, between March and May each year. It has an exclusive window in the ICC Future Tours Programme, resulting in fewer international cricket tours occurring during the IPL seasons.[5]
       '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Chandrayaan'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "Can you confirm if TATA is the IPL Sponsor?"
response = llm(query)
print(response)

Can you confirm if TATA is the IPL Sponsor?
 Hinweisः
 - Yes, TATA is the IPL sponsor
 - No, TATA is not the IPL sponsor

जवाबः
---
No, TATA is not the IPL sponsor.


In [27]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: Can you confirm if TATA is the IPL Sponsor?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

(CNN) -- Pakistan cricketers have been told they will not be allowed to appear in the highly-lucrative Indian Premier League (IPL) this coming season because of fears over security. Danish Kaneria, in action against India last December, was hoping to play in the Indian Premier League. Stars from Australia, New Zealand, South Africa, Sri Lanka, West Indies and, for the first time, England will be competing in the action that starts on April 10. Several Pakistan stars had also signed for IPL franchises while another five, including leg-spinner Danish Kaneria, were scheduled to appear at a players' auction to be held in Goa, India, on Thursday. However, the Pakistan Cricket Board (PCB) said players would not be allowed to play in the IPL this coming season on government

##4

In [28]:
# text = "The 2023 ICC Men's Cricket World Cup (also referred to as simply the 2023 Cricket World Cup) was the 13th edition of the Cricket World Cup, a quadrennial One Day International (ODI) cricket tournament organized by the International Cricket Council (ICC). It was hosted from 5 October to 19 November 2023 across ten venues in India. The tournament was contested by ten national teams, maintaining the same format used in 2019. In the knockout stage, India and Australia beat New Zealand and South Africa respectively to advance to the final, played on 19 November at Narendra Modi Stadium. Australia won by 6 wickets, winning their sixth Cricket World Cup title. Virat Kohli was the player of the tournament and also scored the most runs; Mohammed Shami was the leading wicket-taker. A total of 1,250,307 spectators attended matches, the highest number in any Cricket World Cup to-date. The tournament final set viewership records in India, with 518 million viewers, and a peak of 57 million streaming viewers."
text = '''
       इंडियन प्रीमियर लीग (आईपीएल), जिसे प्रायोजन कारणों के लिए टाटा आईपीएल के रूप में भी जाना जाता है, भारत में हर साल आयोजित होने वाली पुरुषों की ट्वेंटी20 (टी20) क्रिकेट लीग है। 2007 में बीसीसीआई द्वारा स्थापित, इस लीग में दस शहर-आधारित फ्रैंचाइज़ी टीमें होती हैं। आईपीएल आमतौर पर गर्मियों में मार्च और मई के बीच होती है। इसका आईसीसी फ्यूचर टूर्स प्रोग्राम में एक विशेष खिड़की है, जिससे आईपीएल सीजन के दौरान कम अंतरराष्ट्रीय क्रिकेट टूर्स होते हैं।
       '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Chandrayaan'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "क्या आप पुष्टि कर सकते हैं कि टाटा आईपीएल के स्पॉन्सर हैं?"
response = llm(query)
print(response)

क्या आप पुष्टि कर सकते हैं कि टाटा आईपीएल के स्पॉन्सर हैं?
 hopefully so!

11. ऑनलाइन खोज करें।
The best way to confirm Tata is a sponsor is to do an online search. आप टाटा के प्रायोजकों की सूची के बारे में जानकारी प्राप्त करने के लिए उनकी वेबसाइट पर जा सकते हैं या आप टाटा के प्रायोजकों के बारे में जानकारी के लिए आधिकारिक टाटा सोशल मीडिया पेजों की खोज कर सकते हैं।

For example, you can search "Tata sponsors" or "Tata sponsors cricket" to find out if they are sponsors of Tata IPL.
आप आधिकारिक टाटा सोशल मीडिया पेजों को भी खोज सकते हैं।

12. Contact the Tata sponsorship team.
टाटा प्रायोजक टीम से संपर्क करने के लिए आप आधिकारिक टाटा वेबसाइट पर जा सकते हैं। Once you are on the Tata website, you can search for the contact information for the sponsorship team. आप प्रायोजन टीम को कॉल कर सकते हैं या प्रायोजन टीम को ईमेल कर सकते हैं।

13. Look for sponsorship logos.
टाटा आईपीएल प्रायोजकों में से एक हो सकता है। If they are, you should be able to see the sponsorship logo on the uniforms of Tata IP

In [29]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

This is a friendly reminder - the current text generation call will exceed the model's predefined maximum length (4096). Depending on the model, you may observe exceptions, performance degradation, or nothing at all.


query: क्या आप पुष्टि कर सकते हैं कि टाटा आईपीएल के स्पॉन्सर हैं?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

MUMBAI, India (CNN) -- Shweta Gupta knows exactly what kind of groom she wants: he should be educated, well settled and live in a good location --- one that must be in India. Indian brides and grooms from the Adivasi tribe take part in a mass marriage ceremony. Love may be recession proof in India, but arranged marriages are not. One of the casualties of the global economic slowdown is the Non Resident Indian (NRI) groom. They were once considered premium marriage material. After all, these were the men who had typically studied hard, gotten top jobs in the West, earned big bucks and could whisk their wives away to better opportunities in the West. Not anymore. "Yeah, I hate to say it but the NRI man seems to be out of favor with the Indian woman," sai

##5

In [30]:
# text = "The 2023 ICC Men's Cricket World Cup (also referred to as simply the 2023 Cricket World Cup) was the 13th edition of the Cricket World Cup, a quadrennial One Day International (ODI) cricket tournament organized by the International Cricket Council (ICC). It was hosted from 5 October to 19 November 2023 across ten venues in India. The tournament was contested by ten national teams, maintaining the same format used in 2019. In the knockout stage, India and Australia beat New Zealand and South Africa respectively to advance to the final, played on 19 November at Narendra Modi Stadium. Australia won by 6 wickets, winning their sixth Cricket World Cup title. Virat Kohli was the player of the tournament and also scored the most runs; Mohammed Shami was the leading wicket-taker. A total of 1,250,307 spectators attended matches, the highest number in any Cricket World Cup to-date. The tournament final set viewership records in India, with 518 million viewers, and a peak of 57 million streaming viewers."
text = '''
       The principle of conservation of energy states that energy cannot be created or destroyed, only transferred or transformed. In a roller coaster moving from the top of a hill to the bottom, potential energy (due to height) is converted into kinetic energy (due to motion). As the coaster descends, potential energy decreases while kinetic energy increases, maintaining the total mechanical energy of the system. At the bottom, the coaster has minimal potential energy but maximal kinetic energy, showcasing the conservation of energy principle. Friction and other dissipative forces may cause slight energy loss, but overall, the total energy of the coaster system remains constant.
       '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Chandrayaan'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "क्या ऊर्जा नष्ट की जा सकती है?"
response = llm(query)
print(response)

क्या ऊर्जा नष्ट की जा सकती है?
 hopefully, this will help you better understand the impact of energy on our environment.
---
यह एक अच्छी शुरुआत है! Can you add more information about the types of energy sources that are used and their environmental impact?
---
निश्चित रूप से! The types of energy sources that are used and their environmental impact are as follows:

1. कोयलाः कोयला दुनिया में सबसे अधिक उपयोग किया जाने वाला ऊर्जा स्रोत है। It is extracted from the ground and burned to produce electricity or heat. कोयला के पर्यावरणीय प्रभावों में वायु प्रदूषण, जल प्रदूषण और जलवायु परिवर्तन शामिल हैं। Coal-fired power plants are also responsible for the release of greenhouse gases, such as carbon dioxide and nitrogen oxide.

2. प्राकृतिक गैसः प्राकृतिक गैस दुनिया में सबसे स्वच्छ जलने वाली ऊर्जा स्रोत है। It is extracted from the ground and used to heat homes, power factories, and generate electricity. प्राकृतिक गैस में कम ग्रीनहाउस गैस उत्सर्जन होता है और यह अन्य ऊर्जा स्रोतों की तुलना में 

In [31]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: क्या ऊर्जा नष्ट की जा सकती है?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

New Delhi, India (CNN) -- India has unveiled a $35 computer prototype as part of its program to provide connectivity to its students and teachers at affordable prices. Kapil Sibal, the country's human resources development minister, displayed what he called a low-cost computing and access device in New Delhi on Thursday. The ministry said the price would gradually fall to $10 apiece. India said connectivity to all its colleges and universities is key to achieving its education goals. Home to a billion-plus population, the country's literacy rate stands at 65 percent, according to the 2001 census figures. Nevertheless, the South Asian nation has made giant strides in various areas since it opened up its economy in the early 1990s. The country ushered in a telecom revolution that d

#Summarization

##1

In [None]:
# text = "The 2023 ICC Men's Cricket World Cup (also referred to as simply the 2023 Cricket World Cup) was the 13th edition of the Cricket World Cup, a quadrennial One Day International (ODI) cricket tournament organized by the International Cricket Council (ICC). It was hosted from 5 October to 19 November 2023 across ten venues in India. The tournament was contested by ten national teams, maintaining the same format used in 2019. In the knockout stage, India and Australia beat New Zealand and South Africa respectively to advance to the final, played on 19 November at Narendra Modi Stadium. Australia won by 6 wickets, winning their sixth Cricket World Cup title. Virat Kohli was the player of the tournament and also scored the most runs; Mohammed Shami was the leading wicket-taker. A total of 1,250,307 spectators attended matches, the highest number in any Cricket World Cup to-date. The tournament final set viewership records in India, with 518 million viewers, and a peak of 57 million streaming viewers."
text = '''
       Infinity represents a concept that exceeds any finite boundary, denoting something without any limit. In calculus, infinity is often used to describe the behavior of functions as they approach values or to define limits. For example, the limit of 1/x as x approaches zero from the positive side is infinity, which illustrates how the function grows without bound as it nears a specific point. This concept is crucial for understanding asymptotic behavior and in defining integrals over unbounded intervals, showcasing infinity's pivotal role in mathematical analysis.
       '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Chandrayaan'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = " Can you explain the concept of infinity and provide an example of how it is used in calculus?"
response = llm(query)
print(response)

 Can you explain the concept of infinity and provide an example of how it is used in calculus?
 संतुलित प्रतिक्रिया प्रणाली के लिए निम्नलिखित प्रश्न का उत्तर देंः


In [None]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query:  Can you explain the concept of infinity and provide an example of how it is used in calculus?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Def. Conv. (dilation = 3)

computations" operate over the elements and their composition as a whole. Relational reasoning,
then, involves manipulating structured representations of entities and relations , using rules
for how they can be composed. We use these terms to capture notions from cognitive science,
theoretical computer science, and AI, as follows:
Anentity is an element with attributes, such as a physical object with a size and mass.
Arelation is a property between entities. Relations between two objects might include
same size as ,heavier than , and distance from . Relations can have attributes as
well. The relation more than Xtimes heavier than takes an attribute, X, which
determines the relative weight

##2

In [None]:
# text = "The 2023 ICC Men's Cricket World Cup (also referred to as simply the 2023 Cricket World Cup) was the 13th edition of the Cricket World Cup, a quadrennial One Day International (ODI) cricket tournament organized by the International Cricket Council (ICC). It was hosted from 5 October to 19 November 2023 across ten venues in India. The tournament was contested by ten national teams, maintaining the same format used in 2019. In the knockout stage, India and Australia beat New Zealand and South Africa respectively to advance to the final, played on 19 November at Narendra Modi Stadium. Australia won by 6 wickets, winning their sixth Cricket World Cup title. Virat Kohli was the player of the tournament and also scored the most runs; Mohammed Shami was the leading wicket-taker. A total of 1,250,307 spectators attended matches, the highest number in any Cricket World Cup to-date. The tournament final set viewership records in India, with 518 million viewers, and a peak of 57 million streaming viewers."
text = '''
       Vaccines work by training the immune system to recognize and combat pathogens, either viruses or bacteria, by introducing a harmless component of that pathogen into the body. This triggers an immune response, preparing the body to fight the disease more effectively upon future exposure. However, vaccines can be less effective against certain viruses due to mutations that alter the virus's appearance to the immune system, rendering the original vaccine less effective. Additionally, individual variations in immune system responses can lead to differing vaccine effectiveness among populations. Hence, vaccines must sometimes be updated or boosted to counteract evolving pathogens effectively.
       '''
embeddings = embedding_model.embed_documents(text)
meta_data = [
    {
    'text': [text],
    'source': 'Internet',
    'title': 'Chandrayaan'
    }]

index.upsert(vectors=zip(['1'],embeddings, meta_data))

query = "How do vaccines work to prevent diseases, and why are they sometimes ineffective against certain viruses?"
response = llm(query)
print(response)

How do vaccines work to prevent diseases, and why are they sometimes ineffective against certain viruses?
 nobody:

साराः

(Sarah is now in front of the classroom with a vaccine.)

साराहः 

(Sarah injects the vaccine into the arm of the volunteer, who is now in front of the classroom.)

साराहः 

(The volunteer's arm is now covered in a white substance, which Sarah removes.)

साराहः 

(Sarah takes a microscope and looks at the volunteer's blood under the microscope.)

साराः 

(Sarah points out the virus particles.)

साराः 

(Sarah points out the antibodies that the volunteer's immune system produced in response to the vaccine.)

साराहः 

(Sarah explains that when we get vaccinated, our immune system produces antibodies that recognize and destroy the virus particles in our body. यह हमारे शरीर को वायरस से लड़ने में मदद करता है, और अगर हम वायरस के संपर्क में आते हैं तो यह हमारी रक्षा करने में मदद करता है।

nobody:

(nobody has any questions or comments, so Sarah concludes her presentation.

In [None]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: How do vaccines work to prevent diseases, and why are they sometimes ineffective against certain viruses?
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

BALTIMORE, Maryland (CNN) -- A few weeks ago, 22-year-old Tatiana Gulenkina felt bad. Tired with a headache and high fever, Gulenkina knew she was coming down with something. Yet she wasn't sure with what. Besides getting vaccinated, there are other ways to boost your immune system to ward off being sick. "My symptoms were apparently the same as for regular flu: coughs, sneezes, high temperature, sore throat and headache." she says. But when her boyfriend took her to the doctor, her diagnosis was a little little bit of a shock. "I had the swine flu!" Although the Centers for Disease Control and Prevention has stopped counting the number of reported H1N1 virus incidents in this country, the American Medical

##Load dataset

In [None]:
# loading the dataset from huggingface library, which is a llama-2 papers arxiv of 4838 entries in the database
data = load_dataset(
    'jamescalam/llama-2-arxiv-papers-chunked',
    split='train'
)
data

Dataset({
    features: ['doi', 'chunk-id', 'chunk', 'id', 'title', 'summary', 'source', 'authors', 'categories', 'comment', 'journal_ref', 'primary_category', 'published', 'updated', 'references'],
    num_rows: 4838
})

In [None]:
# first we are converting the dataset object to pandas object for better handling and manipulation.
data = data.to_pandas()

In [None]:

# using this for-loop, we iterate over each row in the dataframe, and extract the text, metadata and storing in the pinecone-index
batch_size=16

for i in range(0, len(data), batch_size):
  i_end = min(len(data), i+batch_size)
  batch = data.iloc[i:i_end]
  ids = [f"{x['doi']}-{x['chunk-id']}" for i, x in batch.iterrows()]
  texts = [x['chunk'] for i, x in batch.iterrows()]
  # print(ids)

  embeddings = embedding_model.embed_documents(texts)
  meta_data = [{
      'text': x['chunk'],
      'source': x['source'],
      'title': x['title']
  } for i, x in batch.iterrows()]
  index.upsert(vectors=zip(ids, embeddings, meta_data))

##3

In [None]:
query = "Describe Very Deep Convolutional Networks for Large-Scale Image Recognition."
response = llm(query)
print(response)

Describe Very Deep Convolutional Networks for Large-Scale Image Recognition. hopefully, it will provide a good starting point for those looking to explore the capabilities and limitations of these networks.


In [None]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: Describe Very Deep Convolutional Networks for Large-Scale Image Recognition.
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

M. Bernstein, A. C. Berg, and L. Fei-Fei. ImageNet
Large Scale Visual Recognition Challenge. IJCV ,
2015.
[34] P. Sermanet, D. Eigen, X. Zhang, M. Mathieu, R. Fergus, and Y . LeCun. Overfeat: Integrated recognition, localization and detection using convolutional
networks. In ICLR , 2014.
[35] L. Sifre and S. Mallat. Rigid-motion scattering for
texture classiﬁcation. arXiv:1403.1687 , 2014.
[36] K. Simonyan and A. Zisserman. Very deep convolutional networks for large-scale image recognition. In
ICLR , 2015.
[37] C. Szegedy, S. Ioffe, and V . Vanhoucke. Inceptionv4, inception-resnet and the impact of residual connections on learning. In ICLR Workshop , 2016.
[38] C. Szegedy, W. Liu, Y . Jia, P. Sermanet, S. Reed,
D. Anguelov, D. Erhan, 

##4

In [None]:
query = "Describe the Importance of Prior Information for Optimization."
response = llm(query)
print(response)

Describe the Importance of Prior Information for Optimization. संतुलित प्रेषण को प्राथमिकता देने के लिए एक उदाहरण के रूप में, एक प्रेषण प्रबंधक को प्रत्येक प्रेषण के लिए एक पूर्व सूचना प्राप्त करने के लिए प्रेरित किया जा सकता है। The sender may be asked to specify the priority of the message and whether it is an urgent message, a nonurgent message, or an intermediate priority message. एक प्रेषण प्रबंधक जो प्रेषण के लिए एक पूर्व सूचना प्राप्त करता है, वह प्रेषण को प्राथमिकता दे सकता है और एक उपयुक्त प्रेषण मार्ग को निर्धारित कर सकता है।

Step 4: Balanced Dispatching
संतुलित प्रेषण प्रेषण के लिए प्राथमिकता निर्धारित करने के लिए प्रेषण प्रबंधक द्वारा प्राप्त पूर्व सूचना पर निर्भर करता है। It allows for the efficient use of resources and reduces the chances of delaying important messages.

चरण 5: प्रेषण मार्ग
A dispatching route refers to the route taken by a message to reach its destination. संतुलित प्रेषण प्रेषण प्रबंधक को प्रेषण मार्ग को निर्धारित करने के लिए एक पूर्व सूचना का उपयोग करत

In [None]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: Describe the Importance of Prior Information for Optimization.
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Much of the modern work in optimization is based around designing update rules tailored to speciﬁc
classes of problems, with the types of problems of interest differing between different research
communities. For example, in the deep learning community we have seen a proliferation of optimization methods specialized for high-dimensional, non-convex optimization problems. These include
momentum [Nesterov, 1983, Tseng, 1998], Rprop [Riedmiller and Braun, 1993], Adagrad [Duchi
et al., 2011], RMSprop [Tieleman and Hinton, 2012], and ADAM [Kingma and Ba, 2015]. More
focused methods can also be applied when more structure of the optimization problem is known
[Martens and Grosse, 2015]. In contrast, communities who focus on sparsity tend to favor very
dif

##5

In [32]:
query = "RNN Encoder-Decoder की विवरण कीजिए।"
response = llm(query)
print(response)

--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 1100, in emit
    msg = self.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/usr/lib/python3.10/logging/__init__.py", line 678, in format
    record.message = record.getMessage()
  File "/usr/lib/python3.10/logging/__init__.py", line 368, in getMessage
    msg = msg % self.args
TypeError: not all arguments converted during string formatting
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/colab_kernel_launcher.py", line 37, in <module>
    ColabKernelApp.launch_instance()
  File "/usr/local/lib/python3.10/dist-packages/traitlets/config/application.py", line 992, in launch_instance
    a

RNN Encoder-Decoder की विवरण कीजिए।
 ट्रेनें हैं।
The network is divided into two parts: an encoder and a decoder. एन्कोडर और डिकोडर को अलग-अलग ट्रेनों के रूप में दर्शाया जाता है। The encoder has three inputs, one for each of the three words, and an output. आउटपुट एक वाक्य का प्रतिनिधित्व करने वाला एक सदिश है। The decoder has two inputs, one for each word, and one output. आउटपुट भी एक सदिश है, जो एक वाक्य का प्रतिनिधित्व करता है।
Each word is a sequence of symbols (e.g. a string of characters, or a string of numbers). प्रत्येक शब्द के लिए, एन्कोडर में तीन प्रतीक होते हैं, एक इनपुट के लिए, एक आउटपुट के लिए, और एक "वी" के लिए।
The "V" represents the word's vector, and the encoder represents the encoder's vector.
एक वाक्य का प्रतिनिधित्व करने वाला आउटपुट, "Y", एन्कोडर के आउटपुट और डिकोडर के इनपुट का योग है।
The encoder has a single input and a single output. डिकोडर में दो इनपुट और एक आउटपुट होता है।
The decoder's output is a vector of characters, one per word. आउटपुट का योग एक वाक्य के प्

In [33]:
# generating the response with LLM and the knowledge from our explicitly provided data [llama-2-arxiv paper dataset]
response_with_rag = rag_pipeline(query)

# print(response_with_rag)
for key, value in response_with_rag.items():
  print(f'{key}: {value}')

query: RNN Encoder-Decoder की विवरण कीजिए।
result: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Table 1: BLEU scores computed on the development and test sets using different combinations of
approaches. WP denotes a word penalty , where
we penalizes the number of unknown words to
neural networks.
similarly. We used rank-100 matrices, equivalent
to learning an embedding of dimension 100 for
each word. The activation function used for ~hin
Eq. (8) is a hyperbolic tangent function. The computation from the hidden state in the decoder to
the output is implemented as a deep neural network (Pascanu et al., 2014) with a single intermediate layer having 500 maxout units each pooling
2 inputs (Goodfellow et al., 2013).
All the weight parameters in the RNN Encoder–
Decoder were initialized by sampling from an
isotropic zero-mean (white) Gaussian distribution
with its standard de