In [None]:
# RAG - Convert private document into vector and store in any vector database
# Doc --> Embed --> VectorDB
# Ques --> VectorDB
# Euri + LangChain --> LLM
# VectorDB --> Ques + Embed Doc --> LLM --> Output

In [None]:
# !pip install langchain faiss-cpu  tiktoken requests euriai langchain-community

### Importing Libraries

In [5]:
from langchain.embeddings import base
from langchain.llms.base import LLM
from typing import List, Dict, Any
import requests
import numpy as np
import faiss
import os

In [6]:
from dotenv import load_dotenv
load_dotenv()

True

In [7]:
EURI_API_KEY = os.getenv("EURI_API_KEY")
# EURI_API_KEY

### Defining Embedding and LLM Response Functions

In [8]:
def generate_embeddings(text: str) :
    url = "https://api.euron.one/api/v1/euri/alpha/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURI_API_KEY}"
    }
    payload = {
        "input":text,
        "model": "text-embedding-3-small"
    }
    print(text)
    response = requests.post(url, headers=headers, json=payload)
    data = response.json()
    # Convert to numpy array for vector operations
    embedding = np.array(data['data'][0]['embedding'])
    
    return embedding

In [9]:
from euriai import EuriaiClient
def generate_response(prompt):
    client = EuriaiClient(
        api_key=f"{EURI_API_KEY}",
        model="gpt-4.1-nano"  
    )

    response = client.generate_completion(
        prompt=prompt,
        temperature=0,
        max_tokens=300
    )

    print(response)
    return response['choices'][0]["message"]["content"]

In [10]:
### Prepare your private dataset like my_private.txt
# Read --> Split into chunks --> (using langchain here)

### Defining Text Loader and Splitter

In [11]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

## Use either method to read the data from the file

# with open("my_private.txt", "r") as f:
#     print(f.read())

document = TextLoader("my_private.txt").load()
print(document)



In [12]:
splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=30)
# chunk_overlap: next chunk and previous chunk has 30 overlap
docs = splitter.split_documents(document)
print(len(docs))
docs[:3]

1670


[Document(metadata={'source': 'my_private.txt'}, page_content='Skip to main content\n\nSearch\nDonate  Search\nArts\nEducation\nPolicy\nOur Work\nOur Locations\nEvents\nAbout\n×\nIndia: A Historical Overview\nHoly Waters of the Gurudwara Bangla Sahib. (~FreeBirD®~/flickr)\nOrigins of the Indus Valley Civilization'),
 Document(metadata={'source': 'my_private.txt'}, page_content='The earliest evidence of humans in South Asia dates back two million years. Beginning about 30,000 years ago, stone age hunters and gatherers inhabited sites in the area. Between 8000 and 6500 B.C.E., there was a gradual shift from dependence on wild resources to domestic plants and animals.'),
 Document(metadata={'source': 'my_private.txt'}, page_content='During the period between 5000 and 2000 B.C.E., highly organized urban settlements spread throughout northern regions (present-day Pakistan and north India). Trade and communication networks linked these settlements to one another and to other distant ancient 

#### Converting docs to list

In [13]:
## Converting docs into embedings

texts = [doc.page_content for doc in docs]
print(len(texts))
texts[:3]

1670


['Skip to main content\n\nSearch\nDonate  Search\nArts\nEducation\nPolicy\nOur Work\nOur Locations\nEvents\nAbout\n×\nIndia: A Historical Overview\nHoly Waters of the Gurudwara Bangla Sahib. (~FreeBirD®~/flickr)\nOrigins of the Indus Valley Civilization',
 'The earliest evidence of humans in South Asia dates back two million years. Beginning about 30,000 years ago, stone age hunters and gatherers inhabited sites in the area. Between 8000 and 6500 B.C.E., there was a gradual shift from dependence on wild resources to domestic plants and animals.',
 'During the period between 5000 and 2000 B.C.E., highly organized urban settlements spread throughout northern regions (present-day Pakistan and north India). Trade and communication networks linked these settlements to one another and to other distant ancient cultures.']

In [14]:
# documents = ["doc1 content", "doc2 content", "doc3 content"]
# index = {}
# for i, doc in enumerate(documents):
#     index[i] = doc
# print(index)

#### generating embeddings

In [None]:
embeddings = []
valid_texts = []
for i ,text in enumerate(texts[4:10]):
    clean_text = text.strip()
    if not clean_text:
        print(f"Skipping empty text at index {i}")
        continue
    embedding = generate_embeddings(text)
    embeddings.append(embedding)
    valid_texts.append(text)
    print(f"Processed {i+1}/{len(texts)}: {text[:30]}... -> {embedding[:5]}")

Around 2600 B.C.E., regional cultures were united into a culturally integrated network in the Indus Valley region. Settlements in this civilization extended over a 650,000 square kilometer region. The peoples of the region shared a number of cultural characteristics, including planned urban
Processed 1/1670: Around 2600 B.C.E., regional c... -> [ 0.04603599 -0.01804645  0.04975916  0.05773114 -0.02888747]
including planned urban developments, the use of a still undeciphered script, standardized weights, and craft technologies.
Processed 2/1670: including planned urban develo... -> [ 0.06665847  0.04012987  0.05118857 -0.01775211  0.00607692]
The Indus Valley cultural system declined in the early centuries of the second millennium B.C.E., probably due to environmental changes in the region. Around 1500, Indo-Aryan culture began to dominate the region. Indo-Aryan culture is associated with Sanskrit, a language related to Greek, Latin,
Processed 3/1670: The Indus Valley cultural syst... -

In [18]:
# [generate_embeddings(i).astype('float32') for i in texts]

### Creating FAISS DB and storing on local

In [19]:
import faiss
from langchain.vectorstores.faiss import FAISS
from langchain.docstore.document import Document

In [23]:
dimension = embeddings[0].shape[0]
print(dimension)
print(embeddings[0])

faiss_index = faiss.IndexFlatL2(dimension)
faiss_index.add(np.array(embeddings).astype('float32'))

1536
[ 0.04603599 -0.01804645  0.04975916 ... -0.0272887   0.02036797
 -0.00077406]


In [26]:
### To visualize the faiss database documents
# FAISS.from_documents(docs[0:6], embeddings)

In [48]:
from langchain.vectorstores.faiss import FAISS
from langchain.docstore.document import Document
document = [Document(page_content=text) for text in texts[:len(embeddings)]]

import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

faiss_index = faiss.IndexFlatL2(dimension)
vector_store = FAISS(
    embedding_function=generate_embeddings,
    index=faiss_index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)
vector_store.add_documents(document)

`embedding_function` is expected to be an Embeddings object, support for passing in a function will soon be removed.


Skip to main content

Search
Donate  Search
Arts
Education
Policy
Our Work
Our Locations
Events
About
×
India: A Historical Overview
Holy Waters of the Gurudwara Bangla Sahib. (~FreeBirD®~/flickr)
Origins of the Indus Valley Civilization
The earliest evidence of humans in South Asia dates back two million years. Beginning about 30,000 years ago, stone age hunters and gatherers inhabited sites in the area. Between 8000 and 6500 B.C.E., there was a gradual shift from dependence on wild resources to domestic plants and animals.
During the period between 5000 and 2000 B.C.E., highly organized urban settlements spread throughout northern regions (present-day Pakistan and north India). Trade and communication networks linked these settlements to one another and to other distant ancient cultures.
Indus Valley Civilization and the Rise of Indo-Aryan Culture
Around 2600 B.C.E., regional cultures were united into a culturally integrated network in the Indus Valley region. Settlements in this civ

['ba093654-c9ec-4f86-a5c2-0c927d3648f4',
 'd94a0b24-5dff-4c0b-8ba8-1a9d6826ad7a',
 'e03569bb-4376-44eb-8d0d-84a7a10eb8b6',
 'a939fdf5-97bc-4431-8a8c-2843c0439af5',
 '27016de6-3627-4ffa-a123-60a30e1fe7cc',
 '91191df4-d65b-4047-a181-19b7db822aab']

In [None]:
vector_store.save_local("faiss_index")
## Will see two files in the local directory: index.faiss and index.pkl

In [None]:
retriever = vector_store.as_retriever(search_kwargs={"k":2})  ## 2 is no of top matches from our text file stored in faiss db

#### Creating LLM

In [52]:
from euriai import EuriaiLangChainLLM

llm = EuriaiLangChainLLM(
    api_key=f"{EURI_API_KEY}",
    model="gpt-4.1-nano",
    temperature=0.0,
    max_tokens=300
)

print(llm.invoke("Write a poem about time travel in very short."))

Through shadows of the past we glide,  
Moments fleeting, dreams beside,  
Time’s river flows, a whispered song,  
In every tick, we drift along.


#### Creating retrieval chains

In [58]:
from langchain.chains import RetrievalQA

# retrival - fetch from vector DB
# llm - can be Euri, Open AI, Gemini, etc.
rag_chain = RetrievalQA.from_chain_type(
                llm = llm,
                retriever = retriever)

### Testing the RAG output with LLM output

In [67]:
questions = [
    'Why is India not among the developed country yet? Answer in 5 points with 20 words only.',
    "What is the history of India, highlight it's accomplishment for the world? Answer in 5 points with 20 words only.",
    "Precisely tell which dynastries ruled India in it's medival history?  Answer in 5 points with 20 words only."
    ]

In [59]:
rag_chain("What is Bharat?")

  rag_chain("What is Bharat?")


What is Bharat?


{'query': 'What is Bharat?',
 'result': 'Bharat is the name often used to refer to India in various Indian languages and contexts. It is considered the traditional name of the country and is derived from ancient Indian texts and history.'}

In [None]:
## Using Euri LLM
generate_response("What is Bharat?")

{'id': 'chatcmpl-BVc46Ltshr9qvSSGBEJJpeAKUIwbX', 'object': 'chat.completion', 'created': 1746874191, 'model': 'gpt-4.1-nano', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': '"Bharat" is the official name of India in several Indian languages, including Hindi, Sanskrit, and others. It is derived from ancient texts and signifies the land of the Bharata, an ancestral figure or dynasty mentioned in Indian history and mythology. The term "Bharat" is often used in formal contexts, official documents, and the Constitution of India to refer to the country. It embodies the cultural, historical, and national identity of India.'}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 22, 'completion_tokens': 92, 'total_tokens': 114}}


'"Bharat" is the official name of India in several Indian languages, including Hindi, Sanskrit, and others. It is derived from ancient texts and signifies the land of the Bharata, an ancestral figure or dynasty mentioned in Indian history and mythology. The term "Bharat" is often used in formal contexts, official documents, and the Constitution of India to refer to the country. It embodies the cultural, historical, and national identity of India.'

In [62]:
rag_chain(questions[1])

What is the history of India, highlight it's accomplishment for the world? Give in less than 5 sentences.


{'query': "What is the history of India, highlight it's accomplishment for the world? Give in less than 5 sentences.",
 'result': "India's history dates back to the Indus Valley Civilization around 2600 B.C.E., one of the world's earliest urban societies. It has contributed significantly to global culture, philosophy, mathematics, and science, including the development of concepts like zero and advanced mathematical ideas. India was also the birthplace of major religions such as Hinduism, Buddhism, Jainism, and Sikhism. Throughout history, it has been a center of trade, art, and learning, influencing regions across Asia and beyond. Today, India is recognized as a major emerging economy and a diverse, culturally rich nation."}

In [63]:
generate_response(questions[1])

{'id': 'chatcmpl-BVc6ODezHuhzxOkvD5dth9PQbvFWI', 'object': 'chat.completion', 'created': 1746874332, 'model': 'gpt-4.1-nano', 'choices': [{'index': 0, 'message': {'role': 'assistant', 'content': "India's history is rich and diverse, marked by ancient civilizations like the Indus Valley, and significant contributions to mathematics, astronomy, and philosophy. It was the birthplace of major religions such as Hinduism, Buddhism, Jainism, and Sikhism. India achieved independence in 1947, inspiring global movements for freedom and democracy. Its innovations include zero and decimal system, and it has made notable advancements in space technology with missions like Mars Orbiter Mission. Today, India is a leading global economy and a cultural hub influencing the world through its arts, science, and technology."}, 'finish_reason': 'stop'}], 'usage': {'prompt_tokens': 40, 'completion_tokens': 116, 'total_tokens': 156}}


"India's history is rich and diverse, marked by ancient civilizations like the Indus Valley, and significant contributions to mathematics, astronomy, and philosophy. It was the birthplace of major religions such as Hinduism, Buddhism, Jainism, and Sikhism. India achieved independence in 1947, inspiring global movements for freedom and democracy. Its innovations include zero and decimal system, and it has made notable advancements in space technology with missions like Mars Orbiter Mission. Today, India is a leading global economy and a cultural hub influencing the world through its arts, science, and technology."

In [None]:
### Play with the chunk_size and overlap by increasing their values to improve the output.