In [1]:
from pathlib import Path

# Load every .txt file under files/ as a {"text", "source"} record.
# Files are visited in sorted-by-name order: glob() order is filesystem-dependent,
# and downstream vector-index row ids are positional, so a stable order keeps
# chunk positions reproducible between runs.
# errors="ignore" drops undecodable bytes instead of raising.
docs = []
for p in sorted(Path("files").glob("*.txt"), key=lambda path: path.name):
    text = p.read_text(encoding="utf-8", errors="ignore")
    docs.append({
        "text": text,
        "source": p.name
    })


In [2]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=800 and chunk_overlap=100
# (units follow the splitter's length function; the library default counts characters).
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)

# One record per chunk, each carrying the name of the file it came from.
chunks = [
    {"text": piece, "source": doc["source"]}
    for doc in docs
    for piece in splitter.split_text(doc["text"])
]


In [27]:
import json

# Serialize the chunk records first, then write them out as UTF-8 JSON
# (ensure_ascii=False keeps non-ASCII characters readable in the file).
serialized_chunks = json.dumps(chunks, indent=2, ensure_ascii=False)
with open("rag_metadata.json", "w", encoding="utf-8") as metadata_file:
    metadata_file.write(serialized_chunks)

print(f"Saved {len(chunks)} chunks to rag_metadata.json")

Saved 29353 chunks to rag_metadata.json


In [4]:
from openai import OpenAI
import os
from dotenv import load_dotenv
import time
import numpy as np
from tqdm import tqdm

load_dotenv()

# No api_key is passed explicitly; the OpenAI client is expected to pick up
# OPENAI_API_KEY from the environment populated by load_dotenv() above.
client = OpenAI()

batch_size = 100
max_retries = 5  # attempts per batch before giving up
embeddings = []

# Embed the chunks in batches. embeddings[j] MUST correspond to chunks[j], because
# the vector index built from this list is looked up positionally against the chunk list.
for i in tqdm(range(0, len(chunks), batch_size)):
    batch = chunks[i:i+batch_size]
    texts = [c["text"] for c in batch]

    for attempt in range(1, max_retries + 1):
        try:
            resp = client.embeddings.create(
                model="text-embedding-3-large",
                input=texts
            )
            break
        except Exception as e:
            print("Error at batch", i, e)
            if attempt == max_retries:
                # Skipping a batch would shift every later vector relative to its
                # chunk, so fail loudly instead of continuing with a misaligned list.
                raise RuntimeError(
                    f"Embedding failed for batch starting at chunk {i} "
                    f"after {max_retries} attempts"
                ) from e
            time.sleep(5 * attempt)  # linear backoff before retrying the same batch

    # Order by the per-item index so vectors line up with `texts` regardless of response order.
    embeddings.extend(
        item.embedding for item in sorted(resp.data, key=lambda item: item.index)
    )

assert len(embeddings) == len(chunks), "embeddings and chunks are out of alignment"


100%|██████████| 294/294 [07:11<00:00,  1.47s/it]


In [6]:
import faiss
import numpy as np

# Stack the embedding vectors into one float32 matrix (one row per chunk).
embedding_matrix = np.array(embeddings).astype("float32")
dim = embedding_matrix.shape[1]

# Flat L2 index sized to the embedding dimension; rows are added in chunk order.
index = faiss.IndexFlatL2(dim)
index.add(embedding_matrix)

In [26]:
# Save the index to disk, then rebind `index` to the copy loaded back from that file
# so later cells work with the persisted version.
index_path = "SEBI_RAG.faiss"
faiss.write_index(index, index_path)
index = faiss.read_index(index_path)

In [7]:
def embed_query(q):
    """Return the embedding vector for a single query string.

    Sends `q` to the `text-embedding-3-large` model through the module-level
    `client` and returns the `embedding` of the first item in the response.
    """
    response = client.embeddings.create(model="text-embedding-3-large", input=q)
    first_item = response.data[0]
    return first_item.embedding

In [12]:
# Row i of the vector index corresponds to metadata[i] (vectors were added in chunk order).
metadata = chunks

def retrieve(query, k=5):
    """Return up to `k` chunk records nearest to `query` in the vector index.

    Args:
        query: Free-text query; embedded with `embed_query`.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A list of entries from `metadata`, in the order returned by the index
        search. May contain fewer than `k` items when the index has fewer
        than `k` vectors.
    """
    q_emb = np.array([embed_query(query)]).astype("float32")
    _distances, indices = index.search(q_emb, k)
    # FAISS pads missing neighbours with label -1; without this filter Python's
    # negative indexing would silently return the last chunk instead.
    return [metadata[i] for i in indices[0] if i >= 0]

In [18]:
# Spot-check retrieval using an exact circular reference number and date.
# NOTE(review): in the saved output the first hit cites .../2017/64 (June 22, 2017), not the
# requested .../2017/59 — embedding search may not match identifiers exactly; verify.
retrieve("tell me about CIR/HO/MIRSD/MIRSD2/CIR/P/2017/59 June 15, 2017")

[{'text': 'monitoring of client funds under CIR/HO/MIRSD/MIRSD2CIR/P/2017/64\nI.\nthe provisions of Enhanced dated June 22, 2017 and\nSupervision. CIR/HO/MIRSD/MIRSD2/CIR/PB/2017/107\ndated September 25, 2017.\nSubmission of data on monthly SEBI/HO/MIRSD/MIRSD2/CIR/P/2016/95\nbasis towards clients’ and fund dated September 26, 2016 read with\nII.\nbalance under the provisions of CIR/HO/MIRSD/MIRSD2/CIR/PB/2017/107\nEnhanced Supervision. dated September 25, 2017.\nIII. Daily margin trading reporting. CIR/MRD/DP/54/2017 dated June 13, 2017.\nUpdate in Income Tax Permanent\nAccount Number of Key SEBI/HO/MIRSD/MIRSD2/CIR/P/2016/95\nIV.\nManagement Personnel / dated September 26, 2016.\nDirectors.\nSEBI letter no.\nIssue of Annual Global Statement\nV. SEBI/HO/MIRSD1/RKD/OW/P/2017/29513/1\nto clients.',
  'source': 'Relaxation in timelines for compliance with regulatory requirements by trading members clearing members.txt'},
 {'text': 'information is received from the Issuer on or before the

In [22]:
def rag_answer(query, k=5, model="gpt-4.1", show_context=True):
    """Answer `query` with a chat model, using retrieved chunks as context.

    Args:
        query: The user's question; used for retrieval and inserted into the prompt.
        k: Number of chunks to request from `retrieve` (default 5).
        model: Chat model name passed to the completions API (default "gpt-4.1").
        show_context: When True (default), print the assembled context before
            calling the model so the retrieved passages can be inspected.

    Returns:
        The message content of the first completion choice.
    """
    contexts = retrieve(query, k=k)

    # Each passage is prefixed with its source file name in square brackets.
    context_text = "\n\n".join(
        f"[{c['source']}]\n{c['text']}"
        for c in contexts
    )
    if show_context:
        print(context_text)

    prompt = f"""
Answer the question using the context below.

Context:
{context_text}

Question:
{query}
"""

    resp = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return resp.choices[0].message.content


In [24]:
# End-to-end run: retrieval followed by generation.
# NOTE(review): the question names no specific circular, and the saved contexts come from
# several unrelated files — consider a more specific query when evaluating answer quality.
rag_answer("tell me about the circular and what it means")

[Delivery Instruction Slip (DIS) Issuance and Processing.txt]
circular. The measures listed above under the head 'Monitoring of DIS' shall be
made applicable to the DIS issued as per the provisions of this circular.

[Arbitration Mechanism in Stock Exchanges.txt]
circular.
V. This Circular is issued in exercise of the powers conferred under Section
11 (1) of the Securities and Exchange Board of India Act 1992, read with

[Amendment to bye-laws of recognised stock exchanges with respect to non-compliance of certain listing conditions and adopting Standard Operating Proce_f7403c05.txt]
of the requirements of this circular.
Page 1 of 6

[Alternate Risk Management Framework Applicable in case of Near Zero and Negative Prices.txt]
circular.
5. This circular is issued in exercise of the powers conferred under Section 11 (1) of
the Securities and Exchange Board of India Act, 1992, to protect the interests of
investors in securities and to promote the development of, and to regulate the
securi

'Certainly! Based on the context provided from the various files, here is a summary and explanation of **what a SEBI “circular” is and what it means**:\n\n---\n\n### What is a SEBI “Circular”?\n\n- **A circular** is an official communication issued by the Securities and Exchange Board of India (SEBI), the regulatory authority for securities markets in India.\n- These circulars are **issued under the powers given to SEBI by Section 11(1) of the SEBI Act, 1992.**\n- Their primary objectives are to **protect the interests of investors**, **regulate**, and **promote the development of the securities market**.\n\n### What Does it Mean When a Circular is Issued?\n\n- A SEBI circular is essentially a set of instructions, clarifications, amendments, or guidance sent to participants in the securities market (like stock exchanges, brokers, depositories, etc.).\n- It **requires compliance** from the regulated entities and often introduces new requirements, procedures, or alterations to existing p