In [None]:
pip install faiss-cpu

In [None]:
pip install numpy

In [None]:
pip install requests

In [24]:
data = """
Making an Impact
Helping Millions of Students Succeed
Sudhanshu's commitment to affordable education wasn't just a business strategy—it was his life's mission. Over the years, iNeuron has helped over 1.5 million students from 34+ countries, providing them with the skills they need to succeed in today's competitive job market. Many of these students, like Sudhanshu himself, came from disadvantaged backgrounds. They saw iNeuron as a lifeline—an opportunity to rise above their circumstances.

In 2022, iNeuron was acquired by PhysicsWallah in a deal worth ₹250 crore. While this acquisition was a significant milestone, Sudhanshu remained focused on his mission. Even after the acquisition, iNeuron continued to offer some of the most affordable and accessible tech courses in the world.

The Entrepreneur and Teacher: Sudhanshu's Dual Legacy
Sudhanshu's journey isn't just one of entrepreneurial success; it's also a story of dedication to teaching. Throughout his career, he has remained a passionate educator, constantly looking for ways to empower others through knowledge. Whether teaching courses in Big Data, Data Science, or programming, Sudhanshu has always sought to make complex subjects accessible to learners at all levels.

His commitment to affordable education has earned him the respect and admiration of countless students. Many credit Sudhanshu with changing their lives, helping them secure jobs, improve their skills, and break free from the limitations of their backgrounds."""

In [25]:
clean_data = data.strip()
clean_data

"Making an Impact\nHelping Millions of Students Succeed\nSudhanshu's commitment to affordable education wasn't just a business strategy—it was his life's mission. Over the years, iNeuron has helped over 1.5 million students from 34+ countries, providing them with the skills they need to succeed in today's competitive job market. Many of these students, like Sudhanshu himself, came from disadvantaged backgrounds. They saw iNeuron as a lifeline—an opportunity to rise above their circumstances.\n\nIn 2022, iNeuron was acquired by PhysicsWallah in a deal worth ₹250 crore. While this acquisition was a significant milestone, Sudhanshu remained focused on his mission. Even after the acquisition, iNeuron continued to offer some of the most affordable and accessible tech courses in the world.\n\nThe Entrepreneur and Teacher: Sudhanshu's Dual Legacy\nSudhanshu's journey isn't just one of entrepreneurial success; it's also a story of dedication to teaching. Throughout his career, he has remained 

In [26]:
max_char = 800
overlap = 100
chunks = []
i = 0
while i < len(clean_data):
    piece = clean_data[i:i+max_char]
    chunks.append(piece)
    i = i + max_char - overlap

In [27]:
import requests
import numpy as np

def generate_embeddings(text):
    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer euri-1f3025f0ddab44e290bf86bed48790c9f4fd1325a59b42e49701607ebb5b9546"
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small"
    }

    response = requests.post(url, headers=headers, json=payload)
    data = response.json()
    
    embedding = np.array(data['data'][0]['embedding'])
    
    return embedding


In [11]:
for i in chunks:
    embedding = generate_embeddings(i)
    print(embedding)


[-0.01310152 -0.0252581   0.0327155  ... -0.02060999  0.02083985
  0.02158048]
[ 0.00055135 -0.02026388  0.04535372 ... -0.04037207  0.00043906
  0.01220114]
[ 0.03170612  0.01639023  0.04585595 ... -0.01134607 -0.06964864
  0.01555172]


In [28]:
emb_list = []
meta = []

for idx,chunk in enumerate(chunks):
    vec = generate_embeddings(chunk)
    emb_list.append(vec.astype("float32"))
    
    meta.append({"chunk_index": idx, "chunk": chunk})

In [13]:
print(type(emb_list))
print(emb_list)
print(meta)

<class 'list'>
[array([-0.01310152, -0.0252581 ,  0.0327155 , ..., -0.02061   ,
        0.02083985,  0.02158048], dtype=float32), array([ 0.00054226, -0.02030935,  0.04533938, ..., -0.04035931,
        0.00043122,  0.01220376], dtype=float32), array([ 0.0317256 ,  0.01637366,  0.04584625, ..., -0.01135022,
       -0.06958151,  0.01558772], dtype=float32)]
[{'chunk_index': 0, 'chunk': "Making an Impact\nHelping Millions of Students Succeed\nSudhanshu's commitment to affordable education wasn't just a business strategy—it was his life's mission. Over the years, iNeuron has helped over 1.5 million students from 34+ countries, providing them with the skills they need to succeed in today's competitive job market. Many of these students, like Sudhanshu himself, came from disadvantaged backgrounds. They saw iNeuron as a lifeline—an opportunity to rise above their circumstances.\n\nIn 2022, iNeuron was acquired by PhysicsWallah in a deal worth ₹250 crore. While this acquisition was a significa

In [15]:
xb = np.vstack(emb_list)
xb

array([[-0.01310152, -0.0252581 ,  0.0327155 , ..., -0.02061   ,
         0.02083985,  0.02158048],
       [ 0.00054226, -0.02030935,  0.04533938, ..., -0.04035931,
         0.00043122,  0.01220376],
       [ 0.0317256 ,  0.01637366,  0.04584625, ..., -0.01135022,
        -0.06958151,  0.01558772]], dtype=float32)

In [16]:
import faiss

In [None]:
faiss.normalize_L2(xb) # Normalize the vectors

In [18]:
d = xb.shape[1]  # Dimension of the vectors
d

1536

In [None]:
index = faiss.IndexFlatIP(d) # Inner product (dot product) index
index.add(xb)

In [21]:
index_path = "index_vectordb.faiss"
meta_path = "meta_vectordb.json"
faiss.write_index(index, index_path)

In [22]:
import json
with open(meta_path, 'w') as f:
    for item in meta:
        f.write(json.dumps(item) + "\n")

In [23]:
query = "What is the mission on iNeuron"
q = generate_embeddings(query).astype("float32").reshape(1, -1)
faiss.normalize_L2(q)  # Normalize the query vector
index.search(q, k=5)  # Search for the top 5 nearest neighbors  

(array([[ 5.8033705e-01,  1.9204220e-01,  1.2751120e-01, -3.4028235e+38,
         -3.4028235e+38]], dtype=float32),
 array([[ 0,  1,  2, -1, -1]], dtype=int64))

In [29]:
test = np.array([[4,5,6,7,8]], dtype=np.float32)
test

array([[4., 5., 6., 7., 8.]], dtype=float32)

In [30]:
np.linalg.norm(test)

13.784049

In [31]:
faiss.normalize_L2(test)

In [32]:
np.linalg.norm(test)

0.99999994