In [None]:
import os

import numpy as np
import requests

# API key for the Euri endpoint. Read from the environment so the secret never
# lives in the notebook; falls back to an empty string when the variable is unset.
EURI_API_KEY = os.environ.get("EURI_API_KEY", "")
def generate_embeddings(text, timeout=30):
    """Request an embedding for ``text`` from the Euri embeddings endpoint.

    Sends a JSON POST containing ``text`` as ``input`` and the model name
    ``text-embedding-3-small``, authenticated with ``EURI_API_KEY`` as a
    bearer token.

    Args:
        text: Value sent as the ``input`` field of the request payload.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the kernel indefinitely.

    Returns:
        numpy.ndarray built from ``data[0]["embedding"]`` of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
        KeyError / IndexError: If the response body lacks the expected
            ``data[0]["embedding"]`` entry.
    """
    url = "https://api.euron.one/api/v1/euri/embeddings"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {EURI_API_KEY}"
    }
    payload = {
        "input": text,
        "model": "text-embedding-3-small"
    }

    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on auth/quota/server errors instead of hitting an opaque
    # KeyError while parsing an error body below.
    response.raise_for_status()
    body = response.json()

    # Only the first embedding is used; one input is sent per request.
    return np.array(body['data'][0]['embedding'])

In [2]:
# Sample source text that is chunked, embedded and indexed below.
data = """ Making an Impact Helping Millions of Students Succeed
Mentor's commitment to affordable education wasn't just a business strategy—it was his life's mission. Over the years, iNeuron has helped over 1.5 million students from 34+ countries, providing them with the skills they need to succeed in today's competitive job market. Many of these students, like the mentor himself, came from disadvantaged backgrounds. They saw iNeuron as a lifeline—an opportunity to rise above their circumstances.
In 2022, iNeuron was acquired by PhysicsWallah in a deal worth ₹250 crore. While this acquisition was a significant milestone, mentor remained focused on his mission. Even after the acquisition, iNeuron continued to offer some of the most affordable and accessible tech courses in the world."""


def chunk_text(text, max_char=100, overlap=20):
    """Split ``text`` into fixed-width character windows that overlap.

    Each window holds at most ``max_char`` characters and starts
    ``max_char - overlap`` characters after the previous one, so consecutive
    windows share ``overlap`` characters.

    Args:
        text: String to split.
        max_char: Maximum number of characters per chunk; must be positive.
        overlap: Characters shared between neighbouring chunks; must satisfy
            ``0 <= overlap < max_char`` so the window always advances.

    Returns:
        List of chunk strings in document order (empty for empty ``text``).

    Raises:
        ValueError: If ``max_char`` or ``overlap`` would stop the window from
            moving forward (which would otherwise loop forever).
    """
    if max_char <= 0:
        raise ValueError("max_char must be a positive integer")
    if not 0 <= overlap < max_char:
        raise ValueError("overlap must satisfy 0 <= overlap < max_char")
    step = max_char - overlap
    return [text[start:start + max_char] for start in range(0, len(text), step)]


clean_data = data.strip()
max_char = 100
overlap = 20
chunks = chunk_text(clean_data, max_char=max_char, overlap=overlap)

In [3]:
# One metadata record per chunk, keyed by the chunk's position in `chunks`.
meta = [{"id": position, "text": piece_text} for position, piece_text in enumerate(chunks)]

# Embed each chunk in order, echoing the vector and storing it as float32.
emb_list = []
for idx, chunk in enumerate(chunks):
    vec = generate_embeddings(chunk)
    print(f"Chunk {idx+1} embedding: {vec}")
    emb_list.append(vec.astype("float32"))

# Stack the per-chunk vectors into a single (num_chunks, dim) matrix.
xb = np.vstack(emb_list)
num_vectors, d = xb.shape
print((num_vectors, d))

Chunk 1 embedding: [ 0.01125363 -0.04198496  0.03206514 ... -0.00965171 -0.00748008
 -0.00309157]
Chunk 2 embedding: [ 0.00531732  0.01498619 -0.00798524 ... -0.00841447 -0.00369659
 -0.03715095]
Chunk 3 embedding: [-0.02063813 -0.0040367   0.04868835 ... -0.01943952 -0.00559351
 -0.00585872]
Chunk 4 embedding: [ 0.02577067 -0.00252622  0.08125098 ... -0.0271094  -0.02772728
  0.03045624]
Chunk 5 embedding: [-0.04102746  0.01827915  0.00184943 ... -0.01435128 -0.03389346
 -0.02301202]
Chunk 6 embedding: [ 0.00344221 -0.00734469  0.01752116 ... -0.00409945 -0.04278097
  0.00384982]
Chunk 7 embedding: [-0.0210235  -0.06276387  0.02796912 ... -0.0342615   0.03359494
  0.03884748]
Chunk 8 embedding: [ 0.00960911 -0.02048472  0.03460754 ... -0.03849536 -0.00506964
  0.00691783]
Chunk 9 embedding: [-0.0221011  -0.01159247  0.02120574 ... -0.01461626 -0.00757124
  0.00940121]
Chunk 10 embedding: [-0.05103014  0.01305801 -0.01491092 ... -0.02556515  0.01165581
  0.02030689]
(10, 1536)


In [None]:
import json
import os

import faiss

# Locations of the persisted index and its metadata.
# NOTE: the metadata file is written as JSON Lines (one object per line),
# despite the .json extension.
index_path = 'data/index.faiss'
meta_path = 'data/meta_data.json'

# The query vector is L2-normalised before searching, so the stored vectors
# must be normalised too for inner product to equal cosine similarity.
# Normalise a copy: faiss.normalize_L2 works in place and `xb` should stay
# untouched so this cell can be re-run safely.
xb_unit = xb.copy()
faiss.normalize_L2(xb_unit)

index = faiss.IndexFlatIP(d)  # exact inner-product index over d-dimensional vectors
index.add(xb_unit)

# Make sure the output directories exist before writing to them.
for target in (index_path, meta_path):
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)

faiss.write_index(index, index_path)
with open(meta_path, "w", encoding="utf-8") as f:
    for item in meta:
        f.write(json.dumps(item) + "\n")

In [6]:
query = "What is the mission of ineuron?"

# Embed the question and shape it as a single-row float32 batch for faiss.
query_vec = generate_embeddings(query).astype("float32")
q = query_vec.reshape(1, -1)

# Unit-normalise in place: with an inner-product index this scores by cosine similarity.
faiss.normalize_L2(q)

# Returns (scores, ids) for the 3 best-matching chunks.
top_k = 3
index.search(q, top_k)

(array([[0.48216653, 0.45979583, 0.44330272]], dtype=float32),
 array([[8, 2, 5]]))