In [10]:
import os
from dotenv import load_dotenv
from pinecone import Pinecone
import time
from nltk.tokenize import sent_tokenize
from sentence_transformers import SentenceTransformer
import uuid

# Load variables from a local .env file into the process environment so the
# API key does not have to be hardcoded in the notebook.
load_dotenv()

# Pinecone client shared by every Helper method below; the key is read from
# the API_KEY environment variable (None if it is not set).
api_key = os.getenv('API_KEY')
pc = Pinecone(api_key=api_key)
# Sentence-embedding model; in this notebook it yields 384-dimensional vectors
# (see the `vector.shape` output further down).
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

class Helper:
    """Convenience wrapper around the module-level Pinecone client and embedding model.

    The methods rely on the notebook globals ``pc`` (Pinecone client),
    ``model`` (SentenceTransformer) and ``sent_tokenize`` (NLTK).

    Attributes:
        READY_TIMEOUT_SECONDS: Maximum time to wait for an index to report
            ready before raising ``TimeoutError``.
        READY_POLL_SECONDS: Delay between two consecutive readiness checks.
    """

    READY_TIMEOUT_SECONDS = 300
    READY_POLL_SECONDS = 1

    def _wait_until_ready(self, index_name):
        """Block until ``index_name`` reports ready, or raise ``TimeoutError``."""
        deadline = time.monotonic() + self.READY_TIMEOUT_SECONDS
        while not pc.describe_index(index_name).status['ready']:
            # Bounded wait: an index that never becomes ready must not hang
            # the kernel indefinitely.
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"Pinecone index {index_name!r} was not ready after "
                    f"{self.READY_TIMEOUT_SECONDS} seconds"
                )
            time.sleep(self.READY_POLL_SECONDS)

    @staticmethod
    def _as_list(values):
        """Return one embedding as a plain list (array-likes via ``tolist()``)."""
        return values.tolist() if hasattr(values, "tolist") else list(values)

    def upsert_method(self, vector, index_name="test1", namespace="ns1"):
        """Upsert every embedding in ``vector`` under a freshly generated UUID.

        Args:
            vector: Iterable of embeddings (array rows or plain sequences of floats).
            index_name: Name of the Pinecone index to write to.
            namespace: Namespace inside the index.

        Returns:
            Whatever ``index.upsert`` returns.

        Raises:
            TimeoutError: If the index is not ready within ``READY_TIMEOUT_SECONDS``.

        Note:
            IDs are random, so calling this twice with the same embeddings
            stores them twice rather than overwriting the first copy.
        """
        self._wait_until_ready(index_name)
        index = pc.Index(index_name)

        # Same list conversion as query_method, so both methods send the
        # client the same payload type.
        vectors = [(str(uuid.uuid4()), self._as_list(v)) for v in vector]

        return index.upsert(
            vectors=vectors,
            namespace=namespace
        )

    def query_method(self, vector, top_k=10, index_name="test1", namespace="ns1"):
        """Query the index once per embedding and merge the matches.

        Args:
            vector: Iterable of query embeddings (array rows or plain sequences).
            top_k: Number of matches requested for each individual query.
            index_name: Name of the Pinecone index to query.
            namespace: Namespace inside the index.

        Returns:
            A list of match entries taken from each response's ``'matches'``,
            de-duplicated by ``'id'`` (first occurrence wins). Matches keep
            per-query order; they are not re-sorted across queries.

        Raises:
            TimeoutError: If the index is not ready within ``READY_TIMEOUT_SECONDS``.
        """
        self._wait_until_ready(index_name)
        index = pc.Index(index_name)

        result = []
        seen_ids = set()

        for v in vector:
            res = index.query(
                namespace=namespace,
                vector=self._as_list(v),
                top_k=top_k,
                include_values=True,
                include_metadata=True,
            )

            for match in res['matches']:
                if match['id'] not in seen_ids:
                    result.append(match)
                    seen_ids.add(match['id'])

        return result

    def embed_sentences(self, sentences):
        """Encode ``sentences`` with the module-level model and return the embeddings."""
        return model.encode(sentences)

    def split_text_into_sentences(self, text):
        """Split ``text`` into sentences using NLTK's ``sent_tokenize``."""
        return sent_tokenize(text)


helper_obj = Helper()



In [4]:
# Sample paragraph used to exercise sentence splitting and embedding.
text = "Hello! How are you doing today? I hope you're having a great day. Let's learn Python."
sentences = helper_obj.split_text_into_sentences(text)
vector = helper_obj.embed_sentences(sentences)
# One embedding row per sentence: (number of sentences, embedding dimension).
vector.shape

(4, 384)

In [5]:
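# Upsert is left commented out, presumably so that re-running the notebook does
# not store the same sentences again: upsert_method assigns a fresh random UUID
# to every vector, so each call adds new records instead of overwriting earlier
# ones. Uncomment to seed the index once.
# res = helper_obj.upsert_method(vector, "test1", "ns1")
# res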

In [6]:
# Sanity check: a single embedding row converts to a plain Python list,
# which is the form query_method passes to index.query.
type(vector[1].tolist())

list

In [9]:
# Same sanity check for every row of the embedding matrix.
for v in vector:
    print(type(v.tolist()))

<class 'list'>
<class 'list'>
<class 'list'>
<class 'list'>


In [12]:
# Query the index once per sentence embedding; query_method returns a flat
# list of matches de-duplicated by id.
query_result = helper_obj.query_method(vector)
len(query_result)

4

In [28]:
# query_method returns a plain list of matches (not a raw response with a
# 'matches' key), so the list is measured directly; string indexing would
# raise TypeError on a fresh Restart & Run All.
len(query_result)

4

In [29]:
# query_method already returns the de-duplicated list of matches, so display
# it directly; indexing a list with 'matches' raises TypeError.
query_result

[{'id': '72c7a564-48ff-4519-9e81-e653282dac60',
  'score': 0.00106728077,
  'values': [0.000709112908,
             0.0495909,
             0.0710198879,
             0.0266110301,
             0.015643714,
             -0.0480789319,
             0.0935944766,
             -0.0314276852,
             -0.0977535546,
             2.44742841e-05,
             -0.0320192538,
             0.00252180197,
             -0.0313249677,
             0.0655177534,
             0.0129150469,
             -0.020245973,
             -0.0264634211,
             -0.0901172608,
             -0.11462386,
             0.075493753,
             -0.0844666362,
             -0.0262119696,
             0.0140530057,
             0.0748023093,
             0.00468847621,
             0.0819041,
             -0.0251977667,
             -0.0158522166,
             -0.00189221953,
             -0.114256494,
             -0.123460628,
             -0.00623109424,
             -0.0284224935,
             -0.039852