In [15]:
# Import the SentenceTransformer class from the sentence_transformers library. 
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone, ServerlessSpec
from dotenv import load_dotenv, find_dotenv
import time
import os

In [16]:
model = SentenceTransformer('all-MiniLM-L6-v2')

In [17]:
sentences = [
[
    """
    The weather is lovely today. It's so sunny outside! He drove to the stadium.
    """
],
[
    """
    She is a great singer. He is a good dancer. They are happy together.
    """
],
[
    """
    The dog is very happy. The cat is very sleepy. The baby is very hungry. The tiger is angry.
    """
],
[
    """
    The sun is shining brightly. The clouds are white and fluffy. The sky is blue and clear. The grass is green and soft. The wind is blowing softly.
    The trees are swaying gently. The flowers are blooming beautifully. The birds are singing sweetly. The bees are buzzing happily.
    The butterflies are fluttering freely.
    """
]
]

In [18]:
embeddings = []
for sentence in sentences:
    embeddings.append(model.encode(sentence))

In [5]:
#similarities = model.similarity(embeddings, embeddings)

In [21]:
load_dotenv(find_dotenv('Keys.env'))

True

In [22]:
pine_cone_key = os.getenv("PINECONE_API_KEY")
pc_database = Pinecone(api_key = pine_cone_key) 

In [23]:
index_name = "sample"

In [40]:
for embed in embeddings:
    print(embed)

[[ 8.83325040e-02  1.24660596e-01  9.04427320e-02  6.02287464e-02
   6.82359263e-02 -1.48833236e-02  1.09645925e-01 -1.66125670e-02
  -5.51505722e-02  3.20862867e-02 -5.42901121e-02 -2.95878202e-02
  -8.45389813e-03  8.45121145e-02  8.54365975e-02  2.65274886e-02
  -2.02885382e-02 -2.31902525e-02 -5.36055863e-02  7.76851457e-03
  -8.17822963e-02  5.90036549e-02 -4.30413112e-02  1.69440247e-02
  -4.26128618e-02  5.15513495e-02  1.84305850e-02  9.10640806e-02
   8.26136768e-03  2.18566228e-03  1.60267875e-02  3.33287083e-02
   3.69256013e-03 -2.20295358e-02 -3.21219191e-02 -6.57981215e-03
   1.66011229e-02 -1.18287601e-01 -2.15978436e-02  5.11574782e-02
   1.11844530e-02 -6.88677877e-02 -2.26163827e-02  7.42716342e-02
  -1.70359667e-02 -5.76029159e-02 -6.68877689e-03 -9.54227988e-03
   1.57892048e-01 -1.00390222e-02 -3.45103815e-02  3.30045223e-02
   7.98522756e-02 -1.15698062e-01 -5.07436544e-02  1.04126103e-01
   7.55596673e-03 -2.61067948e-03  1.48293953e-02 -3.21609713e-02
   2.97377

In [25]:

if not pc_database.has_index(index_name):
    pc_database.create_index(
        name=index_name,
        dimension=384, # Replace with your model dimensions
        metric="cosine", # Replace with your model metric
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        ) 
    )

In [26]:
# Wait for the index to be ready
while not pc_database.describe_index(index_name).status['ready']:
    time.sleep(1)

In [44]:
records = []
id = 0
for d, e in zip(sentences, embeddings):
    #records.append((d, e))
    #print(d)
    #print(e)
    id += 1
    records.append({"id": str(id),"values":e[0],"metadata": {'text':d[0]}})


In [45]:
print(records)

[{'id': '1', 'values': array([ 8.83325040e-02,  1.24660596e-01,  9.04427320e-02,  6.02287464e-02,
        6.82359263e-02, -1.48833236e-02,  1.09645925e-01, -1.66125670e-02,
       -5.51505722e-02,  3.20862867e-02, -5.42901121e-02, -2.95878202e-02,
       -8.45389813e-03,  8.45121145e-02,  8.54365975e-02,  2.65274886e-02,
       -2.02885382e-02, -2.31902525e-02, -5.36055863e-02,  7.76851457e-03,
       -8.17822963e-02,  5.90036549e-02, -4.30413112e-02,  1.69440247e-02,
       -4.26128618e-02,  5.15513495e-02,  1.84305850e-02,  9.10640806e-02,
        8.26136768e-03,  2.18566228e-03,  1.60267875e-02,  3.33287083e-02,
        3.69256013e-03, -2.20295358e-02, -3.21219191e-02, -6.57981215e-03,
        1.66011229e-02, -1.18287601e-01, -2.15978436e-02,  5.11574782e-02,
        1.11844530e-02, -6.88677877e-02, -2.26163827e-02,  7.42716342e-02,
       -1.70359667e-02, -5.76029159e-02, -6.68877689e-03, -9.54227988e-03,
        1.57892048e-01, -1.00390222e-02, -3.45103815e-02,  3.30045223e-02,
  

In [46]:
index = pc_database.Index(index_name)

index.upsert(
    vectors=records,
    namespace="sample-namespace"
)

{'upserted_count': 4}

In [47]:

index.query(
    namespace="sample-namespace",
    id="1",
    top_k=3,
    include_values=True
)



{'matches': [{'id': '1',
              'score': 0.999569058,
              'values': [0.0191957,
                         0.120085329,
                         0.159598336,
                         0.0670659393,
                         0.050074894,
                         -0.0259187482,
                         0.0564682372,
                         -0.0928577483,
                         -0.0376114659,
                         0.00632389775,
                         -0.0428877696,
                         0.00402833661,
                         0.0047278069,
                         0.0324676,
                         0.0495198146,
                         0.0529818535,
                         -0.0404454134,
                         -0.0214836914,
                         -0.0302760378,
                         0.0220859032,
                         -0.16077587,
                         0.0808078,
                         -0.0280131176,
                         0.0806255788,
      

In [38]:
print(embeddings[1])

[[-2.68487465e-02 -8.79947543e-02 -1.52743729e-02 -2.17556208e-02
  -1.23019516e-01  3.67131904e-02  8.50789249e-02 -1.35904914e-02
  -2.22355817e-02 -1.44744795e-02  1.01525560e-02 -1.32794287e-02
   1.26951300e-02 -5.99091165e-02  3.98687534e-02  6.28173128e-02
   1.94420200e-02 -2.36896216e-03 -9.81499776e-02  2.11755689e-02
  -6.25363141e-02 -7.69998282e-02  3.31834704e-02  9.37165841e-02
  -9.05305818e-02 -5.81248105e-03  4.48831990e-02 -3.39649320e-02
   5.62749840e-02  4.26863171e-02 -9.03983600e-03  3.31113301e-02
  -5.63671961e-02  8.18704441e-02 -1.06493160e-02  8.99995491e-02
  -5.63913025e-03 -5.55978902e-03 -2.24697255e-02 -9.38793458e-03
  -1.01148686e-03 -9.72942933e-02 -1.97256710e-02 -1.91649143e-02
  -5.39264865e-02 -5.16493879e-02 -2.76916511e-02 -8.26691538e-02
  -1.55675132e-02 -1.72291100e-02 -5.25824837e-02  3.78103964e-02
   3.57092209e-02 -1.27384365e-01 -8.88232980e-03  4.19402085e-02
   6.28920794e-02  8.71248636e-03  4.34171073e-02 -7.71688391e-03
   9.31182

In [68]:
import numpy as np

sentenceOne = "The Jupiter is biggest planet in our solar system."

index.query(
    namespace="sample-namespace",
    vector=np.array(model.encode(sentenceOne)).tolist(),
    top_k=1,
    include_metadata=True # Include metadata in the response.
)

{'matches': [{'id': '2',
              'metadata': {'text': '\n'
                                   '    She is a great singer. He is a good '
                                   'dancer. They are happy together.\n'
                                   '    '},
              'score': 0.101946004,
              'values': []}],
 'namespace': 'sample-namespace',
 'usage': {'read_units': 6}}