# Getting Started with Text Embeddings Using Mistral

In [23]:
import sys
#!{sys.executable} -m pip install -U google-generativeai

In [24]:
#import google.generativeai as palm
import os
import pprint
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

#palm.configure(api_key=os.environ['API_KEY'])

In [25]:
from dotenv import load_dotenv, find_dotenv

!pip install --quiet mistralai
from mistralai import Mistral

In [26]:
# Legacy PaLM snippet, kept for reference only. It is written as comments
# (not as a bare string literal) so the cell no longer echoes it as output.
#
#   for model in palm.list_models():
#     if 'embedText' in model.supported_generation_methods:
#       print(model.name)

"\nfor model in palm.list_models():\n  if 'embedText' in model.supported_generation_methods:\n    print(model.name)\n"

In [35]:
import os
import numpy as np
import dotenv
from mistralai import Mistral

from google.colab import drive
drive.mount('/content/drive')

os.environ.pop("MISTRAL_API_KEY", None)

# Make sure dotenv is loaded first
!ls "/content/drive/MyDrive/Credentials"
dotenv.load_dotenv('/content/drive/MyDrive/Credentials/mistral.env')

api_key = os.getenv('MISTRAL_API_KEY')
print("Loaded API key:", api_key[:10] + "...")

def get_text_embedding(txt, client=None):
    """Return the ``mistral-embed`` embedding of ``txt`` as a NumPy array.

    Parameters
    ----------
    txt : str
        Text to embed; passed to the API as ``inputs``.
    client : optional
        An already-constructed client exposing ``embeddings.create``. Pass one
        to reuse a single client across many calls. When omitted, a new
        ``Mistral`` client is built from the module-level ``api_key``, which
        is exactly what the one-argument call has always done.

    Returns
    -------
    numpy.ndarray
        ``response.data[0].embedding`` converted with ``np.array``.
    """
    if client is None:
        client = Mistral(api_key=api_key)
    response = client.embeddings.create(model="mistral-embed", inputs=txt)
    # Only one input is sent, so only the first result is read.
    return np.array(response.data[0].embedding)

# Smoke test: embed a single word and report how many values come back.
embedding_x = get_text_embedding("life")
print(embedding_x.shape[0])




Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
mistral.env
Loaded API key: XiOnyLYW3a...
1024


#### Use the embeddings model


In [36]:
x = 'life'
close_to_x = 'What is the meaning of life?'

# Legacy PaLM version, kept for reference only (written as comments rather
# than as a no-op string literal):
#
#   model = "models/embedding-gecko-001"
#
#   # Create an embedding
#   embedding_x = palm.generate_embeddings(model=model, text=x)
#   embedding_close_to_x = palm.generate_embeddings(model=model, text=close_to_x)
#
#   vector = embedding_x['embedding']
#   print(f"Length = {len(vector)}")
#   print(vector[:10])
#
#   text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])

# Embed the single word and the related question with the Mistral helper.
embedding_x = get_text_embedding(x)
embedding_close_to_x = get_text_embedding(close_to_x)

# Show the vector length and a short preview instead of the whole array.
print(f"Length = {len(embedding_x)}")
print(embedding_x[:10])


Length = 1024
[-0.0453186   0.01611328  0.00431442 -0.01395416  0.00355721  0.00596619
  0.03372192 -0.00984192 -0.00011432 -0.02319336]


In [37]:
# Same inspection for the second embedding: its length, then its first ten values.
print(
    f"Length = {len(embedding_close_to_x)}",
    embedding_close_to_x[:10],
    sep="\n",
)


Length = 1024
[-0.03460693  0.02758789  0.03076172  0.00062466  0.00250816 -0.02960205
  0.05349731 -0.01312256 -0.00432587 -0.03579712]


In [38]:
# Score how close the two texts are via the dot product of their embeddings.
# (The earlier PaLM version indexed each result with ['embedding'] first;
# here the embeddings are used directly.)
similar_measure = embedding_x.dot(embedding_close_to_x)
print(similar_measure)

0.7722747528326863


#### Similarity

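- Calculate the similarity between two sentences as a single number (here, the dot product of their embeddings); it typically falls between 0 and 1, and a higher value means the sentences are more similar.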
- Try out your own sentences and check if the similarity calculations match your intuition.


In [39]:
# Legacy versions, kept for reference only (written as comments rather than
# as a no-op string literal):
#
#   emb_1 = embedding_model.get_embeddings(
#       ["What is the meaning of life?"]) # 42!
#
#   emb_2 = embedding_model.get_embeddings(
#       ["How does one spend their time well on Earth?"])
#
#   emb_3 = embedding_model.get_embeddings(
#       ["Would you like a salad?"])
#
#   vec_1 = [emb_1[0].values]
#   vec_2 = [emb_2[0].values]
#   vec_3 = [emb_3[0].values]
#
#   emb_1 = palm.generate_embeddings(model=model, text=["What is the meaning of life?"])
#   emb_2 = palm.generate_embeddings(model=model, text=["How does one spend their time well on Earth?"])
#   emb_3 = palm.generate_embeddings(model=model, text=["Would you like a salad?"])
#
#   vec_1 = emb_1['embedding']
#   vec_2 = emb_2['embedding']
#   vec_3 = emb_3['embedding']

text1 = "What is the meaning of life?"
text2 = "How does one spend their time well on Earth?"
text3 = "Would you like a salad?"

# Embed the three sentences in order with the same helper, rather than
# repeating the call three times.
vec_1, vec_2, vec_3 = (
    get_text_embedding(sentence) for sentence in (text1, text2, text3)
)
print(f"Length = {len(vec_1)}")


Length = 1024


In [40]:
# Legacy scikit-learn version, kept for reference only (written as comments
# rather than as a no-op string literal; the stray `]` in the last call is
# fixed):
#
#   print(cosine_similarity(vec_1, vec_2))
#   print(cosine_similarity(vec_2, vec_3))
#   print(cosine_similarity(vec_1, vec_3))

# Pairwise dot products of the three sentence embeddings.
print(np.dot(vec_1, vec_2))
print(np.dot(vec_2, vec_3))
print(np.dot(vec_1, vec_3))


0.7439245758782818
0.6564006967482783
0.6934095253642454
