In [26]:
from mistralai import Mistral
import requests
import numpy as np
import os
from getpass import getpass
import pandas as pd
from sentence_transformers import SentenceTransformer
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [None]:
api_key = os.getenv("MISTRAL_API_KEY")
client = Mistral(api_key=api_key)

In [None]:
path = "./amazon.csv"
df = pd.read_csv(path)

In [41]:
df.head()

Unnamed: 0,product_id,product_name,category,discounted_price,actual_price,discount_percentage,rating,rating_count,about_product,user_id,user_name,review_id,review_title,review_content,img_link,product_link,stock
0,B07JW9H4J1,Wayona Nylon Braided USB to Lightning Fast Cha...,Computers&Accessories|Accessories&Peripherals|...,₹399,"₹1,099",64%,4.2,24269,High Compatibility : Compatible With iPhone 12...,"AG3D6O4STAQKAY2UVGEUV46KN35Q,AHMY5CWJMMK5BJRBB...","Manav,Adarsh gupta,Sundeep,S.Sayeed Ahmed,jasp...","R3HXWT0LRP0NMF,R2AJM3LFTLZHFO,R6AQJGUP6P86,R1K...","Satisfied,Charging is really fast,Value for mo...",Looks durable Charging is fine tooNo complains...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Wayona-Braided-WN3LG1-Sy...,90
1,B098NS6PVG,Ambrane Unbreakable 60W / 3A Fast Charging 1.5...,Computers&Accessories|Accessories&Peripherals|...,₹199,₹349,43%,4.0,43994,"Compatible with all Type C enabled devices, be...","AECPFYFQVRUWC3KGNLJIOREFP5LQ,AGYYVPDD7YG7FYNBX...","ArdKn,Nirbhay kumar,Sagar Viswanathan,Asp,Plac...","RGIQEG07R9HS2,R1SMWZQ86XIN8U,R2J3Y1WL29GWDE,RY...","A Good Braided Cable for Your Type C Device,Go...",I ordered this cable to connect my phone to An...,https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Ambrane-Unbreakable-Char...,93
2,B096MSW6CT,Sounce Fast Phone Charging Cable & Data Sync U...,Computers&Accessories|Accessories&Peripherals|...,₹199,"₹1,899",90%,3.9,7928,【 Fast Charger& Data Sync】-With built-in safet...,"AGU3BBQ2V2DDAMOAKGFAWDDQ6QHA,AESFLDV2PT363T2AQ...","Kunal,Himanshu,viswanath,sai niharka,saqib mal...","R3J3EQQ9TZI5ZJ,R3E7WBGK7ID0KV,RWU79XKQ6I1QF,R2...","Good speed for earlier versions,Good Product,W...","Not quite durable and sturdy,https://m.media-a...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Sounce-iPhone-Charging-C...,120
3,B08HDJ86NZ,boAt Deuce USB 300 2 in 1 Type-C & Micro USB S...,Computers&Accessories|Accessories&Peripherals|...,₹329,₹699,53%,4.2,94363,The boAt Deuce USB 300 2 in 1 cable is compati...,"AEWAZDZZJLQUYVOVGBEUKSLXHQ5A,AG5HTSFRRE6NL3M5S...","Omkar dhale,JD,HEMALATHA,Ajwadh a.,amar singh ...","R3EEUZKKK9J36I,R3HJVYCLYOY554,REDECAZ7AMPQC,R1...","Good product,Good one,Nice,Really nice product...","Good product,long wire,Charges good,Nice,I bou...",https://m.media-amazon.com/images/I/41V5FtEWPk...,https://www.amazon.in/Deuce-300-Resistant-Tang...,93
4,B08CF3B7N1,Portronics Konnect L 1.2M Fast Charging 3A 8 P...,Computers&Accessories|Accessories&Peripherals|...,₹154,₹399,61%,4.2,16905,[CHARGE & SYNC FUNCTION]- This cable comes wit...,"AE3Q6KSUK5P75D5HFYHCRAOLODSA,AFUGIFH5ZAFXRDSZH...","rahuls6099,Swasat Borah,Ajay Wadke,Pranali,RVK...","R1BP4L2HH9TFUP,R16PVJEXKV6QZS,R2UPDB81N66T4P,R...","As good as original,Decent,Good one for second...","Bought this instead of original apple, does th...",https://m.media-amazon.com/images/W/WEBP_40237...,https://www.amazon.in/Portronics-Konnect-POR-1...,109


In [None]:
useless_features = ['Full_Text', 'user_id', 'user_name', 'review_id', 'img_link', 'product_link', 'product_id']

df['Full_Text'] = df.apply(lambda row: ' '.join([f"{col}: {row[col]}" for col in df.columns if col not in useless_features]), axis=1)

Index(['product_id', 'product_name', 'category', 'discounted_price',
       'actual_price', 'discount_percentage', 'rating', 'rating_count',
       'about_product', 'user_id', 'user_name', 'review_id', 'review_title',
       'review_content', 'img_link', 'product_link', 'stock', 'Full_Text'],
      dtype='object')


In [None]:
# def get_text_embedding(input):
#     embeddings_batch_response = client.embeddings.create(
#           model="mistral-embed",
#           inputs=input
#       )
#     return embeddings_batch_response.data[0].embedding

def get_embedding(text, model_name="all-MiniLM-L6-v2"):
    try:
        model = SentenceTransformer(model_name)
        return model.encode(text)
    except Exception as e:
        return None
    
def preprocess_text(text):
    # Lowercasing
    text = text.lower()
    # Remove punctuation
    text = re.sub(r'[^\w\s]', '', text)
    # Tokenization
    words = text.split()
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    words = [word for word in words if word not in stop_words]
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    words = [lemmatizer.lemmatize(word) for word in words]
    return ' '.join(words)

In [59]:
product_test_name = df["product_name"].values[2]
product_test_price = df["actual_price"].values[2]

product_test_embedding = text_embeddings["embedding"].values[2]

In [98]:
def cosine_similarity(vector_a, vector_b):
    norm_a = np.linalg.norm(vector_a)
    norm_b = np.linalg.norm(vector_b)
    if norm_a == 0 or norm_b == 0:
        raise ValueError()
    return np.dot(vector_a, vector_b)[0] / (norm_a * norm_b)

In [None]:
chunks = df["Full_Text"].values

def generate_chunks(question, text_embeddings, text_strings):
    chunks = [preprocess_text(chunk) for chunk in chunks]

    text_embeddings = pd.DataFrame([
        {"embedding": get_embedding(chunk), "text": chunk}
        for chunk in chunks[:5]
    ])
    k = 5
    question_embedding = np.array([get_embedding(preprocess_text(question))])
    similarities = np.array([cosine_similarity(question_embedding, vector) for vector in text_embeddings['embedding']])
    idx_sorted_similarities = np.argsort(similarities)[-k:][::-1]
    retrieved_chunks = text_strings[idx_sorted_similarities]
    return retrieved_chunks

In [None]:
question_1 = "I am looking for " + product_test_name
question_2 = "What is the actual price ?"

(1, 384)


In [92]:
def run_mistral(user_message, model="mistral-large-latest"):
    messages = [
        {
            "role": "user", "content": user_message
        }
    ]
    chat_response = client.chat.complete(
        model=model,
        messages=messages
    )
    return (chat_response.choices[0].message.content)

In [93]:
def next_query(discussion, question, retrieved_chunk):
    prompt = f"""
                Context information is below.
                ---------------------
                {[preprocess_text(chunk) for chunk in retrieved_chunk]}
                ---------------------
                Prior discussion is below.
                ---------------------
                {discussion}
                ---------------------
                Given the context information and the prior discussion, answer the query.
                Query: {question}
                Behave like a retail chat bot.
                Be as consise as possible.
                Answer:
                """
    print(prompt)
    answer = run_mistral(prompt)
    curr_discussion = discussion + "\n" + question + "\n" + answer
    return answer, curr_discussion

    

In [94]:
query_1, discussion_2 = next_query("What are you looking for ?", question_1, retrieved_chunks_1)
print("Guessed = ", query_1)
print("Real =",product_test_name)


                Context information is below.
                ---------------------
                ['product_name portronics konnect l 12m fast charging 3a 8 pin usb cable charge sync function iphone ipad grey category computersaccessoriesaccessoriesperipheralscablesaccessoriescablesusbcables discounted_price 154 actual_price 399 discount_percentage 61 rating 42 rating_count 16905 about_product charge sync function cable come charging data sync functionhigh quality material tpe nylon material make sure life cable enhanced significantlylong cord cable extra thick 12 meter long optimized easy use comfort home officemore durablethis cable unique interms design multiuse positioned provide best comfort performance usinguniversal compatibility compatible device like iphone x x xr 8 7 6 6 5 ipad pro ipad mini ipad air review_title good originaldecentgood one secondary usebest qualitygoodamazing product mind blowing pricenice qualitygood product review_content bought instead original apple w

In [None]:
query_2, discussion_3 = next_query(discussion_2, question_2, retrieved_chunks_2)
print("Guessed = ", query_2)
print("Real =", product_test_price)


                Context information is below.
                ---------------------
                ['product_name sounce fast phone charging cable data sync usb cable compatible iphone 13 1211 x 8 7 6 5 ipad air pro mini io device category computersaccessoriesaccessoriesperipheralscablesaccessoriescablesusbcables discounted_price 199 actual_price 1899 discount_percentage 90 rating 39 rating_count 7928 about_product fast charger data syncwith builtin safety proctections fourcore copper wire promote maximum signal quality strength enhance charging data transfer speed 480 mb transferring speed compatibilitycompatible iphone 13 1211 x 8 7 6 5 ipad air pro mini io device sturdy durablethe jacket enforced connector made tpe premium copper resistant repeatedly bending coiling ultra high quality according experimental result fishbone design accept least 20000 bending insertion test extra protection durability upgraded 3d aluminum connector exclusive laser welding technology ensure metal part

In [97]:
query_3, discussion_4 = next_query(discussion_3, "I want to buy an Iphone", retrieved_chunks_2)
print("Guessed = ", query_3)



                Context information is below.
                ---------------------
                ['product_name sounce fast phone charging cable data sync usb cable compatible iphone 13 1211 x 8 7 6 5 ipad air pro mini io device category computersaccessoriesaccessoriesperipheralscablesaccessoriescablesusbcables discounted_price 199 actual_price 1899 discount_percentage 90 rating 39 rating_count 7928 about_product fast charger data syncwith builtin safety proctections fourcore copper wire promote maximum signal quality strength enhance charging data transfer speed 480 mb transferring speed compatibilitycompatible iphone 13 1211 x 8 7 6 5 ipad air pro mini io device sturdy durablethe jacket enforced connector made tpe premium copper resistant repeatedly bending coiling ultra high quality according experimental result fishbone design accept least 20000 bending insertion test extra protection durability upgraded 3d aluminum connector exclusive laser welding technology ensure metal part

In [25]:
prompt1 = "The mystery number is 432"
prompt2 = "What is the mystery number ?"

print(run_mistral(prompt1))
print("+++++++++++++++++++++++++++++++++++++++++++++++++++++")
print(run_mistral(prompt2))

Alright, let's play a game! I'll try to guess the mystery number based on your clues. Since you've already told me the mystery number is 432, I'll use this information to provide clues or answer questions as if I were trying to guess it.

Here are some initial clues that describe the number 432:

1. It is a three-digit number.
2. It is an even number.
3. It is divisible by 2, 3, and 4.
4. The sum of its digits is 9.
5. It is greater than 400 but less than 500.

Now, you can ask me questions or provide more clues, and I'll try to guess the number based on that information. Let's have fun!
+++++++++++++++++++++++++++++++++++++++++++++++++++++
To I I To I toalousal to'ically to' to' to' to to to to it toally, to to to toously,ousousously,ically,ousized withated toously,ously,ated theously,ouslyously,ically, the "mystery number" could be anything without more context. Here are a few possibilities:

1. **A riddle**: If it's a riddle, the mystery number could be the answer. For example, if t

## LangChain

In [None]:
!pip install langchain langchain-mistralai langchain_community mistralai==0.4.2

Collecting mistralai==0.4.2
  Downloading mistralai-0.4.2-py3-none-any.whl (20 kB)
Installing collected packages: mistralai
  Attempting uninstall: mistralai
    Found existing installation: mistralai 0.5.5a20
    Uninstalling mistralai-0.5.5a20:
      Successfully uninstalled mistralai-0.5.5a20
Successfully installed mistralai-0.4.2


In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_mistralai.embeddings import MistralAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain

# Load data
loader = TextLoader("essay.txt")
docs = loader.load()
# Split text into chunks
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(docs)
# Define the embedding model
embeddings = MistralAIEmbeddings(model="mistral-embed", mistral_api_key=api_key)
# Create the vector store
vector = FAISS.from_documents(documents, embeddings)
# Define a retriever interface
retriever = vector.as_retriever()
# Define LLM
model = ChatMistralAI(mistral_api_key=api_key)
# Define prompt template
prompt = ChatPromptTemplate.from_template("""Answer the following question based only on the provided context:

<context>
{context}
</context>

Question: {input}""")

# Create a retrieval chain to answer questions
document_chain = create_stuff_documents_chain(model, prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)
response = retrieval_chain.invoke({"input": "What were the two main things the author worked on before college?"})
print(response["answer"])

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

The two main things the author worked on before college were writing and programming. He wrote short stories and tried writing programs on an IBM 1401 using Fortran. His stories were awful and he couldn't figure out what to do with the 1401 due to the lack of input data. His programming skills improved with the advent of microcomputers, which allowed him to write simple games, a program to predict how high his model rockets would fly, and a word processor.


## LlamaIndex

In [None]:
!pip install llama-index==0.10.55 llama-index-llms-mistralai==0.1.18 llama-index-embeddings-mistralai mistralai==0.4.2



In [None]:
import os
from llama_index.core import Settings, SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.mistralai import MistralAI
from llama_index.embeddings.mistralai import MistralAIEmbedding

# Load data
reader = SimpleDirectoryReader(input_files=["essay.txt"])
documents = reader.load_data()
# Define LLM and embedding model
Settings.llm = MistralAI(model="mistral-medium", api_key=api_key)
Settings.embed_model = MistralAIEmbedding(model_name='mistral-embed', api_key=api_key)
# Create vector store index
index = VectorStoreIndex.from_documents(documents)
# Create query engine
query_engine = index.as_query_engine(similarity_top_k=2)
response = query_engine.query(
    "What were the two main things the author worked on before college?"
)
print(str(response))

The two main things the author worked on before college, outside of school, were writing and programming. They wrote short stories and tried writing programs on an IBM 1401 in 9th grade.


# Haystack

In [None]:
!pip install mistral-haystack==0.0.1 mistralai==0.4.2

Collecting mistral-haystack==0.0.1
  Downloading mistral_haystack-0.0.1-py3-none-any.whl (11 kB)
Collecting haystack-ai>=2.0.0b6 (from mistral-haystack==0.0.1)
  Downloading haystack_ai-2.3.0-py3-none-any.whl (349 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m349.9/349.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Collecting haystack-experimental (from haystack-ai>=2.0.0b6->mistral-haystack==0.0.1)
  Downloading haystack_experimental-0.1.0-py3-none-any.whl (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.2/40.2 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting lazy-imports (from haystack-ai>=2.0.0b6->mistral-haystack==0.0.1)
  Downloading lazy_imports-0.3.1-py3-none-any.whl (12 kB)
Collecting posthog (from haystack-ai>=2.0.0b6->mistral-haystack==0.0.1)
  Downloading posthog-3.5.0-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Col

In [None]:
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.dataclasses import ChatMessage
from haystack.utils.auth import Secret

from haystack.components.builders import DynamicChatPromptBuilder
from haystack.components.converters import TextFileToDocument
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.writers import DocumentWriter
from haystack_integrations.components.embedders.mistral import MistralDocumentEmbedder, MistralTextEmbedder
from haystack_integrations.components.generators.mistral import MistralChatGenerator

document_store = InMemoryDocumentStore()

docs = TextFileToDocument().run(sources=["essay.txt"])
split_docs = DocumentSplitter(split_by="passage", split_length=2).run(documents=docs["documents"])
embeddings = MistralDocumentEmbedder(api_key=Secret.from_token(api_key)).run(documents=split_docs["documents"])
DocumentWriter(document_store=document_store).run(documents=embeddings["documents"])


text_embedder = MistralTextEmbedder(api_key=Secret.from_token(api_key))
retriever = InMemoryEmbeddingRetriever(document_store=document_store)
prompt_builder = DynamicChatPromptBuilder(runtime_variables=["documents"])
llm = MistralChatGenerator(api_key=Secret.from_token(api_key),
                           model='mistral-small')

chat_template = """Answer the following question based on the contents of the documents.\n
                Question: {{query}}\n
                Documents:
                {% for document in documents %}
                    {{document.content}}
                {% endfor%}
                """
messages = [ChatMessage.from_user(chat_template)]

rag_pipeline = Pipeline()
rag_pipeline.add_component("text_embedder", text_embedder)
rag_pipeline.add_component("retriever", retriever)
rag_pipeline.add_component("prompt_builder", prompt_builder)
rag_pipeline.add_component("llm", llm)


rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
rag_pipeline.connect("retriever.documents", "prompt_builder.documents")
rag_pipeline.connect("prompt_builder.prompt", "llm.messages")

question = "What were the two main things the author worked on before college?"

result = rag_pipeline.run(
    {
        "text_embedder": {"text": question},
        "prompt_builder": {"template_variables": {"query": question}, "prompt_source": messages},
        "llm": {"generation_kwargs": {"max_tokens": 225}},
    }
)

print(result["llm"]["replies"][0].content)

Calculating embeddings: 100%|██████████| 3/3 [00:08<00:00,  2.96s/it]


Based on the documents provided, the two main things the author worked on before college were writing and programming. The author specifically mentions writing short stories outside of school and programming as two main areas of focus.
