<a href="https://colab.research.google.com/github/ebamberg/research-projects-ml/blob/main/vector_embeddings/examples_recommender_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
!pip install ollama langchain_community --quiet
!pip install openai --quiet
!pip install faiss-cpu --quiet

import time
import urllib.request

# Address of the local Ollama server and the names of the models this notebook uses.
host="localhost:11434"
modelid="chevalblanc/gpt-4o-mini"
embedding_model = "mxbai-embed-large"

# Install Ollama, then launch its server as a background process.
get_ipython().system_raw("curl -fsSL https://ollama.com/install.sh | sh")
get_ipython().system_raw("ollama serve &")

# `ollama serve &` returns immediately, so wait until the server answers on
# `host` before asking it to pull a model; a pull issued too early may fail.
for _attempt in range(60):
  try:
    urllib.request.urlopen(f"http://{host}", timeout=1).close()
    break
  except OSError:
    # Connection refused / timed out: the server is not listening yet.
    time.sleep(0.5)
else:
  print(f"Ollama server at {host} did not become reachable; the model pull below may fail.")

get_ipython().system_raw(f"ollama pull {modelid}")



In [9]:
# Fetch the chat model first, then the embedding model, via the Ollama CLI.
for model_name in (modelid, embedding_model):
  get_ipython().system_raw(f"ollama pull {model_name}")


In [10]:
from openai import OpenAI


# OpenAI client pointed at the `/v1` endpoint of the server configured in `host`.
# The client insists on an API key, but its value is not used.
llm = OpenAI(api_key="ollama", base_url=f"http://{host}/v1")




In [11]:
def embed(text: str) -> list[float]:
  """Return the embedding vector of `text`.

  The text is sent as a single-element batch to the embeddings endpoint of
  `llm`, using the model named by `embedding_model`; the vector of the first
  (and only) result is returned.
  """
  response = llm.embeddings.create(model=embedding_model, input=[text])
  first_result = response.data[0]
  return first_result.embedding

def createEmbeddings(texts: list[str]) -> list[list[float]]:
  """Embed every entry of `texts` with `embed`, preserving the input order.

  Returns one vector per input text; an empty input yields an empty list.
  """
  vectors = []
  for entry in texts:
    vectors.append(embed(entry))
  return vectors


In [12]:
import faiss
import numpy as np

# Small corpus of sample sentences to search over.
sentences = [
    "Artificial Intelligence is the future.",
    "AI requires large datasets to train models.",
    "Machines learn by analyzing data."
]

# Convert each sentence to its embedding vector (one vector per sentence).
embeddings = createEmbeddings(sentences)

# Store the vectors in a flat L2 FAISS index for similarity search.
# The index dimension is taken from the first embedding.
dimension = len(embeddings[0])
index = faiss.IndexFlatL2(dimension)
# FAISS works on float32 matrices, while np.array() on Python floats yields
# float64 - request the dtype explicitly instead of relying on an implicit
# conversion inside the FAISS wrapper.
index.add(np.array(embeddings, dtype=np.float32))

In [13]:
# Embed the query and look up its single nearest neighbour in the index.
query = embed("AI evolves with data.")
# search() takes a 2-D batch of float32 query vectors; D receives the
# distances and I the positions of the matches within `sentences`.
D, I = index.search(np.array([query], dtype=np.float32), 1)
print(f"The most similar sentence is: {sentences[I[0][0]]}")

The most similar sentence is: AI requires large datasets to train models.


In [14]:
from pydantic import BaseModel

def call(system_prompt: str, message: str, output_schema: type[BaseModel] | None = None , model: str = modelid) -> BaseModel | str | None:
  """Send one system prompt plus one user message to the chat model.

  Args:
    system_prompt: Content of the leading "system" message.
    message: Content of the "user" message.
    output_schema: Optional pydantic model class. When given, it is passed as
      `response_format` so the reply is parsed into an instance of it.
    model: Name of the chat model to query; defaults to `modelid`.

  Returns:
    The parsed `output_schema` instance when a schema was supplied, otherwise
    the plain text content of the reply.
  """
  messages = [
      {"role": "system", "content": system_prompt},
      {"role": "user", "content": message},
  ]
  request = {
      "model": model,  # honour the caller's choice instead of always using `modelid`
      "messages": messages,
      "temperature": 0.0,
  }
  # Only forward `response_format` when a schema exists: `None` is not a
  # schema type, so passing it through would not select "no schema".
  if output_schema is not None:
    request["response_format"] = output_schema

  completion = llm.chat.completions.parse(**request)
  reply = completion.choices[0].message

  # Without a schema there is nothing to parse, so hand back the raw text.
  if output_schema is None:
    return reply.content
  return reply.parsed

NameError: name 'BaseModel' is not defined