In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain_chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings, OllamaEmbeddings
from langchain.document_loaders import TextLoader
from langchain.schema import Document, Generation, LLMResult
from langchain.llms import Ollama, BaseLLM
from langchain.chains import StuffDocumentsChain, RetrievalQA, LLMChain
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import OpenAI
import json
import requests

class LocalOllamaLLM(BaseLLM):
    """LangChain LLM wrapper that forwards prompts to an Ollama HTTP server.

    Every prompt is POSTed to ``{api_url}/api/generate``. The server's reply is
    read as newline-delimited JSON and the ``response`` fragments of all chunks
    are concatenated into one completion string.
    """

    # Base URL of the Ollama server, without the "/api/generate" path.
    api_url: str
    # Name of the Ollama model to query (previously hard-coded in the request).
    model: str = "mistral-large"

    def _generate(self, prompt, stop=None, run_manager=None, **kwargs):
        """Run the model on each prompt and collect the completions.

        Args:
            prompt: The batch of prompt strings handed over by LangChain (a
                list). A bare string is also accepted and treated as a batch
                of one.
            stop: Optional list of stop sequences, forwarded to Ollama.
            run_manager: Accepted for compatibility with the base-class
                signature; not used.
            **kwargs: Accepted for compatibility; ignored.

        Returns:
            LLMResult holding one single-element generation list per prompt,
            in the same order as the input prompts.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            RuntimeError: If a reply chunk carries an ``error`` field.
        """
        # Each prompt must be sent as-is: stringifying the whole batch would
        # make the model see the Python list repr (brackets, quotes, escapes).
        batch = [prompt] if isinstance(prompt, str) else list(prompt)
        generations = [[Generation(text=self._complete(text, stop))] for text in batch]
        return LLMResult(generations=generations)

    def _complete(self, text, stop=None):
        """Send one prompt to the server and return the full completion text."""
        payload = {"model": self.model, "prompt": text}
        if stop:
            payload["options"] = {"stop": list(stop)}

        response = requests.post(f"{self.api_url}/api/generate", json=payload)
        response.raise_for_status()

        fragments = []
        for line in response.text.splitlines():
            if not line.strip():
                # Tolerate blank separator lines in the streamed body.
                continue
            chunk = json.loads(line)
            if "error" in chunk:
                raise RuntimeError(f"Ollama returned an error: {chunk['error']}")
            fragments.append(chunk.get("response", ""))
        return "".join(fragments)

    @property
    def _llm_type(self):
        """Identifier of this LLM implementation (read by LangChain as a property)."""
        return "local"  # Or whatever type is appropriate for your local setup


In [None]:
# Embedding model used both to index the documents and to embed queries.
embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")
vector_store = Chroma(embedding_function=embedder)

# Create some sample documents
documents = [
    Document(page_content="This is the first document.", metadata={"label": "doc1"}),
    Document(page_content="This is the second document.", metadata={"label": "doc2"}),
    Document(page_content="This is the third document.", metadata={"label": "doc3"}),
    Document(page_content="The Sun is the star at the center of the Solar System. It is a massive, nearly perfect sphere of hot plasma, heated to incandescence by nuclear fusion reactions in its core, radiating the energy from its surface mainly as visible light and infrared radiation with 10% at ultraviolet energies. It is by far the most important source of energy for life on Earth. The Sun has been an object of veneration in many cultures. It has been a central subject for astronomical research since antiquity.", metadata={"label": "doc4"}),
]

# Add documents to the vector store.
# Each document's unique "label" is used as its id so that re-running this cell
# writes to the same ids instead of storing the documents again under new
# random ids (which would make the retriever return duplicates).
vector_store.add_documents(documents, ids=[doc.metadata["label"] for doc in documents])

In [None]:
# Instantiate the custom LLM wrapper, pointing it at the server listening on
# the loopback interface.
llm = LocalOllamaLLM(
    api_url="http://127.0.0.1:11434",
)

In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

# Expose the vector store through the retriever interface.
retriever = vector_store.as_retriever()

# System instructions for the model; "{context}" is a template placeholder.
system_prompt = "".join(
    [
        "Use the given context to answer the question. ",
        "If you don't know the answer, say you don't know. ",
        "Use three sentence maximum and keep the answer concise. ",
        "Context: {context}",
    ]
)

# Two-message chat template: the system instructions, then the user's "{input}".
prompt = ChatPromptTemplate.from_messages([("system", system_prompt), ("human", "{input}")])

# Combine the LLM and prompt into a document chain, then put the retriever in front of it.
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(retriever, question_answer_chain)

# Ask a question; the result of the call is the cell's displayed output.
query = "what is the sun?"
chain.invoke(dict(input=query))

## Simple example with requests

In [None]:
import requests
import json

# URL to which the request is to be sent
url = 'http://localhost:11434/api/generate'

# Data to be sent in the POST request
# NOTE(review): the prompt text below is wrapped in [" ... "], which looks like
# the repr of a one-element Python list rather than a plain prompt — confirm
# that sending the brackets and quotes to the model is intentional.
data = {
    "model": "mistral-large",
    "prompt": """
    ["System: Use the given context to answer the question. If you don't know the answer, say you don't know. Use three sentence maximum and keep the answer concise. Context: This is the first document.\n\nThis is the second document.\n\nThis is the third document.\nHuman: what is the sun?"]
    """
}

# Sending the POST request
response = requests.post(url, json=data)

# Printing the response
print(response.status_code)  # HTTP status code
print(response.text)

# Fail with a clear HTTP error (after the raw body has been shown above)
# instead of a confusing KeyError/JSONDecodeError while parsing an error body.
response.raise_for_status()

# The body is newline-delimited JSON; join the "response" fragment of every
# non-blank line into the full completion.
complete_response = ''.join(
    json.loads(line)['response']
    for line in response.text.splitlines()
    if line.strip()
)

# Print the complete response
print(complete_response)

## EXPERIMENTAL