In [41]:
import os
import warnings
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.llms import CustomLLM, CompletionResponse, CompletionResponseGen, LLMMetadata, LLM
from llama_index.core.llms.callbacks import llm_completion_callback
from huggingface_hub import InferenceClient
from snowflake.snowpark import Session
from snowflake.cortex import Complete
from snowflake.cortex import CompleteOptions
from dotenv import load_dotenv
from typing import Any, Union, List

In [42]:
# Silence every warning so library noise does not clutter the notebook output.
warnings.filterwarnings("ignore")
# Load the variables defined in the .env file one directory up into the environment.
load_dotenv("../.env")

# Credentials for the Snowpark session; a missing variable raises KeyError here.
connection_params = dict(
    account=os.environ["SNOWFLAKE_ACCOUNT"],
    user=os.environ["SNOWFLAKE_USER"],
    password=os.environ["SNOWFLAKE_USER_PASSWORD"],
)

In [43]:
# Open the Snowpark session from `connection_params`; the `complete` helper passes it to every Cortex call.
snowflake_session = Session.builder.configs(connection_params).create()

In [None]:
def complete(user_text: str,
             model: str = "mistral-large2",
             options: "CompleteOptions | None" = None):
    """
    Sends a prompt to Snowflake Cortex and returns the model's completion.

    :param user_text: str. The prompt forwarded to the model.
    :param model: str, default "mistral-large2". Name of the Cortex model to call.
    :param options: CompleteOptions, default None. Optional generation settings
        passed through to ``Complete``; when None, no options are sent.
    :return: Whatever ``Complete`` returns for this prompt (the notebook's
        callers treat it as the completion text).
    """
    # Forward an options mapping (or None). The previous code passed the
    # CompleteOptions class object itself, which is a type, not a set of options.
    return Complete(
        model=model,
        prompt=user_text,
        session=snowflake_session,
        options=options,
    )

In [33]:
class RagoonBot(CustomLLM):
    """LlamaIndex LLM adapter that answers prompts through the module-level ``complete`` helper."""

    # Values reported to LlamaIndex through ``metadata``.
    context_window: int = 3900
    num_output: int = 256
    model_name: str = "mistral-large2"

    @property
    def metadata(self) -> LLMMetadata:
        """Describe this model: context window, output budget and model name."""
        settings = {
            "context_window": self.context_window,
            "num_output": self.num_output,
            "model_name": self.model_name,
        }
        return LLMMetadata(**settings)

    @llm_completion_callback()
    def complete(self, prompt: str | List[str],
                 **kwargs: Any) -> CompletionResponse:
        """Return the completion for ``prompt`` as one response; ``kwargs`` are not used."""
        # Inside a method body `complete` resolves to the module-level helper, not this method.
        return CompletionResponse(text=complete(prompt))

    @llm_completion_callback()
    def stream_complete(
        self, prompt: str, **kwargs: Any
    ) -> CompletionResponseGen:
        """
        Yield the completion for ``prompt`` piece by piece.

        The helper returns the whole answer at once, so streaming is simulated:
        the answer is iterated element by element, and each yielded response
        carries the text so far plus the newest element as ``delta``.
        """
        answer = complete(prompt)

        emitted = ""
        for piece in answer:
            emitted = emitted + piece
            yield CompletionResponse(text=emitted, delta=piece)

In [34]:
# Instantiate the custom LLM with its class-level defaults (no overrides passed).
llm = RagoonBot()

In [37]:
# Smoke test: send a short prompt through the custom LLM and display the returned CompletionResponse.
llm.complete("Hello")

CompletionResponse(text=" Hello! How can I assist you today? Let's have a friendly conversation. How are you doing?", additional_kwargs={}, raw=None, logprobs=None, delta=None)

In [10]:
class HyDETransformer(HyDEQueryTransform):
    """HyDE query transform with a convenience ``transform`` method that returns the generated strings."""

    def __init__(self,
                 llm: LLM,
                 hyde_prompt: str | None = None,
                 include_original: bool = True):
        """
        Initializes the Hypothetical Document Embeddings (HyDE) transformer.

        :param llm: LLM. The LLM instance passed to the base transform.
        :param hyde_prompt: str or prompt template, default None. The prompt to use
            for the HyDE model. A plain string is wrapped in a ``PromptTemplate``;
            it presumably needs a ``{context_str}`` placeholder for the query
            (confirm against the installed llama_index version). None is passed
            through unchanged to the base class.
        :param include_original: bool, default True. Whether to include the original text in the output.
        """
        if isinstance(hyde_prompt, str):
            # The parameter is advertised as a string, but the base class is
            # handed the value untouched; wrap it so a plain string is usable.
            from llama_index.core import PromptTemplate

            hyde_prompt = PromptTemplate(hyde_prompt)

        super().__init__(
            llm=llm,
            hyde_prompt=hyde_prompt,
            include_original=include_original
        )

    def transform(
        self,
        text: str | None = None
    ):
        """
        Runs the HyDE transform on the input text and returns the resulting strings.

        :param text: str. The text to transform.
        :return: The ``custom_embedding_strs`` of the transformed query. If ``text``
            is None, the message string "Please provide a text to transform."
            is returned instead (kept for backward compatibility).
        """
        if text is None:
            return "Please provide a text to transform."

        response = self.run(text)
        return response.custom_embedding_strs

In [11]:
# Example question used to exercise the HyDE transformer.
text = "What are the effects of schizophrenia on memory?"
# Only the LLM is supplied, so hyde_prompt and include_original keep their defaults (None / True).
transformer = HyDETransformer(
    llm=llm
)
# transform() returns the custom_embedding_strs of the transformed query.
queries = transformer.transform(text)
# Print each returned string on its own line.
for query in queries:
    print(query)

 Schizophrenia, a complex mental health disorder, significantly impacts various cognitive functions, including memory. One of the most pronounced effects is on working memory, the system that actively holds and manipulates information over short periods. Individuals with schizophrenia often experience deficits in working memory, which can manifest as difficulties in recalling recent events, following conversations, or performing tasks that require holding information in mind temporarily.

Long-term memory is also affected, with patients frequently reporting problems in both episodic memory (recollection of personal experiences and events) and semantic memory (knowledge about the world). These impairments can lead to challenges in daily life, such as forgetting important appointments, misplacing items, or struggling to remember previously learned information.

Additionally, schizophrenia can disrupt procedural memory, the type of memory involved in learning and performing automatic task