In [1]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, LLMPredictor, ServiceContext, StorageContext, load_index_from_storage
import os
from langchain import HuggingFaceHub
import sys
import logging
import torch

sys.path.append('../')
from apikeys import huggingface_key
from huggingface_hub import InferenceClient
import tqdm as notebook_tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

In [3]:
os.environ['HUGGINGFACEHUB_API_TOKEN'] = huggingface_key

In [4]:
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

In [5]:
def format_prompt(message, history):
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
        prompt += f"[INST] {message} [/INST]"
    return prompt

In [6]:
def format_prompt2(message):
    prompt = "<s>"
    prompt += f"[INST] {message} [/INST]"
    prompt += f" </s> "
        #prompt += f"[INST] {message} [/INST]"
    return prompt

In [7]:
def generate(
    prompt, history, temperature=0.2, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt2(prompt)
    print(f"prompt: {formatted_prompt}")

    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        output += response.token.text
        yield output
    return output

In [29]:
for line in generate("how sleep should a newborn get?", [("","")]):
    continue

prompt: <s>[INST] how sleep should a newborn get? [/INST] </s> 
DEBUG:urllib3.connectionpool:https://api-inference.huggingface.co:443 "POST /models/mistralai/Mixtral-8x7B-Instruct-v0.1 HTTP/1.1" 200 None
https://api-inference.huggingface.co:443 "POST /models/mistralai/Mixtral-8x7B-Instruct-v0.1 HTTP/1.1" 200 None


In [30]:
print(line)


Newborns generally require a lot of sleep to support their rapid growth and development. On average, newborns (0-3 months old) need about 14-16 hours of sleep per day. However, it's important to note that newborns have not yet established a regular sleep-wake cycle, so they may sleep for short periods of time throughout the day and night.

During the first few weeks of life, newborns may only sleep for 1-2 hours at a time before waking up to feed. As they grow and develop, their sleep patterns will become more predictable and they may start to sleep for longer stretches at night.

It's also important to note that newborns should be put to sleep on their backs on a firm, flat surface to reduce the risk of sudden infant death syndrome (SIDS). Parents and caregivers should also avoid letting newborns sleep for long periods of time in car seats, strollers, or other devices, as this can increase the risk of SIDS and other sleep-related causes of infant death.

If you have concerns about yo

In [10]:
from llama_index.prompts import PromptTemplate

template = (
    "<s> [INST] We have provided context information below. If information does not provide enough information to ask the question respond with not enough context\n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given this information, please answer the question: {query_str}\n [/INST] </s> "
)
qa_template = PromptTemplate(template)

In [11]:
llm = LLMPredictor(llm=HuggingFaceHub(repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",verbose=True))
service_context = ServiceContext.from_defaults(
    llm_predictor=llm, embed_model="local:BAAI/bge-small-en-v1.5"
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
Starting new HTTPS connection (1): huggingface.co:443




DEBUG:urllib3.connectionpool:https://huggingface.co:443 "GET /api/models/mistralai/Mixtral-8x7B-Instruct-v0.1 HTTP/1.1" 200 4994
https://huggingface.co:443 "GET /api/models/mistralai/Mixtral-8x7B-Instruct-v0.1 HTTP/1.1" 200 4994
LLMPredictor is deprecated, please use LLM instead.
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
https://huggingface.co:443 "HEAD /BAAI/bge-small-en-v1.5/resolve/main/tokenizer_config.json HTTP/1.1" 200 0


In [12]:
# load the documents and create the index
documents = SimpleDirectoryReader("../data").load_data()
index = VectorStoreIndex.from_documents(documents,service_context=service_context)

DEBUG:llama_index.readers.file.base:> [SimpleDirectoryReader] Total files added: 4
> [SimpleDirectoryReader] Total files added: 4
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: Caring for you and your babyA Guide for New Par...
> Adding chunk: Caring for you and your babyA Guide for New Par...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: Now that your baby has arrived, we have more in...
> Adding chunk: Now that your baby has arrived, we have more in...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: Taking Care of Yourself
Activity and Rest
Givin...
> Adding chunk: Taking Care of Yourself
Activity and Rest
Givin...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: Constipation
You may be constipated the first f...
> Adding chunk: Constipation
You may be constipated the first f...
DEBUG:llama_index.node_parser.node_utils:> Adding chunk: 5• Anytime for comfort, relaxation and enjoymen...
> Adding chunk: 5• Anytime for comfort, relaxation and enjoyme

In [13]:
from llama_index.llms import ChatMessage, MessageRole
from llama_index.prompts import ChatPromptTemplate

# Text QA Prompt
chat_text_qa_msgs = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content=(
            "Always answer the question, even if the context isn't helpful."
        ),
    ),
    ChatMessage(
        role=MessageRole.USER,
        content=(
            "Context information is below.\n"
            "---------------------\n"
            "{context_str}\n"
            "---------------------\n"
            "Given the context information and not prior knowledge, "
            "answer the question: {query_str}\n"
        ),
    ),
]
text_qa_template = ChatPromptTemplate(chat_text_qa_msgs)

# Refine Prompt
chat_refine_msgs = [
    ChatMessage(
        role=MessageRole.SYSTEM,
        content=(
            "Always answer the question, even if the context isn't helpful."
        ),
    ),
    ChatMessage(
        role=MessageRole.USER,
        content=(
            "We have the opportunity to refine the original answer "
            "(only if needed) with some more context below.\n"
            "------------\n"
            "{context_msg}\n"
            "------------\n"
            "Given the new context, refine the original answer to better "
            "answer the question: {query_str}. "
            "If the context isn't useful, output the original answer again.\n"
            "Original Answer: {existing_answer}"
        ),
    ),
]
refine_template = ChatPromptTemplate(chat_refine_msgs)

In [31]:

print(
    index.as_query_engine(
        text_qa_template=text_qa_template, refine_template=refine_template
    ).query("how much sleep should a newborn get?")
)
#query_engine = index.as_query_engine()
#response = query_engine.query("how much should a one month old drink?")
#print(response)

DEBUG:llama_index.indices.utils:> Top 2 nodes:
> [Node 89969ce7-0d29-49a9-94fe-9b4be44cef23] [Similarity score:             0.831066] SLEEPY TIME ADVICE FOR PARENTS  
Put  your  baby  to  sleep  on  his  back;  advise  your  relati...
> [Node 50f5df64-8fe5-4c1c-98a8-c4d3f73d0a42] [Similarity score:             0.786154] will be fantastic and they will sleep really well. That helps us to keep 3 hours between supper a...
> Top 2 nodes:
> [Node 89969ce7-0d29-49a9-94fe-9b4be44cef23] [Similarity score:             0.831066] SLEEPY TIME ADVICE FOR PARENTS  
Put  your  baby  to  sleep  on  his  back;  advise  your  relati...
> [Node 50f5df64-8fe5-4c1c-98a8-c4d3f73d0a42] [Similarity score:             0.786154] will be fantastic and they will sleep really well. That helps us to keep 3 hours between supper a...
DEBUG:urllib3.connectionpool:https://api-inference.huggingface.co:443 "POST /pipeline/text-generation/mistralai/Mixtral-8x7B-Instruct-v0.1 HTTP/1.1" 200 None
https://api-inference.huggin

In [32]:
query_engine = index.as_query_engine()
response = query_engine.query("how much sleep should a newborn get?")
print(response)

DEBUG:llama_index.indices.utils:> Top 2 nodes:
> [Node 89969ce7-0d29-49a9-94fe-9b4be44cef23] [Similarity score:             0.831066] SLEEPY TIME ADVICE FOR PARENTS  
Put  your  baby  to  sleep  on  his  back;  advise  your  relati...
> [Node 50f5df64-8fe5-4c1c-98a8-c4d3f73d0a42] [Similarity score:             0.786154] will be fantastic and they will sleep really well. That helps us to keep 3 hours between supper a...
> Top 2 nodes:
> [Node 89969ce7-0d29-49a9-94fe-9b4be44cef23] [Similarity score:             0.831066] SLEEPY TIME ADVICE FOR PARENTS  
Put  your  baby  to  sleep  on  his  back;  advise  your  relati...
> [Node 50f5df64-8fe5-4c1c-98a8-c4d3f73d0a42] [Similarity score:             0.786154] will be fantastic and they will sleep really well. That helps us to keep 3 hours between supper a...
DEBUG:urllib3.connectionpool:https://api-inference.huggingface.co:443 "POST /pipeline/text-generation/mistralai/Mixtral-8x7B-Instruct-v0.1 HTTP/1.1" 200 None
https://api-inference.huggin