In [13]:
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
# transformers
from transformers import AutoModelForMaskedLM, AutoTokenizer
from transformers import pipeline
# langchain-huggingface
from langchain_huggingface.llms import HuggingFacePipeline

import torch

# webscrappers
import requests
import wikipedia


import os
os.environ["HUGGINGFACEHUB_API_TOKEN"]="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"
repo_id_mistral = "mistralai/Mistral-7B-Instruct-v0.3"


#Text wrapper
import textwrap
def print_text(llm_response):
    wrapper=textwrap.TextWrapper(width=70)
    string=wrapper.fill(text=llm_response)
    return print(string)

In [14]:
# text data
topic = "Oscillating water column"
wiki_article=wikipedia.WikipediaPage(topic).content

## Instantiate model form huggingface
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name="allenai/scibert_scivocab_uncased"
tokenizer=AutoTokenizer.from_pretrained(model_name)
model=AutoModelForMaskedLM.from_pretrained(model_name)

## Create a pipeline
pipe = pipeline("text-generation", model=model,tokenizer=tokenizer,
                max_new_tokens=128)
## Create the llm
llm=HuggingFacePipeline(pipeline=pipe)

# Construct the template for prompt


template= """ Follow these instructions {instructions} to give the answer:
\n\nContext:{context}

Question: {query}

\nAnswer: 
"""

prompt_template=PromptTemplate(input_variables=['query',"context", "instructions"],
                      template=template)





# define the chain with two components: prompt_template, llm

llm_chain= prompt_template | llm


The model 'BertForMaskedLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'Mu

In [10]:
# Define instructions and context from wikipedia article
instruction="""Instruction: Use your scientific and technical knowldege. Think like a mechanical engineer while giving the answer. If you do not 
know the facts, do not make assumptions and avoid hallucinations and just say your knowldge is limited. Just use the context provided to you and your scientific and technical knowledge."""

## Query from user
user_query="""Pakistan has a huge potential of installing OCWs at its ocean shores?
 If so which type of such plant is feasible for Pakistan considering its geological and environmental concerns?"""


# format the template based on input variables (context and query) to make a prompt

prompt = prompt_template.format(
    instructions=instruction,
    query=user_query,
    context=wiki_article
)


max_tokens = 512 - len(user_query) - len(instruction)
truncated_context = wiki_article[:max_tokens]
# get response from the model
response = llm_chain.invoke({
    "instructions": instruction,
    "query": user_query,
    "context": truncated_context
})
print_text(response)

 Follow these instructions Instruction: Use your scientific and
technical knowldege. Think like a mechanical engineer while giving the
answer. If you do not  know the facts, do not make assumptions and
avoid hallucinations and just say your knowldge is limited. Just use
the context provided to you and your scientific and technical
knowledge. to give the answer:   Context:Oscillating wat  Question:
Pakistan has a huge potential of installing OCWs at its ocean shores?
If so which type of such plant is feasible for Pakistan considering
its geological and environmental concerns?   Answer:   : : : : : : : :
: : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : :
: : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : :
: : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : : :
: : : : : : : : : : : : : : :


In [4]:
llm=HuggingFaceEndpoint(repo_id=repo_id_mistral,
                        max_new_tokens=300, 
                        temperature=0.01,
                         model_kwargs={"token":"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",
                                       "add_to_git_credential":True},
                        )


# build prompt template for simple question ansewring
template="""Question: {question}
Answer:"""
prompt = PromptTemplate(template=template,input_variables=['question']
                        )
llm_chain = prompt | llm

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.

Token is valid (permission: write).

Your token has been saved to C:\Users\Pakistan\.cache\huggingface\token

Login successful


In [None]:
from langchain.vectorstores import Chroma
from langchain_mistralai import MistralAIEmbeddings
