In [6]:
import os
import dotenv
from os import environ
env_file = '.env'

In [7]:
dotenv.load_dotenv(env_file, override=True)

True

## Load data

In [8]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

loader = PyPDFLoader("data\e274394a0bfb4de196edae9a00adda36.pdf")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

## Load embeddings

In [9]:
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings()

  from .autonotebook import tqdm as notebook_tqdm


## Create vectorstore

In [10]:
import chromadb
from langchain.vectorstores import Chroma

persist_directory = "db"

def init_chromadb():
    client_settings = chromadb.config.Settings(
        chroma_db_impl="duckdb+parquet",
        persist_directory=persist_directory,
        anonymized_telemetry=False
    )

    vectorstore = Chroma(
        collection_name="langchain_store",
        embedding_function=embeddings,
        client_settings=client_settings,
        persist_directory=persist_directory,
    )

    if os.path.exists(persist_directory) and os.path.isdir(persist_directory):
        print("Directory 'db' exists. Using existing vectordb")
        pass
    else:
        print("Directory 'db' does not exist. Creating new vectordb")
        vectorstore.add_documents(documents=docs, embedding=embeddings)
        vectorstore.persist()
    
    return vectorstore

In [11]:
def query_chromadb(vectorstore):
    result = vectorstore.similarity_search_with_score(query="How to turn on the machine?", k=4)
    print(result)

In [12]:
vectorestore = init_chromadb()

Using embedded DuckDB with persistence: data will be stored in: db


Directory 'db' exists. Using existing vectordb


In [None]:
query_chromadb(vectorestore)

## Setup LLM

### Problema nel caricamento di modelli grandi

| HF Model                | Esito                                                                           | Size |
|-------------------------|---------------------------------------------------------------------------------|------|
| google/flan-t5-small    | ok                                                                              |300MB |
| google/flan-t5-xl       |ValueError: Error raised by inference API: Model {model name} time out           |12GB  |
| databricks/dolly-v2-12b |ValueError: Error raised by inference API: Model {model name} time out           |24GB  |

### Define model from huggingface

[Load model from disk](https://github.com/hwchase17/langchain/issues/2667#issuecomment-1501967127)

#### flan-T5-xl

In [7]:
from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration

model_path = "D:\\hf_models\\flan-t5-xl"
tokenizer_path = "D:\\hf_models\\flan-t5-xl"

tokenizer = T5Tokenizer(
    vocab_file=f"{tokenizer_path}/spiece.model",
    tokenizer_file=f"{tokenizer_path}/spiece.model",
    config_file=f"{tokenizer_path}/tokenizer_config.json",
    use_fast=False
)
model = T5ForConditionalGeneration.from_pretrained(model_path)
# text_generator = pipeline("text-generation", model=model, max_new_tokens=64, model_kwargs={"temperature":0}, tokenizer=tokenizer)


Loading checkpoint shards: 100%|██████████| 2/2 [00:17<00:00,  8.84s/it]
Xformers is not installed correctly. If you want to use memorry_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
The model 'T5ForConditionalGeneration' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForC

In [10]:
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
input_text = "Write something about you "

generated_text = llm(input_text, max_length=50, num_return_sequences=1)[0]['generated_text']
print(generated_text)

The model 'T5ForConditionalGeneration' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPre

Write something about you thing about: / something about: / something about: / something about: / something about: / something about: / something about: / something about: / something about


#### flan-alpaca-gpt4-xl

In [15]:
from transformers import pipeline, LlamaForCausalLM, T5Tokenizer

model_path = "D:\\hf_models\\flan-alpaca-gpt4-xl"
tokenizer_path = "D:\\hf_models\\flan-alpaca-gpt4-xl"

# tokenizer = AutoTokenizer(
#     vocab_file=f"{tokenizer_path}/spiece.model",
#     tokenizer_file=f"{tokenizer_path}/spiece.model",
#     config_file=f"{tokenizer_path}/tokenizer_config.json",
#     use_fast=False
# )
# model = LlamaForCausalLM.from_pretrained(model_path)

tokenizer = T5Tokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(model_path)

text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

You are using a model of type t5 to instantiate a model of type llama. This is not supported for all configurations of models and can yield errors.


RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 180355072 bytes.

RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 180355072 bytes.

#### flan-alpaca-large

In [13]:
from transformers import pipeline, LlamaForCausalLM, T5Tokenizer, AutoTokenizer, T5Model, AutoModelForSeq2SeqLM

# model_path = "D:\\hf_models\\flan-alpaca-large"
# tokenizer_path = "D:\\hf_models\\flan-alpaca-large"
model_path = "declare-lab/flan-alpaca-base"
tokenizer_path = "declare-lab/flan-alpaca-base"

# tokenizer = T5Tokenizer(
#     vocab_file=f"{tokenizer_path}/tokenizer.json",
#     tokenizer_file=f"{tokenizer_path}/tokenizer.json",
#     config_file=f"{tokenizer_path}/tokenizer_config.json",
#     use_fast=False
# )
# model = LlamaForCausalLM.from_pretrained(model_path)

tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
model = LlamaForCausalLM.from_pretrained(model_path)

text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer)

You are using a model of type t5 to instantiate a model of type llama. This is not supported for all configurations of models and can yield errors.


In [12]:
from langchain import HuggingFacePipeline
llm = HuggingFacePipeline.from_model_id(model_id="bigscience/bloom-1b7", task="text-generation", model_kwargs={"temperature":0, "max_length":64})

In [11]:
from langchain import PromptTemplate,  LLMChain

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])

llm_chain = LLMChain(prompt=prompt, llm=hf)

question = "What is electroencephalography?"

print(llm_chain.run(question))



 First, we need to understand what is an electroencephalogram. An electroencephalogram is a recording of brain activity. It is a recording of brain activity that is made by placing electrodes on the scalp. The electrodes are placed


#### GPT4all

In [1]:
from langchain import PromptTemplate, LLMChain

template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["question"])

In [7]:
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

local_path = 'D:\gpt4all\ggml-gpt4all-j-v1.3-groovy\ggml-gpt4all-j-v1.3-groovy.bin'

# Callbacks support token-wise streaming
callbacks = [StreamingStdOutCallbackHandler()]

# If you want to use GPT4ALL_J model add the backend parameter
llm = GPT4All(model=local_path, backend='gptj', callbacks=callbacks, verbose=True)

: 

: 

In [None]:
llm_chain = LLMChain(prompt=prompt, llm=llm)
question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"

llm_chain.run(question)

#### alpaca-native

In [31]:
from transformers import LlamaTokenizer, LlamaForCausalLM

tokenizer = LlamaTokenizer.from_pretrained("chavinlo/alpaca-native")

base_model = LlamaForCausalLM.from_pretrained(
    "chavinlo/alpaca-native",
    load_in_8bit=True,
    device_map='auto',
)

Downloading (…)lve/main/config.json: 100%|██████████| 556/556 [00:00<00:00, 111kB/s]
Downloading (…)model.bin.index.json: 100%|██████████| 26.8k/26.8k [00:00<00:00, 4.48MB/s]
Downloading (…)l-00001-of-00003.bin: 100%|██████████| 9.88G/9.88G [02:12<00:00, 74.8MB/s]
Downloading (…)l-00002-of-00003.bin: 100%|██████████| 9.89G/9.89G [03:38<00:00, 45.2MB/s]
Downloading (…)l-00003-of-00003.bin: 100%|██████████| 7.18G/7.18G [01:57<00:00, 61.1MB/s]
Downloading shards: 100%|██████████| 3/3 [08:20<00:00, 166.88s/it]

Unexpected exception formatting exception. Falling back to standard exception



Traceback (most recent call last):
  File "c:\Users\simone.marotta\PycharmProjects\chatbot-with-data\venv\lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\simone.marotta\AppData\Local\Temp\3\ipykernel_10168\2180908035.py", line 5, in <module>
    base_model = LlamaForCausalLM.from_pretrained(
  File "c:\Users\simone.marotta\PycharmProjects\chatbot-with-data\venv\lib\site-packages\transformers\modeling_utils.py", line 2626, in from_pretrained
NameError: name 'init_empty_weights' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\Users\simone.marotta\PycharmProjects\chatbot-with-data\venv\lib\site-packages\IPython\core\interactiveshell.py", line 2105, in showtraceback
    stb = self.InteractiveTB.structured_traceback(
  File "c:\Users\simone.marotta\PycharmProjects\chatbot-with-data\venv\lib\site-packages\IPytho

: 

In [30]:
from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model=base_model, 
    tokenizer=tokenizer, 
    max_length=256,
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1.2
)

local_llm = HuggingFacePipeline(pipeline=pipe)

NameError: name 'base_model' is not defined

## Chain run

In [22]:
from langchain import HuggingFaceHub
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

HUGGINGFACE_MODEL_NAME = "google/flan-t5-base"

os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"
llm = HuggingFaceHub(repo_id=HUGGINGFACE_MODEL_NAME, model_kwargs={"temperature":0, "max_length":512})
# llm = HuggingFacePipeline(pipeline=pipe)

chain = RetrievalQA.from_chain_type(llm=llm,
                                    chain_type="stuff",
                                    retriever=vectorestore.as_retriever(),
                                    input_key="question")

In [None]:
print(chain.run('How do you turn on the system?'))

In [15]:
from langchain import PromptTemplate

german_few_shot_doc_prompt = """Given the following {summaries} of a long document and a {question}, create a final answer with references ("SOURCES")."""
GERMAN_QA_PROMPT = PromptTemplate(template=german_few_shot_doc_prompt, input_variables=["summaries", "question"])
GERMAN_DOC_PROMPT = PromptTemplate(
    template="Inhalt: {page_content}\nQuelle: {source}",
    input_variables=["page_content", "source"])

In [19]:
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.llms import OpenAI
from langchain.chains import RetrievalQAWithSourcesChain

qa_chain = load_qa_with_sources_chain(OpenAI(temperature=0), chain_type="stuff",
                                    prompt=GERMAN_QA_PROMPT,
                                    document_prompt=GERMAN_DOC_PROMPT) 
chain = RetrievalQAWithSourcesChain(combine_documents_chain=qa_chain, retriever=vectorestore.as_retriever(),
                                     reduce_k_below_max_tokens=True, max_tokens_limit=3375,
                                     return_source_documents=True)

In [22]:
chain({"question": "how to turn on the system?"}, return_only_outputs=True)

{'answer': '\n\nAnswer: To turn on the system, press and hold the Power On button on the review module for 2 seconds. The system will take 5 minutes to become fully functional. If the system is not switched on, press and hold Power On on the review module until the indicator light stops flashing. To log off, select System from the menu bar of the review window, and then select Log Off. To switch the system off, press Power Off on the review module for 3 seconds.\n\nSources: Azurion Release 2.1 Instructions for Use, Philips 4522 203 78556, data\\e274394a0bfb4de196edae9a00adda36.pdf',
 'sources': '',
 'source_documents': [Document(page_content='4 Starting and Stopping the System\nThis section provides information about starting and stopping the system during normal use. For\ninformation about stopping the system in an emergency, see Emergency Stop  (page  20).\nYou start and stop the system using the review module.\nFigure 32 Review module\nLegend\n1 Power On\n2 Power Off\n3 Video Only\n