## LLAMA 2 SETUP

In [None]:
%%capture
import torch
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Tokenizer and weights are read from the same local checkpoint directory.
tok_path = "./models/Llama2-13B-nous-hermes"
model_path = "./models/Llama2-13B-nous-hermes"

tokenizer = AutoTokenizer.from_pretrained(tok_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    load_in_8bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Deterministic (greedy) decoding is requested explicitly with do_sample=False.
# The previous temperature=0.0 / top_p=0.95 arguments are sampling-only knobs:
# they are ignored (with warnings) when sampling is off, and a zero temperature
# is not a valid value if sampling happens to be enabled by the checkpoint's
# generation config.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.15,
)

# LangChain wrapper used by every parsing example further down.
local_llm = HuggingFacePipeline(pipeline=pipe)

## FALCON SETUP

In [None]:
%%capture
from langchain import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Local checkpoint directory for Falcon-7B-instruct.
model_path = "./models/Falcon-7B-instruct"

tokenizer = AutoTokenizer.from_pretrained(model_path)

# NOTE(review): this assignment rebinds the name `pipeline` from the imported
# transformers factory to the pipeline object it returns. Later cells use
# `pipeline` as the object (`pipeline.model`, `HuggingFacePipeline(pipeline=...)`),
# and the factory is only available again after it is re-imported.
pipeline = pipeline(
    "text-generation",
    model=model_path,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map = {"":0},
    # NOTE(review): in Hugging Face generation `max_length` counts the prompt
    # tokens as well as the generated ones — confirm this is the intended budget.
    max_length=2048,       # Maximum length of the response
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    # Allows the custom model code shipped with the checkpoint to be executed.
    trust_remote_code=True,
)
# Weighs about 14GB

In [None]:
# Switch the pipeline's underlying model to evaluation mode; being the last
# expression of the cell, the returned module is echoed as the cell output.
pipeline.model.eval()

RWForCausalLM(
  (transformer): RWModel(
    (word_embeddings): Embedding(65024, 4544)
    (h): ModuleList(
      (0-31): 32 x DecoderLayer(
        (input_layernorm): LayerNorm((4544,), eps=1e-05, elementwise_affine=True)
        (self_attention): Attention(
          (maybe_rotary): RotaryEmbedding()
          (query_key_value): Linear(in_features=4544, out_features=4672, bias=False)
          (dense): Linear(in_features=4544, out_features=4544, bias=False)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (mlp): MLP(
          (dense_h_to_4h): Linear(in_features=4544, out_features=18176, bias=False)
          (act): GELU(approximate='none')
          (dense_4h_to_h): Linear(in_features=18176, out_features=4544, bias=False)
        )
      )
    )
    (ln_f): LayerNorm((4544,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=4544, out_features=65024, bias=False)
)

In [None]:
# Wrap the Falcon pipeline for LangChain.
# NOTE(review): the pipeline itself was created with do_sample=True / top_k=10.
# Whether `model_kwargs={'temperature': 0}` reaches generation when an already
# built pipeline is passed in is not visible here — confirm, otherwise outputs
# remain sampled rather than deterministic.
local_llm = HuggingFacePipeline(pipeline = pipeline, model_kwargs = {'temperature':0})

## MPT SETUP

In [None]:
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

# Tokenizer and weights come from two different local snapshots: the tokenizer
# from the EleutherAI gpt-neox-20b snapshot, the model from mosaicml mpt-7b-instruct.
path_tok = "./models/MPT-7B-Instruct/models--EleutherAI--gpt-neox-20b/snapshots/4e49eadb5d14bd22f314ec3f45b69a87b88c7691/"
path_mod = "./models/MPT-7B-Instruct/models--mosaicml--mpt-7b-instruct/snapshots/bbe7a55d70215e16c00c1825805b81e4badb57d7/"

tokenizer = AutoTokenizer.from_pretrained(path_tok)

# trust_remote_code=True allows the custom model code shipped with the
# checkpoint to be executed.
# NOTE(review): `max_seq_len` is passed as an extra keyword — presumably it
# overrides the MPT config value of the same name; confirm.
model = AutoModelForCausalLM.from_pretrained(path_mod,
                                            load_in_8bit=True,
                                            device_map={"":0},
                                            trust_remote_code=True,
                                            torch_dtype=torch.float16,
                                            max_seq_len = 2048
                                            )

# Takes up about 14GB

You are using config.init_device='cpu', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.

Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)
  warn(msg)
  warn(msg)
  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
import torch
from transformers import StoppingCriteria, StoppingCriteriaList

model.eval() # probably not necessary, but just to be safe
model.tie_weights()

# mpt-7b is trained to add "<|endoftext|>" at the end of generations
# The ids computed here are read as a module-level global by StopOnTokens below.
stop_token_ids = tokenizer.convert_tokens_to_ids(["<|endoftext|>"])

# Custom stopping criterion driven by the module-level `stop_token_ids`.
class StopOnTokens(StoppingCriteria):
    """Stop generation once the newest token of the first sequence is a stop token."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Only row 0 of `input_ids` is inspected, and only its last position.
        return any(input_ids[0][-1] == token_id for token_id in stop_token_ids)


stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [None]:
%%capture
import torch
from langchain.llms import HuggingFacePipeline
from transformers import pipeline

# Deterministic (greedy) decoding is requested explicitly with do_sample=False.
# The previous temperature=0 / top_p=0.95 arguments only take effect when
# sampling is enabled, and a zero temperature is not a valid sampling value,
# so they are dropped instead of being silently ignored.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=2048,
    do_sample=False,
    repetition_penalty=1.15,
    # Stops as soon as the "<|endoftext|>" token is produced.
    stopping_criteria=stopping_criteria,
)

# LangChain wrapper used by every parsing example further down.
local_llm = HuggingFacePipeline(pipeline=pipe)

The model 'MPTForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausalLM', 'RobertaForCausalLM', 'RobertaPreLayerNormFor

# PYDANTIC OUTPUT PARSER

## TEST GPU QUESTION

In [None]:
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field, validator
from langchain.prompts import PromptTemplate, ChatPromptTemplate

In [None]:
# Sample product blurb used as the `text` input of the GPU extraction prompt.
gpu_text = """
RTX 4090 is the ultimate GeForce GPU. It's a huge leap forward in performance, efficiency,
and AI-powered graphics, it supports the third generation of Ray-Tracing. Experience ultra-high-performance
gaming, incredibly detailed virtual worlds,
unprecedented productivity, and new ways to create. Includes 24GB of GDDR6X memory to deliver the
ultimate experience for gamers and creators, 16384 CUDA cores. Starting at €1,789.
"""

In [None]:
# Define your desired data structure.
# Explanations are kept in `#` comments on purpose: pydantic copies a model
# docstring into the generated JSON schema, which would change the prompt text
# produced by the output parser.
class GPU(BaseModel):
    name: str = Field(description="name of the GPU (graphics card) model")
    memory: int = Field(description="number of Gigabytes of memory of the gpu",
                       enum=[6,10,12,24])
    price: float = Field(description="price of the gpu, answer with the just a float number")
    rtx_support: str = Field(description="Wether the gpu supports rtx (ray-tracing) or not",
                             enum=["True", "False"])

    # Returns the field names to request in the prompt. Declared as a
    # staticmethod so it works both as GPU.get_values() and on an instance
    # (the undecorated version raised TypeError when called on an instance).
    @staticmethod
    def get_values():
        return ['name','memory','price','rtx_support']

    # You can add custom validation logic easily with Pydantic.
    # Rejects anything other than the two literal strings "True" / "False".
    @validator('rtx_support')
    def check_rtx(cls, field):
        if field not in ("True", "False"):
            raise ValueError("Badly formed answer!")
        return field

In [None]:
# Parser that turns the model's JSON answer into a validated GPU instance.
parser = PydanticOutputParser(pydantic_object=GPU)

gpu_template = """
The following is a text about a GPU:

text: {text}

From the text extract the following information: {info}

{format_instructions}

formatted answer:
"""

# `gpu_info` was used below without ever being defined in the notebook, so a
# fresh kernel failed with NameError. Derive it from the schema's own field list.
gpu_info = GPU.get_values()

prompt = PromptTemplate(
    template=gpu_template,
    input_variables=["text", "info"],
    # The JSON-schema instructions are fixed per parser, so bind them once here.
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# NOTE: `input` shadows the Python builtin; the name is kept because the
# following cells read it.
input = prompt.format_prompt(text=gpu_text, info=gpu_info)

In [None]:
# Show the fully rendered prompt text before it is sent to the model.
print(input.to_string())


For the following text about a GPU, extract the following information:

['name', 'memory', 'price', 'rtx support']

text: 
RTX 4090 is the ultimate GeForce GPU. It's a huge leap forward in performance, efficiency, 
and AI-powered graphics, it supports the third generation of Ray-Tracing. Experience ultra-high-performance 
gaming, incredibly detailed virtual worlds, 
unprecedented productivity, and new ways to create. Includes 24GB of GDDR6X memory to deliver the 
ultimate experience for gamers and creators, 16384 CUDA cores. Starting at €1,789.


The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema

In [None]:
# Number of tokens in the rendered prompt, using the currently loaded tokenizer.
# Presumably a check that the prompt fits the generation length budget — confirm.
len(tokenizer.tokenize(input.to_string()))

466

In [None]:
# Send the rendered prompt to the active `local_llm` and show the raw completion.
output = local_llm(input.to_string())
print(output)



```
{
    "name": "RTX 4090",
    "memory": 24,
    "price": 1789.0,
    "rtx_support": "True"
}
```


In [None]:
# Parse the raw completion with the GPU parser. Despite its name, `JSON_output`
# holds a GPU model instance (see the echoed value), not a JSON string.
JSON_output = parser.parse(output)
JSON_output

GPU(name='RTX 4090', memory=24, price=1789.0, rtx_support='True')

## Genera una battuta con setup e formatta il tutto in un json

In [None]:
# Target schema for the joke: a question-style setup plus its punchline.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke, must end with a question mark")
    punchline: str = Field(description="answer to resolve the joke")

    # Optional custom validation, currently disabled.
    # NOTE(review): the sample completion shown in this notebook ends the setup
    # with "? " (trailing space), which this check would reject — presumably the
    # reason it was commented out; confirm before re-enabling.
    # @validator('setup')
    # def question_ends_with_question_mark(cls, field):
    #     if field[-1] != '?':
    #         raise ValueError("Badly formed question!")
    #     return field

joke_query = "Tell me a joke."

# Rebinds `parser` (previously the GPU parser) to one that targets Joke.
parser = PydanticOutputParser(pydantic_object=Joke)

# Rebinds `prompt` too; the format instructions are bound once as a partial variable.
prompt = PromptTemplate(
    template="Answer the user query: {query}.\n{format_instructions}\n formatted answer:",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

# Rendered prompt value; converted to plain text with `.to_string()` when it is sent to the model.
_input = prompt.format_prompt(query=joke_query)


In [None]:
# Send the joke prompt to the active `local_llm` and show the raw completion.
output = local_llm(_input.to_string())
print(output)

 
{
    "setup": "Why don't scientists trust atoms? ",
    "punchline": "Because they make up everything!"
}


In [None]:
# Parse the raw completion with the Joke parser; the resulting object is echoed as the cell output.
parser.parse(output)

Joke(setup="Why don't scientists trust atoms? ", punchline='Because they make up everything!')

## PARSING PER SOLE STRINGHE (obsoleto)

In [None]:
from langchain.output_parsers import ResponseSchema
from langchain.output_parsers import StructuredOutputParser
from langchain.prompts import PromptTemplate, ChatPromptTemplate

In [None]:
# One ResponseSchema per field to extract from the review.
# Descriptions are single-line strings: the previous triple-quoted versions
# carried their newlines and source indentation into the generated format
# instructions, where each description is rendered as a one-line `//` comment.
gift_schema = ResponseSchema(
    name="gift",
    description=(
        "Was the item purchased as a gift for someone else? "
        "Answer True if yes, False if not or unknown."
    ),
)
delivery_days_schema = ResponseSchema(
    name="delivery_days",
    description=(
        "How many days did it take for the product to arrive? "
        "If this information is not found, output -1."
    ),
)
price_value_schema = ResponseSchema(
    name="price_value",
    description=(
        "Extract any sentences about the value or price, "
        "and output them as a comma separated Python list."
    ),
)

# Order here is the order in which the fields appear in the format instructions.
response_schemas = [
    gift_schema,
    delivery_days_schema,
    price_value_schema,
]

In [None]:
# Build a parser from the response schemas and print the formatting
# instructions it generates; these are later filled into the review prompt.
output_parser = StructuredOutputParser.from_response_schemas(response_schemas)

format_instructions = output_parser.get_format_instructions()
print(format_instructions)

The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```":

```json
{
	"gift": string  // Was the item purchased
                             as a gift for someone else? 
                             Answer True if yes,
                             False if not or unknown.
	"delivery_days": string  // How many days
                                      did it take for the product
                                      to arrive? If this 
                                      information is not found,
                                      output -1.
	"price_value": string  // Extract any
                                    sentences about the value or 
                                    price, and output them as a 
                                    comma separated Python list.
}
```


In [None]:
# Sample customer review used as the `text` input of the structured-parsing example.
customer_review = """
This leaf blower is pretty amazing.  It has four settings:
candle blower, gentle breeze, windy city, and tornado.
It arrived in two days, just in time for my wife's
anniversary present.
I think my wife liked it so much she was speechless.
So far I've been the only one using it, and I've been
using it every other morning to clear the leaves on our lawn.
It's slightly more expensive than the other leaf blowers
out there, but I think it's worth it for the extra features.
"""

In [None]:
# Prompt for the review example. The three field descriptions are written out
# by hand here and appear a second time through {format_instructions}.
review_template = """
For the following text, extract the following information:

gift: Was the item purchased as a gift for someone else? Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price, and output them as a comma separated Python list.

text: {text}

{format_instructions}
"""

# Rebinds `prompt`, this time as a chat prompt template.
prompt = ChatPromptTemplate.from_template(template=review_template)

# NOTE: `input` shadows the Python builtin and replaces the value bound in the
# GPU example; here it is the result of format_messages, which the next cells
# index as `input[0].content`.
input = prompt.format_messages(text=customer_review,
                                format_instructions=format_instructions)

In [None]:
# Show the text content of the first formatted message, i.e. the rendered prompt.
print(input[0].content)


For the following text, extract the following information:

gift: Was the item purchased as a gift for someone else? Answer True if yes, False if not or unknown.

delivery_days: How many days did it take for the product to arrive? If this information is not found, output -1.

price_value: Extract any sentences about the value or price, and output them as a comma separated Python list.

text: 
This leaf blower is pretty amazing.  It has four settings:
candle blower, gentle breeze, windy city, and tornado. 
It arrived in two days, just in time for my wife's
anniversary present. 
I think my wife liked it so much she was speechless. 
So far I've been the only one using it, and I've been 
using it every other morning to clear the leaves on our lawn. 
It's slightly more expensive than the other leaf blowers 
out there, but I think it's worth it for the extra features.


The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```jso

In [None]:
# Send the rendered review prompt to the active `local_llm` and show the raw completion.
output = local_llm(input[0].content)
print(output)




In [None]:
# Parse the raw completion into a dict keyed by the response-schema names;
# the dict is echoed as the cell output.
output_dict = output_parser.parse(output)
output_dict

{'gift': 'True',
 'delivery_days': '-1',
 'price_value': ["It's slightly more expensive than the other leaf blowers out there.",
  "I think it's worth it for the extra features."]}

In [None]:
# Inspect a single field; as the echoed value shows, it is the string 'True', not a boolean.
output_dict["gift"]

'True'