# Despliegue de LLM



In [None]:
!pip install -q langchain huggingface_hub transformers sentence_transformers accelerate bitsandbytes python_dotenv

## 1 - Pipeline Local (descargamos el modelo)


#### A - HuggingFace - Transformers

In [None]:
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM

In [None]:
# Cell-local import, following the notebook's habit of importing per cell.
from transformers import BitsAndBytesConfig

# Download the model and its tokenizer.
# Alternative checkpoint noted by the author: google/flan-t5-small
model_id = 'google/flan-t5-base'
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Request 8-bit weights through an explicit quantization config: passing
# `load_in_8bit=True` straight to `from_pretrained` is deprecated in recent
# transformers releases in favour of `quantization_config`.
model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map='auto',
)

In [None]:
# Text-to-text generation pipeline built from the model and tokenizer loaded
# above; generation is configured with max_length=128.
model_generator = pipeline(
    task="text2text-generation",
    tokenizer=tokenizer,
    model=model,
    max_length=128,
)

In [None]:
# Query the local pipeline; the prompt's trailing "Let's think step by step"
# asks the model to reason before answering.
model_generator("In which country is madrid? Let's think step by step")

In [None]:
# Second query against the same local pipeline: an open-ended writing task.
model_generator('Create an email for my boss requesting vacations')

#### B - LangChain

In [None]:
from langchain.llms import HuggingFacePipeline

# Wrap the transformers pipeline built earlier in LangChain's
# HuggingFacePipeline, then call the wrapper with a prompt string and print
# what it returns.
local_llm = HuggingFacePipeline(pipeline=model_generator)
print(local_llm("In which country is madrid? Let's think step by step"))

In [None]:
# Same writing task as before, this time sent through the LangChain wrapper.
local_llm('Create an email for my boss requesting vacations')

## 2 - Inference API

In [None]:
from dotenv import load_dotenv
# Load environment variables from a .env file (python-dotenv).
# NOTE(review): presumably this is where HUGGINGFACEHUB_API_TOKEN comes from;
# confirm a .env file defining it exists next to the notebook.
load_dotenv()

In [None]:
import os
# Optional alternative: set the Hugging Face token directly in code. The
# quoted value on the right is a placeholder, not a real token.
# os.environ['HUGGINGFACEHUB_API_TOKEN'] = 'HUGGINGFACEHUB_API_TOKEN'

#### A - HuggingFace

In [None]:
from huggingface_hub.inference_api import InferenceApi

# Client for the hosted google/flan-t5-large model. The token is read from
# the environment, so a missing HUGGINGFACEHUB_API_TOKEN raises KeyError here.
model_api = InferenceApi(
    token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
    repo_id="google/flan-t5-large",
)

In [None]:
# Question to send to the hosted model.
pregunta = "What is the capital of Spain?"

# Assemble the prompt line by line; the empty entry produces the blank line
# between the question and the answer cue.
prompt = "\n".join([
    f"Question: {pregunta}",
    "",
    "Answer: Let's think step by step.",
])

In [None]:
# Show the fully rendered prompt before sending it.
print(prompt)

In [None]:
# Send the prompt through the InferenceApi client created earlier.
model_api(prompt)

#### B - LangChain

In [None]:
# Import each class from its own submodule instead of the package root:
# root-level re-exports are deprecated in newer LangChain releases, and this
# matches the `from langchain.llms import ...` form used earlier in the file.
from langchain.chains import LLMChain
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate

# Prompt skeleton with a single `{question}` placeholder.
plantilla = """Question: {question}

Answer: Let's think step by step."""

# NOTE: this rebinds `prompt`, which earlier in the file held a plain prompt
# string; from here on `prompt` is a PromptTemplate.
prompt = PromptTemplate(template=plantilla, input_variables=["question"])

In [None]:
# Chain that combines the prompt template with the hosted
# google/flan-t5-large model.
# NOTE(review): no token is passed explicitly; presumably HuggingFaceHub
# picks it up from the environment. Confirm.
llm_chain = LLMChain(
    prompt=prompt,
    llm=HuggingFaceHub(
        repo_id="google/flan-t5-large",
        model_kwargs=dict(temperature=0, max_length=128),
    ),
)

In [None]:
# Run the chain on a factual question and print the result.
question = "What is the capital of Spain?"

print(llm_chain.run(question))

In [None]:
# Reuse the same chain with a different question; `question` is rebound.
question = "what is the answer to life the universe and everything?"

print(llm_chain.run(question))