# Using Open-Source HuggingFace 🤗 Transformers

In [None]:
# !pip install -r requirements.txt --upgrade --quiet
# !pip install --upgrade accelerate --quiet

Restart the Jupyter kernel at this point: `accelerate` sometimes fails to start correctly right after it has been installed or upgraded.

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import accelerate

In [None]:
# Model identifier passed to both AutoTokenizer.from_pretrained and
# AutoModelForCausalLM.from_pretrained below; change it here to try another model.
MODEL_NAME = "tiiuae/falcon-7b-instruct"

## Load a model
All models are available by searching the HuggingFace website: https://huggingface.co/models.

**Note: when running on a CPU, responses will be substantially slower than on a GPU.**

In [None]:
# Load the tokenizer and the causal language model that belong to MODEL_NAME.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # Weights are loaded in full 32-bit precision.
    # NOTE(review): a half-precision dtype would roughly halve memory on a GPU -- confirm hardware before changing.
    torch_dtype=torch.float32,
    # Device placement is left to the library ("auto"); presumably handled by `accelerate` -- see from_pretrained docs.
    device_map="auto",
    # Custom modelling code from the model repository is not allowed to run.
    trust_remote_code=False,
    low_cpu_mem_usage=True
)

## QA / Text Generation

In [None]:
# The question sent to the model as the "user" turn of the conversation below.
USER_PROMPT = "Explain how gravity affects planetary orbits."

In [None]:
# Conversation in role/content form: one system instruction followed by the user's
# question. generated_hf_response renders this list with tokenizer.apply_chat_template.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": USER_PROMPT}
]

In [None]:
def generated_hf_response(model, 
                    tokenizer, 
                    messages, 
                    max_new_tokens=512, 
                    return_type="pt",
                    skip_special_tokens=True,
                    **gen_kwargs):
    """Generate the model's reply to a chat-style conversation.

    The conversation is rendered to a prompt string with the tokenizer's chat
    template, tokenized, passed to ``model.generate``, and only the newly
    generated tokens (everything after the prompt) are decoded and returned.

    Args:
        model: Object exposing ``device`` and ``generate(**inputs, ...)``.
        tokenizer: Object exposing ``apply_chat_template``, ``__call__`` and
            ``decode``.
        messages: Conversation passed unchanged to
            ``tokenizer.apply_chat_template``.
        max_new_tokens: Forwarded to ``model.generate``.
        return_type: Forwarded to the tokenizer as ``return_tensors``.
        skip_special_tokens: Forwarded to ``tokenizer.decode``.
        **gen_kwargs: Extra keyword arguments forwarded to ``model.generate``
            (for example ``do_sample=False``).

    Returns:
        Whatever ``tokenizer.decode`` returns for the generated continuation
        of the first (and only) sequence in the batch.
    """
    prompt_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # The chat template has already written every special token the model
    # expects into `prompt_text`. Letting the tokenizer add its own special
    # tokens again can duplicate them (e.g. a second BOS), so disable that here.
    inputs = tokenizer(
        [prompt_text],
        return_tensors=return_type,
        add_special_tokens=False,
    ).to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        **gen_kwargs,
    )

    # generate() returns prompt + continuation; slice off the prompt so that
    # only the new tokens are decoded.
    prompt_len = len(inputs.input_ids[0])
    completion_tokens = outputs[0][prompt_len:]
    return tokenizer.decode(completion_tokens,
                            skip_special_tokens=skip_special_tokens)

In [None]:
# Ask the model the question in `messages`: sampling is disabled and the reply
# is capped at 100 new tokens. The returned string is shown as the cell output.
generated_hf_response(
    model=model,
    tokenizer=tokenizer,
    messages=messages,
    max_new_tokens=100,
    return_type="pt",
    skip_special_tokens=True,
    do_sample=False,
)