# HF Unit 1 - Transformers

In [12]:
classifier = pipeline("sentiment-analysis")
classifier([
    "This Hugging face course is the best ever!",
    """NVIDIA's quarterly earnings disappointed its shareholders,
    with the market opening 50 basis points lower than the previous day's close""",
    """NVIDIA's quarterly earnings was met with a lukewarm response,
    with the market opening 5 basis points higher than the previous day's close, producing a 5% YTD return for investors.""",
])

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


[{'label': 'POSITIVE', 'score': 0.9998713731765747},
 {'label': 'NEGATIVE', 'score': 0.9996659755706787},
 {'label': 'NEGATIVE', 'score': 0.8500027060508728}]

In [1]:
import torch
import transformers

In [2]:
import logging

stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.DEBUG)

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s %(module)s %(funcName)s %(message)s',
    handlers=[
        stream_handler,
    ]
)


In [3]:
from transformers import AutoTokenizer, AutoModelForCausalLM,  LlamaTokenizer, BertTokenizer
from transformers import BitsAndBytesConfig
from optimum.bettertransformer import BetterTransformer
#model_id = "distilbert-base-uncased"

model_id = "tiiuae/falcon-7b-instruct"
#model_id = "codellama/CodeLlama-7b-hf"
# model_id = "bert-base-uncased"
#model_id = "google/bert_uncased_L-2_H-128_A-2"

#tokenizer = AutoTokenizer.from_pretrained(model)
#model = AutoModelForCausalLM.from_pretrained(model, trust_remote_code=True)

2023-09-01 20:44:57,882 INFO instantiator <module> Created a temporary directory at /tmp/tmptdp92n5t
2023-09-01 20:44:57,883 INFO instantiator _write Writing /tmp/tmptdp92n5t/_remote_module_non_scriptable.py


In [8]:
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)
with torch.device("cuda"):
    logging.info("Running Tokenizer ...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model_4bit = AutoModelForCausalLM.from_pretrained(
        model_id,
        #load_in_4bit=True,
        trust_remote_code=True,
        quantization_config=quantization_config,
    )
    #model = BetterTransformer.transform(model)
    logging.info("Making new pipeline w/GPU ...")
    pipeline = transformers.pipeline(
        "text-generation",
        model=model_4bit,
        use_cache=True,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        max_length=500,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
        # device=-1,
    )

2023-09-01 20:53:06,768 INFO 2476881417 <module> Running Tokenizer ...
2023-09-01 20:53:06,770 DEBUG connectionpool _get_conn Resetting dropped connection: huggingface.co
2023-09-01 20:53:07,032 DEBUG connectionpool _make_request https://huggingface.co:443 "HEAD /tiiuae/falcon-7b-instruct/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
2023-09-01 20:53:07,355 DEBUG connectionpool _make_request https://huggingface.co:443 "HEAD /tiiuae/falcon-7b-instruct/resolve/main/config.json HTTP/1.1" 200 0
2023-09-01 20:53:07,605 DEBUG connectionpool _make_request https://huggingface.co:443 "HEAD /tiiuae/falcon-7b-instruct/resolve/main/configuration_RW.py HTTP/1.1" 200 0
2023-09-01 20:53:07,866 DEBUG connectionpool _make_request https://huggingface.co:443 "HEAD /tiiuae/falcon-7b-instruct/resolve/main/config.json HTTP/1.1" 200 0
2023-09-01 20:53:08,118 DEBUG connectionpool _make_request https://huggingface.co:443 "HEAD /tiiuae/falcon-7b-instruct/resolve/main/modelling_RW.py HTTP/1.1" 200 0


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

2023-09-01 20:53:12,372 DEBUG connectionpool _make_request https://huggingface.co:443 "HEAD /tiiuae/falcon-7b-instruct/resolve/main/generation_config.json HTTP/1.1" 200 0
2023-09-01 20:53:12,414 INFO 2476881417 <module> Making new pipeline w/GPU ...


In [10]:
with torch.device("cuda"):
    logging.info("Generating sequences")
    sequences = pipeline(
       """Sally is a kindergarden teacher and Val is her student.Sally needs to contact Val's parent Soph to discuss her child's performance in school.Val is always late for class, and Sally needs to highlight to Soph about it.Val is extroverted and has made many new friends in school.
       Soph: Morning Sally! How is Val doing in school?
       Sally:""",
    )
    logging.info("Printing sequences")
    for seq in sequences:
        logging.info(f"Result: {seq['generated_text']}")
    logging.info("Operation Complete")

2023-09-01 20:55:01,882 INFO 1916992056 <module> Generating sequences
2023-09-01 20:55:10,013 INFO 1916992056 <module> Printing sequences
2023-09-01 20:55:10,014 INFO 1916992056 <module> Result: Sally is a kindergarden teacher and Val is her student.Sally needs to contact Val's parent Soph to discuss her child's performance in school.Val is always late for class, and Sally needs to highlight to Soph about it.Val is extroverted and has made many new friends in school.
       Soph: Morning Sally! How is Val doing in school?
       Sally: Hi Soph, Val is doing very well in all her classes. However, she is always late for school. We are concerned that she may be having trouble waking up in the morning.
       Soph: I see. Well, I'm sure there's nothing to be concerned about. But if you notice any change in her routine or performance, please let me know so we can address it.
2023-09-01 20:55:10,014 INFO 1916992056 <module> Operation Complete


In [14]:
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain

with torch.device("cuda"):
    llm = HuggingFacePipeline(pipeline=pipeline)
    
    template = """Question: {question}
    Answer: Let's think step by step."""
    
    prompt = PromptTemplate(
        template=template, 
        input_variables= ["question"]
    )
    
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    logging.info("Asking Question...")
    output = llm_chain.run("Who was the first man on the moon?")
    print(output)
    logging.info("Question Answered")

2023-09-01 21:08:21,645 INFO 1843959056 <module> Asking Question...
2023-09-01 21:08:26,617 INFO 1843959056 <module> Question Answered


 The first man on the moon was the first person ever sent into space by NASA in 1954, but it was a Soviet spacecraft that launched the first human in space. Therefore, the first man to land on the moon was the cosmonaut Yuri Gagarin from the Soviet Union.
