In [2]:
#install libraries
# sentence transformers for use later on
!pip -q install langchain huggingface_hub transformers sentence_transformers

# HuggingFace
LangChain provides two Hugging Face LLM wrappers:
1. a local pipeline (`HuggingFacePipeline`)
2. models hosted on the Hugging Face Hub (`HuggingFaceHub`)

The wrappers only work for models that support one of these tasks:
1. text2text-generation => encoder-decoder models (e.g. BART, T5)
2. text-generation => decoder-only models (e.g. GPT)

# HuggingFaceHub - standard method

In [5]:
#initiate huggingface API token
import os
from getpass import getpass

#token
# Never hardcode the token in the notebook: reuse the value already present in
# the environment, and only prompt for it (without echoing) when it is missing.
# NOTE(review): the token that used to be committed on this line is exposed and
# should be revoked.
if not os.environ.get('HUGGINGFACEHUB_API_TOKEN'):
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = getpass('Hugging Face API token: ')

In [6]:
#import libraries
#set up LLM chain
from langchain import PromptTemplate, HuggingFaceHub, LLMChain

#template
#step-by-step prompt with a single {question} placeholder
template = (
    "Question: {question}\n"
    "\n"
    "Answer: Let's think step by step."
)

#prompt
#the only variable filled in at run time is "question"
prompt = PromptTemplate(input_variables=["question"], template=template)

In [16]:
#LLMchain
#flan t5 xxl model, referenced by its Hugging Face Hub repo id
hub_llm = HuggingFaceHub(
    repo_id="google/flan-t5-xxl",
    model_kwargs={"temperature": 1, "max_length": 64},
)

#chain = prompt template + hub-hosted model
llm_chain = LLMChain(llm=hub_llm, prompt=prompt)



In [17]:
#send a first query through the chain
question = "Translate to German: My name is Adam"
answer = llm_chain.run(question)

#show the model's reply
print(answer)

Ich bin Adam.


In [18]:
#second query through the same chain
question = "How many muscles attach to the humerus bone?"
answer = llm_chain.run(question)

#show the model's reply
print(answer)

The humerus bone is the largest bone in the arm. The humerus bone has a total of ten muscles that attach to it. The answer: ten.


# Local model from HF

Reasons to run a model locally:
- you want to use your own fine-tuned models
- you host the model on your own GPU, etc.
- some models only work locally

# Flan-T5 - Encoder-Decoder Model

In [23]:
#import libraries
from langchain.llms import HuggingFacePipeline #bring in huggingface pipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

#checkpoint to load
model_id = 'google/flan-t5-large'

#tokenizer and encoder-decoder (seq2seq) model for that checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

#huggingface pipeline setup: text2text-generation with max_length=100
pipe_kwargs = dict(model=model, tokenizer=tokenizer, max_length=100)
pipe = pipeline("text2text-generation", **pipe_kwargs)

#wrap the pipeline as the local LangChain LLM
local_llm = HuggingFacePipeline(pipeline=pipe)

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [30]:
#query local LLM
#direct conditional generation from the LLM (the prompt template is not used here)
#the stray apostrophe that used to sit before the question mark has been removed
print(local_llm("What is the capital of France?"))

paris


In [31]:
#chain the shared prompt template with the local LLM
llm_chain = LLMChain(llm=local_llm, prompt=prompt)

#question to ask
question = "What is the capital of Germany?"

#run the chain and print its answer
answer = llm_chain.run(question)
print(answer)

The capital of Germany is Berlin. Berlin is located in Germany. So the answer is Berlin.


# GPT2-medium - Decoder Only Model

microsoft/DialoGPT-large

In [33]:
#model creation
model_id = "gpt2-medium"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id) #decoder-only (causal) model

#set up pipeline
#pad_token_id is set explicitly to the tokenizer's EOS id so generation does not
#fall back to it with a "Setting `pad_token_id` to `eos_token_id`" warning
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100,
    pad_token_id=tokenizer.eos_token_id
)

#local llm
local_llm = HuggingFacePipeline(pipeline=pipe)

config.json:   0%|          | 0.00/718 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.52G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [34]:
#chain the shared prompt template with the local GPT-2 LLM
llm_chain = LLMChain(llm=local_llm, prompt=prompt)

#question to ask
question = "What is the capital of Germany?"

#run the chain and print its answer
answer = llm_chain.run(question)
print(answer)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


 If we look at each country according to its income and expenditure, we can define each economy as a single entity. As such, we can look at the income distribution (in millions of U.S. dollars) and see a clear distribution of the national income. While Germany spends heavily on defense, there are few resources devoted to the research, development and training of the nation's defense industry. Germany


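#### Output is not great! :) GPT-2 is a plain language model without instruction tuning, so it continues the prompt text instead of answering the question.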

# BlenderBot - EncoderDecoder model

In [35]:
#library import
from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM

#checkpoint to load
model_id = 'facebook/blenderbot-1B-distill'

#tokenizer and seq2seq model for that checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

#text2text-generation pipeline with max_length=100
pipe_kwargs = dict(model=model, tokenizer=tokenizer, max_length=100)
pipe = pipeline("text2text-generation", **pipe_kwargs)

#wrap the pipeline as the local LangChain LLM
local_llm = HuggingFacePipeline(pipeline=pipe)

tokenizer_config.json:   0%|          | 0.00/1.05k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/127k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/62.9k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/16.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/310k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.87G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/347 [00:00<?, ?B/s]

In [36]:
#chain the shared prompt template with the local BlenderBot LLM
llm_chain = LLMChain(llm=local_llm, prompt=prompt)

#question to ask
question = "What area is best for growing wine in France?"

#run the chain and print its answer
answer = llm_chain.run(question)
print(answer)

 I'm not sure, but I do know that France is one of the largest producers of wine in the world.


# Sentence Transformers

In [38]:
#imports
from langchain.embeddings import HuggingFaceEmbeddings

#sentence-transformers checkpoint used to turn text into a vector
model_name = "sentence-transformers/all-mpnet-base-v2"

#LangChain embeddings wrapper built for that checkpoint
embedding_kwargs = {"model_name": model_name}
hf = HuggingFaceEmbeddings(**embedding_kwargs)

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [39]:
#embed a single string; the last expression is displayed as the cell output
sample_text = 'this is an embedding'
hf.embed_query(sample_text)

[0.010657302103936672,
 -0.09967266023159027,
 -0.026967110112309456,
 0.06531771272420883,
 0.021004971116781235,
 0.04262350872159004,
 0.011534038931131363,
 -0.0062293498776853085,
 0.0517582893371582,
 0.0073067424818873405,
 0.02135339006781578,
 0.04269154369831085,
 0.023143809288740158,
 0.009952686727046967,
 0.05646302551031113,
 -0.0613798052072525,
 0.052743811160326004,
 0.02468394674360752,
 -0.013267810456454754,
 -0.007051174063235521,
 0.026656419038772583,
 -0.005913480184972286,
 0.004097468685358763,
 0.038412392139434814,
 -0.014230645261704922,
 0.023023519665002823,
 -0.007326602470129728,
 -0.03562537953257561,
 -0.01793411746621132,
 -0.013930185697972775,
 0.011977546848356724,
 -0.007365901954472065,
 0.024451451376080513,
 -0.06637255102396011,
 1.5677629789934144e-06,
 0.018217215314507484,
 0.001974888378754258,
 -0.018329504877328873,
 -0.014930637553334236,
 -0.005393486004322767,
 -0.011222314089536667,
 0.015792980790138245,
 -0.027141883969306946,
 -

In [40]:
#embed a list of strings in one call
#the last expression is displayed as the cell output
sample_docs = ['this is an embedding', 'this is another embedding']
hf.embed_documents(sample_docs)

[[0.010657289065420628,
  -0.09967265278100967,
  -0.026967117562890053,
  0.06531771272420883,
  0.021004974842071533,
  0.04262351244688034,
  0.011534007266163826,
  -0.006229364313185215,
  0.0517583005130291,
  0.007306757383048534,
  0.02135336585342884,
  0.04269154742360115,
  0.0231438297778368,
  0.009952668100595474,
  0.056463032960891724,
  -0.06137979030609131,
  0.05274383723735809,
  0.02468392439186573,
  -0.013267814181745052,
  -0.007051148451864719,
  0.026656461879611015,
  -0.005913481581956148,
  0.004097471479326487,
  0.03841241076588631,
  -0.01423061452805996,
  0.02302350476384163,
  -0.0073265875689685345,
  -0.0356253944337368,
  -0.017934128642082214,
  -0.013930183835327625,
  0.011977538466453552,
  -0.007365903817117214,
  0.024451429024338722,
  -0.06637256592512131,
  1.5677625242460635e-06,
  0.01821722649037838,
  0.0019749049097299576,
  -0.018329529091715813,
  -0.014930647797882557,
  -0.005393484141677618,
  -0.01122230663895607,
  0.0157929565