In [2]:
# Check for a GPU by invoking the NVIDIA system management CLI.
# If the command is missing (as in the captured output below), this runtime has no NVIDIA driver/GPU available.
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


# Install required Packages

In [3]:
# Needed packages
!pip install -q transformers einops accelerate langchain bitsandbytes

# Logging in with a Hugging Face account

---



In [4]:
# Log in to the Hugging Face Hub from the command line.
# The command prompts interactively for an access token (see the captured output below).
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: read).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your term

# Import all required libraries

In [6]:
!pip install langchain_community

Collecting langchain_community
  Downloading langchain_community-0.2.1-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.21.2-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.3/49.3 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)
Installing collected packages: mypy-extensio

In [4]:
# Standard library
import warnings

# Third-party
import torch
import transformers
from transformers import AutoTokenizer  # converts text into the token ids the model consumes

# Import the integration from langchain_community directly; `langchain.llms` is a deprecated
# re-export shim that only forwards to this package.
from langchain_community.llms import HuggingFacePipeline

# Silence library warnings so they do not clutter the cell outputs
warnings.filterwarnings('ignore')

# Load the Llama2 model

In [5]:
# Hub id of the Llama 2 chat checkpoint (7 billion parameters).
# This is a gated repository: your Hugging Face account must have been granted access.
model = "meta-llama/Llama-2-7b-chat-hf"
# Fallback if access to the repository above has not been granted:
# model = 'daryl149/llama-2-7b-chat-hf'

In [7]:
# Load the tokenizer that matches the checkpoint named in `model`
tokenizer = AutoTokenizer.from_pretrained(model)

In [8]:
# Build the Hugging Face text-generation pipeline: it tokenizes the prompt,
# feeds the token ids to the model and returns the generated text.
pipeline = transformers.pipeline(
    "text-generation",
    # --- what to load ---
    model=model,
    tokenizer=tokenizer,
    # NOTE(review): trust_remote_code=True permits running code shipped in the model repo;
    # presumably not needed for this checkpoint — confirm before keeping it.
    trust_remote_code=True,
    # --- how to load it ---
    torch_dtype=torch.bfloat16,
    device_map='auto',
    # --- how to generate ---
    do_sample=True,
    top_k=10,
    max_length=1000,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [9]:
# Wrap the transformers pipeline configured above so LangChain can use it as an LLM.
# NOTE(review): model_kwargs appears to be consumed only when LangChain builds the pipeline itself;
# with a prebuilt pipeline, temperature=0 likely has no effect on generation — confirm.
llm = HuggingFacePipeline(pipeline=pipeline, model_kwargs={'temperature':0})

In [10]:
# Free-form question sent to the model as-is (no template)
prompt = (
    "Tell me good companies to work as a site reliability "
    "and devops entry level engineer in the USA?"
)

In [None]:
# Generate a completion for `prompt` and print the returned text.
# `invoke` replaces the deprecated direct call `llm(prompt)`.
print(llm.invoke(prompt))

# Creating a Prompt Template

In [None]:
# Prompt templating and chain helpers from LangChain.
# The module is `langchain.prompts` (plural); `langchain.prompt` does not exist and raises ModuleNotFoundError.
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

In [None]:
# Reusable prompt with a single `{country}` placeholder
prompt_template = PromptTemplate(
    template="You are a google map and a current affairs expert. Please tell me what is the capital of {country}?",
    input_variables=["country"],
)

# Fill in the placeholder for one country and display the resulting prompt text
input_prompt = prompt_template.format(country="Nigeria")
input_prompt

In [None]:
# Tie the template to the LLM; verbose=True echoes the formatted prompt while the chain runs
chain = LLMChain(
    prompt=prompt_template,
    llm=llm,
    verbose=True,
)
# Run the chain for one country and trim surrounding whitespace from the answer
chain.run("Nigeria").strip()

## Prompt Two

In [None]:
# Second reusable prompt, with a single `{bookname}` placeholder
prompt_template1 = PromptTemplate(
    input_variables=['bookname'],
    template='You are a genius and intellect. Summarize for me details of this {bookname}?'
)

# Render the template for one title and display it.
# Stored under its own name so the free-form `prompt` defined earlier is not overwritten
# (rebinding it would change what the earlier generation cell produces on re-run).
input_prompt1 = prompt_template1.format(bookname='Titanic')
input_prompt1

In [None]:
# Tie the second template to the LLM; verbose=True echoes the formatted prompt while the chain runs
chain = LLMChain(
    prompt=prompt_template1,
    llm=llm,
    verbose=True,
)
# Run the chain for one title and trim surrounding whitespace from the answer
chain.run("Titanic").strip()

# Note: the Meta Llama 2 model is too large to download easily, which made the process very slow.