In [11]:
!pip -q install git+https://github.com/huggingface/transformers # need to install from github
!pip install -q datasets loralib sentencepiece 
!pip -q install bitsandbytes accelerate

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


## StableVicuna - RLHF Chat model

In [12]:
# Print the GPU, driver and memory status of this runtime before the model is loaded.
!nvidia-smi

Thu May 25 06:35:13 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   45C    P8     9W /  70W |      3MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [13]:
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
import torch

In [17]:
# One Hub repository id serves both the tokenizer files and the model weights.
checkpoint = "TheBloke/stable-vicuna-13B-HF"

tokenizer = LlamaTokenizer.from_pretrained(checkpoint)

# Same options as before: 8-bit loading with automatic device placement.
base_model = LlamaForCausalLM.from_pretrained(checkpoint, device_map='auto', load_in_8bit=True)

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/700 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/576 [00:00<?, ?B/s]

In [10]:
# Text-generation pipeline around the loaded model and tokenizer.
# The generation keyword arguments given here apply to every `pipe(...)` call.
pipe = pipeline(
    "text-generation",
    model=base_model,
    tokenizer=tokenizer,
    max_length=512,
    # Sampling must be switched on explicitly; without it decoding is greedy and
    # the temperature / top_p settings below are ignored.
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
)

### The prompt & response

In [None]:
import json
import textwrap

# Sample user question; the same text is previewed through get_prompt() below.
human_prompt = 'What is the meaning of life?'

def get_prompt(human_prompt):
    """Build the single-turn chat prompt sent to the model.

    Args:
        human_prompt: The user's message; it is interpolated as-is.

    Returns:
        A two-line string: ``### Human: <message> `` (with a trailing space)
        followed by ``### Assistant:``, which is left open for the model to
        continue.
    """
    human_turn = f"### Human: {human_prompt} "
    assistant_cue = "### Assistant:"
    return "\n".join((human_turn, assistant_cue))

# Preview the exact prompt text the model will receive for the sample question
# (reuses `human_prompt` defined above instead of repeating the literal).
print(get_prompt(human_prompt))

def remove_human_text(text):
    """Return the part of ``text`` that comes before the first ``'### Human:'``.

    Used to drop any follow-up human turn from a generated reply. When the
    marker is absent the whole string is returned.
    """
    before_marker, _marker, _after = text.partition('### Human:')
    return before_marker

def parse_text(data, width=100):
    """Print the assistant's reply from each generation result, wrapped for reading.

    For every item, the text after the first ``'### Assistant:'`` marker is
    taken, anything from a following ``'### Human:'`` marker onwards is removed
    (via ``remove_human_text``), and the remainder is printed.

    Each line of the reply is wrapped on its own, so line breaks produced by
    the model (numbered lists, reasoning steps, paragraphs) are kept instead of
    being merged into one block of text.

    Args:
        data: Iterable of dicts, each with a ``'generated_text'`` string.
        width: Maximum width of a printed line. Defaults to 100.

    Returns:
        None. Items without an ``'### Assistant:'`` marker print nothing.
    """
    marker = '### Assistant:'
    for item in data:
        text = item['generated_text']
        marker_index = text.find(marker)
        if marker_index == -1:
            continue
        reply = text[marker_index + len(marker):].strip()
        # Cutting at '### Human:' can leave a trailing newline behind; strip it
        # so it does not turn into an empty last line.
        reply = remove_human_text(reply).strip()
        # textwrap.fill() treats its input as a single paragraph and replaces
        # newlines with spaces, so wrap line by line to keep the layout.
        wrapped_text = '\n'.join(
            textwrap.fill(line, width=width) for line in reply.split('\n')
        )
        print(wrapped_text)

# Hand-written result with the same keys parse_text reads, used to check the
# parser without running the model.
data = [
    {
        'generated_text': (
            '### Human: What is the capital of England? \n'
            '### Assistant: The capital city of England is London.'
        )
    }
]
parse_text(data)


## Run it as an HF model

In [None]:
%%time 
# Comparison question (llamas vs. alpacas vs. vicunas); parse_text prints the assistant part.
# NOTE(review): the prompt reads "What are the difference"; left untouched to keep the model input as-is.
raw_output = pipe(get_prompt('What are the difference between Llamas, Alpacas and Vicunas?'))
parse_text(raw_output)

In [None]:
%%time 
# Persuasive-writing task: a short note arguing for open-sourcing GPT-4.
raw_output = pipe(get_prompt('Write a short note to Sam Altman giving reasons to open source GPT-4'))
parse_text(raw_output)

In [None]:
%%time 
# Short factual question, this time answered by the model rather than hand-written data.
raw_output = pipe(get_prompt('What is the capital of England?'))
parse_text(raw_output)

In [None]:
%%time 
# Creative-writing task: a story from a one-sentence premise.
raw_output = pipe(get_prompt('Write a story about a Koala playing pool and beating all the camelids.'))
parse_text(raw_output)

In [None]:
%%time 
# Two-part conversational question (an opinion plus a knowledge request).
# The prompt typo "dow" is corrected to "do" so the model receives the intended question.
raw_output = pipe(get_prompt('As an AI do you like the Simpsons? What do you know about Homer?'))
parse_text(raw_output)

In [None]:
%%time 
# Arithmetic word problem with a step-by-step instruction; the correct result is 23 - 20 + 6 = 9.
# NOTE(review): the prompt says "how many apple"; left untouched to keep the model input as-is.
raw_output = pipe(get_prompt('Answer the following question by reasoning step by step. The cafeteria had 23 apples. If they used 20 for lunch, and bought 6 more, how many apple do they have?'))
parse_text(raw_output)

In [None]:
%%time 
# Yes/no question with a step-by-step instruction.
# The original literal contained '\/', which is not a valid Python escape: it raised an
# invalid-escape warning and sent a stray backslash to the model. A plain '/' is used instead.
raw_output = pipe(get_prompt('Answer the following yes/no question by reasoning step-by-step. \n Can you write a whole Haiku in a single tweet?'))
parse_text(raw_output)

In [None]:
%%time 
# Plausibility question that asks for the rationale before the answer.
raw_output = pipe(get_prompt('Can Geoffrey Hinton have a conversation with George Washington? Give the rationale before answering.'))
parse_text(raw_output)

In [None]:
%%time 
# Same rationale-first format as the previous question, with a different pair of people.
raw_output = pipe(get_prompt('Could Marcus Aurelius have had dinner with George Washington? Give the rationale before answering.'))
parse_text(raw_output)

In [None]:
%%time 
# List-style knowledge request (three lesser-known facts).
# NOTE(review): the prompt spells "dont" without an apostrophe; left untouched to keep the model input as-is.
raw_output = pipe(get_prompt('tell me about 3 facts about Marcus Aurelius that most people dont know'))
parse_text(raw_output)

In [None]:
%%time 
# Short factual question.
# NOTE(review): "Aureliuss" looks like a possessive that lost its apostrophe; left untouched to keep the model input as-is.
raw_output = pipe(get_prompt('Who was Marcus Aureliuss son?'))
parse_text(raw_output)

In [None]:
%%time 
# Extension of the previous question that also asks for a description of the person.
# NOTE(review): "Aureliuss" looks like a possessive that lost its apostrophe; left untouched to keep the model input as-is.
raw_output = pipe(get_prompt('Who was Marcus Aureliuss son and what was he like?'))
parse_text(raw_output)

In [None]:
%%time 
# Direct factual question about a named historical figure.
raw_output = pipe(get_prompt('Who was the emperor Commodus?'))
parse_text(raw_output)