# Evaluating the Original RedPajama-INCITE Model and the Finetuned Model

In [None]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
model = '3B'  # Checkpoint size to evaluate; set to '7B' for the larger model

# The dataset is the same for both checkpoint sizes.
dataset = 'johnrobinsn/alpaca-cleaned'

# Size-specific names. Any value other than '7B' selects the 3B checkpoint.
# model_name is a (model id, tokenizer id) pair; both entries are the same repo.
if model == '7B':
    model_name = ("togethercomputer/RedPajama-INCITE-Base-7B-v0.1",) * 2
    run_name = peft_name = 'redpj7B-lora-int8-alpaca'
else:
    model_name = ("togethercomputer/RedPajama-INCITE-Base-3B-v1",) * 2
    run_name = peft_name = 'redpj3B-lora-int8-alpaca'

# The results directory is always the run name plus a "-results" suffix.
output_dir = f'{run_name}-results'

model_name[1], dataset, peft_name, run_name

report_to = "none"

## Load the Base LLM and Tokenizer

In [None]:
# The tokenizer is loaded from the second entry of model_name.
tokenizer = AutoTokenizer.from_pretrained(model_name[1])
# Pad with token id 0 and register '<eos>' as the end-of-sequence token;
# generate() searches the sampled ids for this EOS id to find where to stop.
tokenizer.pad_token_id = 0
tokenizer.add_special_tokens({'eos_token':'<eos>'})

# NOTE: this rebinds `model` from the size label ('3B'/'7B') set earlier
# to the loaded network object.
model = AutoModelForCausalLM.from_pretrained(
    model_name[0],
    device_map="auto",
    load_in_8bit=True,
)

# Switch to evaluation mode; this notebook only runs inference.
model.eval()

## Formatting Prompts

In [None]:
def generate_prompt(data_point):
    """Render an Alpaca-style prompt for one instruction record.

    Args:
        data_point: Mapping with an "instruction" entry and an optional
            "input" entry holding extra context. A missing, ``None`` or
            empty "input" selects the shorter, instruction-only template.

    Returns:
        The prompt text, always ending in "### Response:" so the model's
        continuation is the answer.

    Raises:
        KeyError: If "instruction" is absent from ``data_point``.
    """
    instruction = data_point["instruction"]
    # .get() so records without an "input" key are treated as "no context"
    # instead of raising KeyError.
    context = data_point.get("input")

    if context:
        return (
            "Below is an instruction that describes a task, paired with an input "
            "that provides further context. Write a response that appropriately "
            "completes the request.\n\n"
            f"### Instruction:\n{instruction}\n\n"
            f"### Input:\n{context}\n\n"
            "### Response:"
        )

    return (
        "Below is an instruction that describes a task. Write a response that "
        "appropriately completes the request.\n\n"
        f"### Instruction:\n{instruction}\n\n"
        "### Response:"
    )

In [None]:
def generate(instruction,input=None,maxTokens=256):
    """Sample a completion for an instruction and print only the response part.

    Uses the module-level ``tokenizer``, ``model`` and ``generate_prompt``.

    Args:
        instruction: Task description placed in the prompt template.
        input: Optional extra context forwarded to ``generate_prompt``.
        maxTokens: Passed to ``model.generate`` as ``max_new_tokens``.

    Returns:
        None. The response text (or a warning) is printed.
    """
    prompt_text = generate_prompt({'instruction': instruction, 'input': input})
    encoded = tokenizer(prompt_text, return_tensors="pt", truncation=True)
    # The prompt ids are moved to the GPU, so a CUDA device is required.
    prompt_ids = encoded.input_ids.cuda()

    eos_id = tokenizer.eos_token_id
    generated = model.generate(
        input_ids=prompt_ids,
        max_new_tokens=maxTokens,
        do_sample=True,
        top_p=0.9,
        pad_token_id=eos_id,
        forced_eos_token_id=eos_id,
    )
    token_ids = generated[0].tolist()

    # Without an EOS token there is no point at which to cut the output.
    if eos_id not in token_ids:
        print('Warning: no <eos> detected ignoring output')
        return

    # Decode everything up to (not including) the first EOS token.
    text = tokenizer.decode(token_ids[:token_ids.index(eos_id)])

    # Hide the prompt template: keep only what follows the first
    # "### Response:" marker.
    marker = "### Response:"
    _, found, response = text.partition(marker)
    if not found:
        print('Warning: Expected prompt template to be emitted.  Ignoring output.')
        return

    print(response)

# Generating using the Base Model

## Write a short story in third person narration about a protagonist who has to make an important career decision.
The protagonist’s character is presented from the point of view of the protagonist. The first paragraph should describe a decision the protagonist made. In the second paragraph, the reader should learn more about the protagonist and why she made this decision. In the last paragraph, the reader should learn more about what the protagonist decided.

### Examples:
Write about a character from a novel who makes an important decision.
Write about a character from a film that makes an important decision.
Write about a character from a television show that makes an important decision.

## Writing Prompt 13: Write a Short Story

### Instructions:
In the following prompt, write a short story in third person narration. The story can take place in the past or in the present. Write a story that contains:

- An unreliable narrator
- A dramatic situation
- A situation that takes place during a specific time

### Response:
Write a short story in third person narration about a character who finds an important item. The character finds the item during a specific time. The story contains the following characteristics:

- An unreliable narrator
- A dramatic situation
- A situation that takes place during a specific time

### Examples:
Write a short story about a character who finds an important item during a specific time.

## Before finetuning

In [None]:
# Seed torch's RNG so sampling starts from a fixed random state.
torch.manual_seed(42)
instruction = (
    'Who was the first man to walk on the moon '
    'and tell me where he was born.'
)
# At this point `model` is still the base model without the adapter.
generate(instruction=instruction, maxTokens=300)

## Load the LoRA Adapter

In [None]:
# By default use my pretrained adapter weights
peft_model_id = 'johnrobinsn/' + peft_name
# Uncomment to use locally saved adapter weights if you trained above
#peft_model_id = peft_name

# Wrap the already-loaded base model with the LoRA adapter; `model` now
# refers to the adapted model.
model = PeftModel.from_pretrained(
    model,
    peft_model_id,
    device_map={"": 0},
)
model.eval()

print("Peft model adapter loaded")

In [None]:
# Same seed and question as the base-model run, now answered by the
# adapter-wrapped model so the two outputs can be compared.
torch.manual_seed(42)
instruction = (
    'Who was the first man to walk on the moon '
    'and tell me where he was born.'
)
generate(instruction=instruction, maxTokens=300)

## A few more prompts

In [None]:
# Seed torch's RNG so sampling starts from a fixed random state.
torch.manual_seed(42)
# Instruction plus extra context, so the prompt template with an
# "### Input:" section is used.
generate(
    instruction='Identify the odd one out',
    input='Twitter, Instagram, Telegram',
)

In [None]:
# Seed torch's RNG so sampling starts from a fixed random state.
torch.manual_seed(42)
# Larger token budget than generate()'s default of 256.
# The prompt previously contained a duplicated word ("about about").
generate('Write a poem about a cat', maxTokens=1000)