# Getting Started
#### To start this notebook, you must have a huggingface account and request access from Meta to use Llama 2.
https://huggingface.co/

#### In huggingface, create an access token
https://huggingface.co/docs/hub/security-tokens

#### Inside your home directory access .apikeys and create a huggingface_api_key.txt and paste you access token inside the file
path - /home/{your username}/.apikeys
#### Using the link below request access from Meta
https://huggingface.co/meta-llama/Llama-2-7b-hf

#### Once you recieve access from Meta inside terminal create a conda environment using
conda create --name {environment_name} python=3.10

#### Then Install ipykernel using
conda install ipykernel

#### To allow your environment to be used in the notebook run the following line and select your environment on the top right besides the debugging symbol
python -m ipykernel install --user --name={environment_name}

#### Go back to terminal and install all the packages with
pip install -r packages.txt


#### Edit the data set, test set, and validation set under Load Datasets with the path and you are good to go!
##### All imported data must be a csv. Csv must have at least 2 columns for output and test
##### Import the data inside the llama-recipes folder
##### Always run the first 3 cells

## Access Huggingface API Key

In [1]:
# Set cache directory and load Huggingface api key
import os

username = os.getenv('USER')
directory_path = os.path.join('/scratch', username)

# Set Huggingface cache directory to be on scratch drive
if os.path.exists(directory_path):
    hf_cache_dir = os.path.join(directory_path, 'hf_cache')
    if not os.path.exists(hf_cache_dir):
        os.mkdir(hf_cache_dir)
    print(f"Okay, using {hf_cache_dir} for huggingface cache. Models will be stored there.")
    assert os.path.exists(hf_cache_dir)
    os.environ['TRANSFORMERS_CACHE'] = f'/scratch/{username}/hf_cache/'
else:
    error_message = f"Are you sure you entered your username correctly? I couldn't find a directory {directory_path}."
    raise FileNotFoundError(error_message)

# Load Huggingface api key
api_key_loc = os.path.join('/home', username, '.apikeys', 'huggingface_api_key.txt')

if os.path.exists(api_key_loc):
    print('Huggingface API key loaded.')
    with open(api_key_loc, 'r') as api_key_file:
        huggingface_api_key = api_key_file.read().strip()  # Read and store the contents
else:
    error_message = f'Huggingface API key not found. You need to get an HF API key from the HF website and store it at {api_key_loc}.\n' \
                    'The API key will let you download models from Huggingface.'
    raise FileNotFoundError(error_message)

Okay, using /scratch/sdj5/hf_cache for huggingface cache. Models will be stored there.
Huggingface API key loaded.


In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer, LlamaForCausalLM, GenerationConfig, pipeline
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from configs import fsdp_config, train_config
from peft import get_peft_model, prepare_model_for_int8_training, PeftModelForCausalLM, LoraConfig, TaskType, prepare_model_for_int8_training, PeftModel
from utils.dataset_utils import get_preprocessed_dataset
from utils.train_utils import (
    train,
    freeze_transformer_layers,
    setup,
    setup_environ_flags,
    clear_gpu_cache,
    print_model_size,
    #get_policies
)
from utils.config_utils import (
    update_config,
    generate_peft_config,
    generate_dataset_config,
)
from datasets import load_dataset
from datasets import Dataset
from pathlib import Path
import sys
import csv
import json
from configs.datasets import samsum_dataset, alpaca_dataset, grammar_dataset
from ft_datasets.utils import Concatenator

## Load Llama 2 7B

In [3]:
tokenizer = AutoTokenizer.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        cache_dir=os.path.join('/scratch', username),
        load_in_8bit=True if train_config.quantization else None,
        token=huggingface_api_key,
)

tokenizer.add_special_tokens(
    {
        "pad_token": "<PAD>",
    }
)

model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-7b-hf",
        load_in_8bit=True if train_config.quantization else None,
        device_map="auto" if train_config.quantization else None,
        cache_dir=os.path.join('/scratch', username),
        token=huggingface_api_key
)
#the code will output "Error displaying widget: model not found" it is not an error just the code failing to create a loading bar

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



## Load in your dataset

In [8]:
#add testset and rename current test set to validation set 

#edit file path to your unique dataset
dataset = load_dataset('csv', data_files='train_set.csv',split = 'train')
valset = load_dataset('csv', data_files='validation_set.csv',split = 'train')
testset = load_dataset('csv', data_files='test_set.csv',split = 'train')


#Sampling only for testing
sample_fraction = 1

num_samples = int(len(dataset) * sample_fraction)
num_s = int(len(valset) * sample_fraction)
num_sa = int(len(testset) * sample_fraction)

# Sample 1/10 of the data randomly
dataset = dataset.shuffle(seed=42).select(list(range(num_samples)))
valset = valset.shuffle(seed=42).select(list(range(num_s)))
testset = testset.shuffle(seed=42).select(list(range(num_sa)))

#Edit the prompt to tell the model what to do
prompt = (
    f"{{llama_prompt}}{{next_line}}{{eos_token}}"
)

#edit the variables in prompt.format to match your data: essentially what you what the model to read
def apply_prompt_template(sample):
    return {
        "text": prompt.format(
            llama_prompt = sample["llama_prompt"],
            next_line = sample["next_line"],
            eos_token=tokenizer.eos_token,
        )
    }

dataset = dataset.map(apply_prompt_template, remove_columns=list(dataset.features))
valset = valset.map(apply_prompt_template, remove_columns=list(valset.features))
testset = testset.map(apply_prompt_template, remove_columns=list(testset.features))

dataset = dataset.map(
    lambda sample: tokenizer(sample["text"]),
    batched=True,
    remove_columns=list(dataset.features), 
).map(Concatenator(), batched=True)
valset = valset.map(
    lambda sample: tokenizer(sample["text"]),
    batched=True,
    remove_columns=list(valset.features), 
).map(Concatenator(), batched=True)
testset = testset.map(
    lambda sample: tokenizer(sample["text"]),
    batched=True,
    remove_columns=list(testset.features), 
).map(Concatenator(), batched=True)


train_dataset = dataset
val_dataset = dataset
test_dataset = testset

Found cached dataset csv (/home/sdj5/.cache/huggingface/datasets/csv/default-5defd72c5a44f7d1/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Found cached dataset csv (/home/sdj5/.cache/huggingface/datasets/csv/default-d3211e4ef040aa2b/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Found cached dataset csv (/home/sdj5/.cache/huggingface/datasets/csv/default-9f4eae84c3258a64/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Loading cached shuffled indices for dataset at /home/sdj5/.cache/huggingface/datasets/csv/default-5defd72c5a44f7d1/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-85573b2a742d2739.arrow
Loading cached shuffled indices for dataset at /home/sdj5/.cache/huggingface/datasets/csv/default-d3211e4ef040aa2b/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-aae07c843831a232.arrow
Loading cached shuffled indices for dataset at /home/sdj5/.cache/hugging

Map:   0%|          | 0/32854 [00:00<?, ? examples/s]

Map:   0%|          | 0/9616 [00:00<?, ? examples/s]

Map:   0%|          | 0/9660 [00:00<?, ? examples/s]

Map:   0%|          | 0/32854 [00:00<?, ? examples/s]

Map:   0%|          | 0/32854 [00:00<?, ? examples/s]

Map:   0%|          | 0/9616 [00:00<?, ? examples/s]

Map:   0%|          | 0/9616 [00:00<?, ? examples/s]

Map:   0%|          | 0/9660 [00:00<?, ? examples/s]

Map:   0%|          | 0/9660 [00:00<?, ? examples/s]

In [25]:
testset = load_dataset('csv', data_files='test_set.csv',split = 'train')
sample_fraction = .3
num_sa = int(len(testset) * sample_fraction)
testset = testset.shuffle(seed=42).select(list(range(num_sa)))
testset = testset.map(apply_prompt_template, remove_columns=list(testset.features))
testset = testset.map(
    lambda sample: tokenizer(sample["text"]),
    batched=True,
    remove_columns=list(testset.features), 
).map(Concatenator(), batched=True)
tokenizer.decode(testset["labels"][1])

Found cached dataset csv (/home/sdj5/.cache/huggingface/datasets/csv/default-9f4eae84c3258a64/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)
Loading cached shuffled indices for dataset at /home/sdj5/.cache/huggingface/datasets/csv/default-9f4eae84c3258a64/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-e648dda52a599d9b.arrow
Loading cached processed dataset at /home/sdj5/.cache/huggingface/datasets/csv/default-9f4eae84c3258a64/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-e0f6e0836e353a7d.arrow
Loading cached processed dataset at /home/sdj5/.cache/huggingface/datasets/csv/default-9f4eae84c3258a64/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-cae86e3e8260cc69.arrow
Loading cached processed dataset at /home/sdj5/.cache/huggingface/datasets/csv/default-9f4eae84c3258a64/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1/cache-28427d9383226983.arrow


": \nQ. Dor.: The thirsty earth is broke with many a gap,\n \tAnd lands are lean where rivers do not run:\n\tWhere soul is reft from that it loveth best,\n\n\nNext line:\n[/INST]How can it thrive or boast of quiet rest?</s><s> [INST]<<SYS>>\nYou are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.\n<</SYS>>\n\nTitle: The Scottish History of James the Fourth\nYear: 1590\nAuthor: Robert Greene\nGenre: Romance\nCharacters in play: ['Q. Dor.', '1st Lady.', 'Lawyer.', '1st Revel.', 'Purv.', 'Bart.', 'K. of Eng.', 'Ross.', 'Scout.', 'Andrew.', 'Morton.', 'Slip.', 'All.', 'K. James.', 'Bohan.', 'Ida.', '1st Hunts.', 'Lady A.', 'Bishop.', 'Ober.', 'Jaques.', 'Doug.', 'Ateuk.', 'Merch.', 'Cuth.', 'Nano.', 'Divine.', 'C. of Arran.', 'Eust.']\nLines of play: \nK. James.: Andrew?

## Test the model before finetuning

In [4]:
#Edit eval_prompt to match your data
eval_prompt = """
"You are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.


Title: Dido, Queen of Carthage
Year: 1585-1586
Author: Christopher Marlowe
Genre: Tragedy
Characters in play: ['Aeneas.', 'First Lord.', 'Cloan.', 'Dido.', 'Achat.', 'Nurse.', 'Serg.', 'Jup.', 'Ilio.', 'Venus.', 'Iarb.', 'nan', 'Asca.', 'Cupid.', 'Juno.', 'Anna.']
Lines of play: 
Jup.: Come, gentle Ganymede, and play with me:
 	I love thee well, say Juno what she will.
Gany.: I am much better for your worthless love,
 

Next line:
"

"""

model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))


"You are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.


Title: Dido, Queen of Carthage
Year: 1585-1586
Author: Christopher Marlowe
Genre: Tragedy
Characters in play: ['Aeneas.', 'First Lord.', 'Cloan.', 'Dido.', 'Achat.', 'Nurse.', 'Serg.', 'Jup.', 'Ilio.', 'Venus.', 'Iarb.', 'nan', 'Asca.', 'Cupid.', 'Juno.', 'Anna.']
Lines of play: 
Jup.: Come, gentle Ganymede, and play with me:
 	I love thee well, say Juno what she will.
Gany.: I am much better for your worthless love,
 

Next line:
"

### Examples

```
Aeneas. 	Aeneas.
Juno. 	Juno.
Dido. 	Dido.
Cloan. 	Cloan.
Gany. 	Gany.
Jup. 	Jup.
Anna. 	Anna.
```

### Solution

```python
import re
import collections


class Solution:
    def


In [7]:
#Edit eval_prompt to match your data
eval_prompt = """
[INST]<<SYS>>
You are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.
<</SYS>>

Title: Dido, Queen of Carthage
Year: 1585-1586
Author: Christopher Marlowe
Genre: Tragedy
Characters in play: ['Aeneas.', 'First Lord.', 'Cloan.', 'Dido.', 'Achat.', 'Nurse.', 'Serg.', 'Jup.', 'Ilio.', 'Venus.', 'Iarb.', 'nan', 'Asca.', 'Cupid.', 'Juno.', 'Anna.']
Lines of play: 
Jup.: Come, gentle Ganymede, and play with me:
 	I love thee well, say Juno what she will.
Gany.: I am much better for your worthless love,
 

Next line:
[/INST]"""

model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100, do_sample=True)[0], skip_special_tokens=True))


[INST]<<SYS>>
You are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.
<</SYS>>

Title: Dido, Queen of Carthage
Year: 1585-1586
Author: Christopher Marlowe
Genre: Tragedy
Characters in play: ['Aeneas.', 'First Lord.', 'Cloan.', 'Dido.', 'Achat.', 'Nurse.', 'Serg.', 'Jup.', 'Ilio.', 'Venus.', 'Iarb.', 'nan', 'Asca.', 'Cupid.', 'Juno.', 'Anna.']
Lines of play: 
Jup.: Come, gentle Ganymede, and play with me:
 	I love thee well, say Juno what she will.
Gany.: I am much better for your worthless love,
 

Next line:
[/INST]



## Enables Parameter Efficient Finetuning (PEFT)

In [9]:
#reduces the parameters needed to train
model.train()
def create_peft_config(model):
    from peft import (
        get_peft_model,
        LoraConfig,
        TaskType,
        prepare_model_for_int8_training,
    )

    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=8,
        lora_alpha=32,
        lora_dropout=0.05,
        bias= "none",
        target_modules = ["q_proj", "v_proj"]
    )
    
    kwargs = {
        'use_peft': True, 
        'peft_method': 'lora', 
        'quantization': True, 
        'use_fp16': True, 
        'model_name': os.path.join('/scratch', username, 'models--meta-llama--Llama-2-7b-hf/snapshots/6fdf2e60f86ff2481f2241aaee459f85b5b0bbb9'), 
        'output_dir': os.path.join('/scratch', username)
    }
    
    update_config((train_config, fsdp_config), **kwargs)
    
    model = prepare_model_for_int8_training(model)
    peft_config = generate_peft_config(train_config, kwargs)
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()
    return model, peft_config

# create peft config
model, lora_config = create_peft_config(model)



trainable params: 4,194,304 || all params: 6,742,609,920 || trainable%: 0.06220594176090199


In [11]:
torch.cuda.empty_cache()
from transformers import TrainerCallback
from contextlib import nullcontext
enable_profiler = False
output_dir = os.path.join('/scratch', username ,'llama-output')
#set up the configurations for training
config = {
    'lora_config': lora_config,
    'learning_rate': 1e-4,
    'num_train_epochs': 1,
    'gradient_accumulation_steps': 2,
    'per_device_train_batch_size': 2,
    'gradient_checkpointing': False,
}

# Set up profiler
if enable_profiler:
    wait, warmup, active, repeat = 1, 1, 2, 1
    total_steps = (wait + warmup + active) * (1 + repeat)
    schedule =  torch.profiler.schedule(wait=wait, warmup=warmup, active=active, repeat=repeat)
    profiler = torch.profiler.profile(
        schedule=schedule,
        on_trace_ready=torch.profiler.tensorboard_trace_handler(f"{output_dir}/logs/tensorboard"),
        record_shapes=True,
        profile_memory=True,
        with_stack=True)
    
    class ProfilerCallback(TrainerCallback):
        def __init__(self, profiler):
            self.profiler = profiler
            
        def on_step_end(self, *args, **kwargs):
            self.profiler.step()

    profiler_callback = ProfilerCallback(profiler)
else:
    profiler = nullcontext()

## Defines training arguments and trains the model

In [None]:
torch.cuda.empty_cache()
from transformers import default_data_collator, Trainer, TrainingArguments

# Define training args
training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    bf16=True, 
    logging_dir=f"{output_dir}/logs",
    logging_strategy="steps",
    evaluation_strategy="steps",
    logging_steps=20,  # 10
    save_strategy="steps",
    optim="adamw_torch_fused",
    auto_find_batch_size = True, 
    max_steps=total_steps if enable_profiler else -1,
    **{k:v for k,v in config.items() if k != 'lora_config'},
    remove_unused_columns=False,
    save_steps=20,
    save_total_limit=5,
    load_best_model_at_end=True
)

with profiler:
    # Create Trainer instance
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        data_collator=default_data_collator,
        callbacks=[profiler_callback] if enable_profiler else [],
    )
    
# Start training
trainer.train()

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss,Validation Loss


## Save the model to output directory

In [11]:
#output dirrectory of working model
output_dir = os.path.join('/scratch', username ,'llama-output')

#saves the model
model.save_pretrained(output_dir)

#Saves model configurations
model.config.to_json_file(os.path.join(output_dir, "config.json"))

#saves PEFT config
peft_config = model.peft_config
json_file_path = os.path.join(output_dir, "peft_config.json")

# Custom serialization function for LoraConfig objects
def lora_config_serializer(obj):
    if isinstance(obj, LoraConfig):
        # Return a dictionary representation of the LoraConfig object
        return obj.__dict__
    raise TypeError("Type not serializable")

# Write the dictionary to the JSON file using the custom serializer
with open(json_file_path, "w") as json_file:
    json.dump(peft_config, json_file, default=lora_config_serializer, indent=4)

## Test model on the same input as before

In [11]:
#Tests the model on the sample input from before
model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100)[0], skip_special_tokens=True))



"[INST]<<SYS>>
You are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.
<</SYS>>

Title: Dido, Queen of Carthage
Year: 1585-1586
Author: Christopher Marlowe
Genre: Tragedy
Characters in play: ['Aeneas.', 'First Lord.', 'Cloan.', 'Dido.', 'Achat.', 'Nurse.', 'Serg.', 'Jup.', 'Ilio.', 'Venus.', 'Iarb.', 'nan', 'Asca.', 'Cupid.', 'Juno.', 'Anna.']
Lines of play: 
Jup.: Come, gentle Ganymede, and play with me:
 	I love thee well, say Juno what she will.
Gany.: I am much better for your worthless love,
 

Next line:
[/INST]"

INST>



## Load Saved Model

In [4]:
#load and test- get metrics and automatically show random 5 model input and output
#get this cell running on cold
output_dir = os.path.join('/scratch', username ,'llama-output')

model = AutoModelForCausalLM.from_pretrained(
        train_config.model_name,
        load_in_8bit=True if train_config.quantization else None,
        device_map="auto" if train_config.quantization else None,
)    

model = PeftModelForCausalLM.from_pretrained(model, output_dir)

# Assuming test_dataset is your test dataset and model is your trained model

from transformers import DataCollatorWithPadding
'''
import nltk
from nltk.translate.bleu_score import corpus_bleu

# Assuming references is a list of reference texts and hypotheses is a list of generated texts
def compute_bleu(references, hypotheses):
    # Compute BLEU score using nltk's corpus_bleu function
    bleu_score = corpus_bleu(references, hypotheses)
    return bleu_score


# Define data collator for evaluation
eval_data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Create Trainer instance for evaluation
eval_training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_eval_batch_size=8,  # Adjust batch size based on your system's memory
    remove_unused_columns=False,
    load_best_model_at_end=False,
)
eval_trainer = Trainer(
    model=model,
    args=eval_training_args,
    data_collator=eval_data_collator,
    compute_metrics=lambda p: {"bleu": compute_bleu(p)},
)

# Evaluate the model on test_dataset
evaluation_results = eval_trainer.predict(test_dataset)
'''
eval_prompt = """
[INST]<<SYS>>
You are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.
<</SYS>>

Title: Dido, Queen of Carthage
Year: 1585-1586
Author: Christopher Marlowe
Genre: Tragedy
Characters in play: ['Aeneas.', 'First Lord.', 'Cloan.', 'Dido.', 'Achat.', 'Nurse.', 'Serg.', 'Jup.', 'Ilio.', 'Venus.', 'Iarb.', 'nan', 'Asca.', 'Cupid.', 'Juno.', 'Anna.']
Lines of play: 
Jup.: Come, gentle Ganymede, and play with me:
 	I love thee well, say Juno what she will.
Gany.: I am much better for your worthless love,
 

Next line:
[/INST]"""

model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100, do_sample=True)[0], skip_special_tokens=True))

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]


[INST]<<SYS>>
You are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.
<</SYS>>

Title: Dido, Queen of Carthage
Year: 1585-1586
Author: Christopher Marlowe
Genre: Tragedy
Characters in play: ['Aeneas.', 'First Lord.', 'Cloan.', 'Dido.', 'Achat.', 'Nurse.', 'Serg.', 'Jup.', 'Ilio.', 'Venus.', 'Iarb.', 'nan', 'Asca.', 'Cupid.', 'Juno.', 'Anna.']
Lines of play: 
Jup.: Come, gentle Ganymede, and play with me:
 	I love thee well, say Juno what she will.
Gany.: I am much better for your worthless love,
 

Next line:
[/INST]
Next line:
[INST]
You are the only one I do not despise.


In [6]:
#Edit eval_prompt to match your data
eval_prompt = """
[INST]<<SYS>>
You are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.
<</SYS>>

Title: Love's Metamorphosis
Year: 1590
Author: John Lyly
Genre: Mythological
Characters in play: ['nan', 'Protea.', 'Silv.', 'Celia.', 'Fidelia.', 'Ceres.', 'Petul.', 'Mont.', 'Tirt.', 'Eris.', 'Ramis.', 'Siren.', 'Cupid.', 'Nisa.', 'Merch.', 'Niobe.']
Lines of play: 
Ramis.: I cannot see, Montanus, why it is fained by
 	the poets that Love sat upon the chaos and created
	the world, since in the world there is so little love.


Next line:
[/INST]"""

model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")

model.eval()
with torch.no_grad():
    print(tokenizer.decode(model.generate(**model_input, max_new_tokens=100, do_sample=True)[0], skip_special_tokens=True))


[INST]<<SYS>>
You are an expert at predicting the next line of Elizabethan plays. Given the title of a play, its published year, author, genre, characters in the play, and three lines, generate the next line that follows. Only generate the next line with no other introduction or explanation.
<</SYS>>

Title: Love's Metamorphosis
Year: 1590
Author: John Lyly
Genre: Mythological
Characters in play: ['nan', 'Protea.', 'Silv.', 'Celia.', 'Fidelia.', 'Ceres.', 'Petul.', 'Mont.', 'Tirt.', 'Eris.', 'Ramis.', 'Siren.', 'Cupid.', 'Nisa.', 'Merch.', 'Niobe.']
Lines of play: 
Ramis.: I cannot see, Montanus, why it is fained by
 	the poets that Love sat upon the chaos and created
	the world, since in the world there is so little love.


Next line:
[/INST]Next line: 	
Cupid.: Nay, if you will not love, you shall not live.


In [15]:
import random

# Get random samples for display
num_samples_to_display = 5
random_sample_indices = random.sample(range(len(test_dataset)), num_samples_to_display)
random_samples = [test_dataset[idx] for idx in random_sample_indices]

# Generate and display summaries
for idx, sample in zip(random_sample_indices, random_samples):
    input_text = apply_prompt_template(sample)["text"]
    generated_summary = tokenizer.decode(evaluation_results.predictions[idx], skip_special_tokens=True)
    
    print(f"Sample {idx + 1}:")
    print(f"Input Dialogue:\n{sample['llama_prompt']}")
    print(f"Generated Summary:\n{generated_summary}\n{'-'*50}\n")


KeyError: 'llama_prompt'