In [1]:

from pathlib import Path
import matplotlib.pyplot as plt
import lightning as L
import torch
import torch.nn as nn
from lit_llama import model
import random
from lit_llama import LLaMA, Tokenizer
from lit_llama.utils import EmptyInitOnDevice, lazy_load, llama_model_lookup

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Tokenizer model location and the tokenizer built from it.
tokenizer_path: Path = Path("checkpoints/lit-llama/tokenizer.model")
tokenizer = Tokenizer(tokenizer_path)

# Single-device Fabric handle; later cells read `fabric.device` from it.
fabric = L.Fabric(devices=1)



In [3]:
import json
# Number of leading records used for training; the remainder is held out.
TRAIN_SPLIT = 5176

# Read the JSON explicitly as UTF-8 so the result does not depend on the
# platform's default text encoding.
with open('datasets/alpaca_data_cleaned.json', encoding='utf-8') as f:
    alpaca_json = json.load(f)

# Records with an empty 'input' field get a single space as a placeholder.
# Note that this rewrites the loaded records in place.
for item in alpaca_json:
    if len(item['input']) == 0:
        item['input'] = ' '


def _tokenize_item(item):
    """Tokenize one Alpaca record onto `fabric.device`.

    BOS is added only to the instruction and EOS only to the output; the
    input is encoded with neither.

    Returns a dict with the keys 'instruction', 'input' and 'output'.
    """
    return {
        'instruction': tokenizer.encode(item['instruction'], bos=True, eos=False, device=fabric.device),
        'input': tokenizer.encode(item['input'], bos=False, eos=False, device=fabric.device),
        'output': tokenizer.encode(item['output'], bos=False, eos=True, device=fabric.device),
    }


# Tokenized train / held-out splits.
alpaca_train_tokens = [_tokenize_item(item) for item in alpaca_json[:TRAIN_SPLIT]]
alpaca_test_tokens = [_tokenize_item(item) for item in alpaca_json[TRAIN_SPLIT:]]

In [4]:
# Locations of the tokenizer model and of the LLaMA checkpoint.
tokenizer_path: Path = Path("checkpoints/lit-llama/tokenizer.model")
checkpoint_path: Path = Path("checkpoints/lit-llama/7B/lit-llama.pth")

# Use bfloat16 only on a CUDA device that supports it; otherwise fall back to float32.
if fabric.device.type == "cuda" and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
else:
    dtype = torch.float32


def load_LLaMA(checkpoint_path):
    """Build a LLaMA model and load the weights stored at `checkpoint_path`.

    The model name is looked up from the lazily loaded checkpoint, the module
    is instantiated under `EmptyInitOnDevice` on `fabric.device` with the
    notebook-level `dtype` (no quantization), and the checkpoint is then
    loaded as its state dict.

    Returns:
        The model with the checkpoint weights loaded.
    """
    with lazy_load(checkpoint_path) as state:
        size_name = llama_model_lookup(state)

        # quantization_mode=None: the weights are not quantized.
        init_ctx = EmptyInitOnDevice(device=fabric.device, dtype=dtype, quantization_mode=None)
        with init_ctx:
            llama = LLaMA.from_name(size_name)

        llama.load_state_dict(state)
    return llama

In [5]:

LLaMA_config = model.LLaMAConfig.from_name('7B')
print('Loading models...')

# Base LLaMA model, loaded from the checkpoint and placed on the Fabric device.
LLamaModel = load_LLaMA(checkpoint_path)
LLamaModel = LLamaModel.to(fabric.device)
#LLamaModel = LLaMA(LLaMA_config).to(fabric.device)
print('Finished loading the first model')
print('Finished loading models')
tokenizer = Tokenizer(tokenizer_path)

IST_schemes = ['vanilla', 'last 4', '2nd to last', 'all layers']
scheme_losses = dict()

# The IST generator is a single `model.Block` built from the same config.
# The optimizer is created over its parameters before the module is moved to the device.
IST_generator = model.Block(LLaMA_config)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=IST_generator.parameters(), lr=1e-4)
IST_generator = IST_generator.to(fabric.device)

# Freeze every parameter of the base model.
for frozen_param in LLamaModel.parameters():
    frozen_param.requires_grad = False

Loading models...
Finished loading the first model
Finished loading models


In [6]:
def get_single_example(dataset, index=None):
    """Build one (embedding sequence, target tokens) pair from `dataset`.

    The model input is the concatenation, along the sequence axis, of:
      * the IST vector: `IST_generator` applied to the second output of
        `LLamaModel` on the instruction tokens, keeping only the last position;
      * the `wte` embeddings of the example's input tokens;
      * the `wte` embeddings of a random-length prefix of the output tokens.

    The target is the input tokens followed by that same output prefix
    extended by one token. It therefore has the same length as the model
    input, and target[i] is the token that comes right after position i.

    Args:
        dataset: sequence of dicts holding token tensors under the keys
            'instruction', 'input' and 'output' (indexed here as 1-D tensors).
        index: position of the example to use; a uniformly random one is
            drawn when omitted.

    Returns:
        (llama_input, target_tokens), each with a leading batch axis of size 1.
        target_tokens is int64 and stays on the device of the dataset tensors
        (it is no longer bounced through the CPU).

    Raises:
        ValueError: if the example's 'output' tensor is empty (raised by
            `random.randint`).
    """
    if index is None:
        index = random.randrange(len(dataset))
    example = dataset[index]

    # IST: last position of the generator output for the instruction.
    IST = IST_generator(LLamaModel(example['instruction'].unsqueeze(0).to(fabric.device))[1])[:, -1, :]

    # Question embeddings; drop only the batch axis so a one-token input
    # keeps its (sequence, embedding) shape.
    question = LLamaModel.transformer.wte(example['input'].unsqueeze(0).to(fabric.device)).squeeze(0)

    # Random prefix of the answer (possibly empty, never the full answer).
    answer_len = example['output'].size(0)
    trunc_len = random.randint(0, answer_len - 1)
    truncated_answer = LLamaModel.transformer.wte(example['output'][:trunc_len])

    # One more answer token than was fed in: the token to predict last.
    target_tokens = torch.cat([example['input'], example['output'][:trunc_len + 1]])

    llama_input = torch.cat([IST, question, truncated_answer])
    # `.long()` changes only the dtype; `.type(torch.LongTensor)` would also
    # copy the tensor to the CPU.
    return llama_input.unsqueeze(0), target_tokens.long().unsqueeze(0)
    
    

In [7]:
# Per-batch loss histories; the training cell appends one value per batch to each.
train_losses, test_losses = [], []

In [8]:
import wandb
learning_rate = 1e-4
batch_size = 32
trainset_size = len(alpaca_train_tokens)
testset_size = len(alpaca_test_tokens)

# Hyperparameters stored with the run. The learning rate is recorded too so
# that runs can be compared on it from the run config.
config = {
    'learning_rate': learning_rate,
    'batch_size': batch_size,
    'trainset_size': trainset_size,
    'testset_size': testset_size,
}

# init wandb
wandb.init(
    project='Alpaca instruction tuning',
    config=config,
    name='Fix bos/eos issue'
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mandrew-zeng[0m ([33msmalllanguagemodels[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
# Fresh Adam optimizer over the IST generator only; the step size comes from
# the `learning_rate` hyperparameter defined with the run configuration.
optimizer = torch.optim.Adam(IST_generator.parameters(), lr=learning_rate)

In [10]:
# Load onto the CPU first so the checkpoint can be read regardless of the
# device it was saved from; load_state_dict then copies the values into the
# generator's existing parameters.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
IST_generator.load_state_dict(
    torch.load("new_instructtunedweights_3.pt", map_location="cpu")
)

<All keys matched successfully>

In [11]:
import gc

In [102]:
loss_fn = nn.CrossEntropyLoss()
# The base model stays frozen; only IST_generator is updated by the optimizer.
for param in LLamaModel.parameters():
    param.requires_grad = False

batch_size = 32
LLamaModel.eval()


def _example_loss(dataset, index=None):
    """Cross-entropy loss of the base model on one example built by `get_single_example`.

    With `index=None` a random example of `dataset` is used.
    """
    embeddings, target = get_single_example(dataset, index=index)
    # Cast to the dtype the model was created with (`dtype`) rather than a
    # hard-coded bfloat16, which mismatches the model when it runs in float32.
    logits = LLamaModel.forward_embeddings(embeddings.to(dtype))[0]
    return loss_fn(logits.squeeze().to(fabric.device), target.squeeze().to(fabric.device))


for epoch in range(30):
    indices = list(range(trainset_size))
    random.shuffle(indices)
    epoch_train_loss = 0

    # Only full batches are used; a trailing partial batch is dropped.
    for start in range(0, trainset_size - batch_size + 1, batch_size):
        batch_indices = indices[start:start + batch_size]

        IST_generator.train()
        # Start from clean gradients even if an earlier run of this cell was
        # interrupted between backward() and step().
        optimizer.zero_grad()
        batch_loss = 0
        for example_index in batch_indices:
            loss = _example_loss(alpaca_train_tokens, index=example_index)
            # Gradients of the individual examples are summed (not averaged)
            # before the single optimizer step below.
            loss.backward()
            batch_loss += loss.item()
            del loss
        gc.collect()

        batch_loss /= batch_size

        optimizer.step()
        optimizer.zero_grad()
        train_losses.append(batch_loss)
        epoch_train_loss += batch_loss

        IST_generator.eval()
        # validation: `batch_size` randomly drawn held-out examples, no gradients.
        with torch.no_grad():
            batch_loss = 0
            for _ in range(batch_size):
                loss = _example_loss(alpaca_test_tokens)
                batch_loss += loss.item()
                del loss
            gc.collect()

            batch_loss /= batch_size

            test_losses.append(batch_loss)

        print(f'epoch {epoch}, train loss = {train_losses[-1]}, validation loss={test_losses[-1]}')
        wandb.log({'batch train loss':train_losses[-1], 'batch validation loss':test_losses[-1], 'learning rate': optimizer.param_groups[0]['lr']})



        

In [77]:
# Persist the current IST generator weights.
ist_weights = IST_generator.state_dict()
torch.save(ist_weights, "new_instructtunedweights_4.pt")

In [56]:
# Load onto the CPU first so the checkpoint can be read regardless of the
# device it was saved from; load_state_dict then copies the values into the
# generator's existing parameters.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
IST_generator.load_state_dict(
    torch.load("new_instructtunedweights_3.pt", map_location="cpu")
)

<All keys matched successfully>

In [57]:
def generate(model, tokenizer, prompt, IST=None, max_new_tokens=200):
    """Greedily decode up to `max_new_tokens` tokens after `prompt`.

    Args:
        model: called as `model(tokens, IST)`, or as `model(tokens)` when no
            IST is given. The first element of its return value is indexed as
            logits of shape [batch, position, vocab].
        tokenizer: object with `encode(str)` returning a 1-D token tensor and
            `decode(tensor)` returning a string.
        prompt: text to continue.
        IST: optional tensor passed to the model as its second argument, cast
            to the notebook-level `dtype`. When None the model is called with
            the tokens only (previously this crashed with AttributeError).
        max_new_tokens: upper bound on the number of generated tokens.

    Returns:
        (tokens, text): the prompt tokens followed by the generated ones, and
        the decoded text with its first `len(prompt)` characters removed.
    """
    # NOTE(review): the prompt is encoded with the tokenizer's default bos/eos
    # settings, whereas the training data encodes 'input' with bos=False -
    # confirm this difference is intended.
    tokenized_input = tokenizer.encode(prompt).to(fabric.device)

    # Fall back to the previously hard-coded id if the tokenizer has no `eos_id`.
    eos_id = getattr(tokenizer, "eos_id", 2)

    # The IST does not change between steps, so cast it once.
    extra_args = () if IST is None else (IST.to(dtype),)

    with torch.no_grad():
        for step in range(max_new_tokens):
            last_logits = model(tokenized_input.unsqueeze(0), *extra_args)[0][:, -1, :]
            new_token = torch.argmax(last_logits, dim=1)
            # An EOS predicted as the very first token does not stop generation.
            if step >= 1 and new_token.item() == eos_id:
                break
            tokenized_input = torch.cat([tokenized_input, new_token])

    # Decode the whole sequence and cut off the prompt by character count.
    return tokenized_input, tokenizer.decode(tokenized_input)[len(prompt):]

In [58]:
def get_IST(string):
    """Compute the IST vector for an instruction string, for inference.

    The string is tokenized, passed through `LLamaModel`, and the second
    element of the model output is fed to `IST_generator`; only the last
    sequence position of the result is returned.

    The computation runs under `torch.no_grad()`: this helper is called
    outside `generate`'s own no-grad context, and would otherwise keep an
    autograd graph through `IST_generator` alive for the whole generation.

    Returns:
        Tensor indexed as [batch, embedding] with a batch size of 1; it
        carries no gradient history.
    """
    tokens = tokenizer.encode(string).unsqueeze(0).long().to(fabric.device)
    with torch.no_grad():
        hidden = LLamaModel(tokens)[1]
        return IST_generator(hidden)[:, -1, :]

In [100]:
# Empty prompt: the instruction reaches the model only through the IST vector.
instruction_text = "Fill in the blank: A bird in the hand is worth ____ in the bush."
generated = generate(
    LLamaModel,
    tokenizer,
    prompt="",
    IST=get_IST(instruction_text),
)

In [101]:
# generate() returns (token ids, decoded text); display the text part.
generated_text = generated[1]
generated_text

'The answer to the question "What is the name of the famous American comedian who was known for his catchphrase \'I\'m rich\'?" is "Jerry Lewis".'

In [53]:
# Generate a short continuation for each record and keep it next to the reference answer.
# NOTE(review): alpaca_json[:5176] is the same slice that alpaca_train_tokens is
# built from, so these are training examples - confirm that is intended for
# something named `testing_json`.
testing_json = []

for index, item in enumerate(alpaca_json[:5176]):
    context, question, answer = item['instruction'], item['input'], item['output']
    out = generate(LLamaModel, tokenizer, question, IST=get_IST(context), max_new_tokens=20)[1]
    print(out)
    record = {
        'instruction': context,
        'input': question,
        'model_output': out,
        'ground_truth': answer,
    }
    testing_json.append(record)
    print(index)

# 1999–2000 in English football

The 19
0
# 1990–91 in English football

The 1990
1
The Periodic Table of Elements is a chart that organizes all of the known chemical elements into
2
# 1998–99 in English football

The 1998
3
# 1999–2000 in English football

The 19
4
▶️ 100% FREE SHIPPING on all orders!
▶️
5
 is equal to 0.25.
4/16 = 0.25
6
#include <iostream>
#include <string>
#include <vector>
#include
7
. He finished his meal and left the restaurant.
The sentence is correct.

Comment
8
Tags: python, pandas, dataframe

Question: How to create a new column in a dataframe
9
The capital of France is Paris. The city is located in the north of the country, on the
10
# 1999–2000 in English football

The 19
11
The Great Depression was a severe economic downturn in the United States that lasted from 
12

The oak tree is a deciduous tree that is native to the temperate regions of the
13
# 1999–2000-es magyar labdarúgó-
14
# 1998–99 in English football

The 1998
15



def factorialize(num):
  f

KeyboardInterrupt: 

In [57]:
import json

# Serialize the collected generations as indented JSON and write them out.
serialized_outputs = json.dumps(testing_json, indent=2)
with open('alpaca_outputs.json', 'w') as out_file:
    out_file.write(serialized_outputs)