In [13]:
import os
import sys
# Resolve the project root as the parent of the current working directory.
# NOTE(review): presumably this is where the project-local modules imported
# later in the notebook live — confirm against the repo layout.
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
# Put the project root at the front of sys.path so it wins module lookup.
# Existing occurrences are dropped first, so re-running this cell keeps
# exactly one entry instead of stacking duplicates.
sys.path[:] = [entry for entry in sys.path if entry != project_root]
sys.path.insert(0, project_root)

In [14]:

import random
from argparse import ArgumentParser
import logging

import torch
from trl import SFTConfig, SFTTrainer

from lima_dataset import load_lima_dataset, tokenize_text, format_prompt_func, EOT_TOKEN
from utils import (
    read_yaml,
    get_model_config,
    get_tokenizer_config,
    get_generation_config,
    get_generation_samples,
)
from model import (
    load_model,
    load_tokenizer,
    generate,
)

In [15]:
# Load the run configuration from YAML. The path is relative to the
# notebook's working directory, so the cell must be run from there.
# Alternative config kept for reference:
# config = read_yaml("./configs/generate_config_llama.yaml")
config = read_yaml("../configs/generate_config_llama_qlora.yaml")

In [16]:
# Extract the tokenizer name, path and keyword settings from the config,
# then build the tokenizer from them.
tokenizer_name, tokenizer_path, tokenizer_config = get_tokenizer_config(config)
tokenizer = load_tokenizer(
    tokenizer_name=tokenizer_name,
    tokenizer_path=tokenizer_path,
    tokenizer_config=tokenizer_config,
)
# Bare trailing expression: displayed as the cell output for inspection.
tokenizer_name, tokenizer_path, tokenizer_config

('llama2',
 'meta-llama/Llama-2-7b-hf',
 {'add_bos_token': True, 'add_eos_token': False})

In [17]:
# Extract the model name, checkpoint path, base-model path and load settings
# from the config. The tokenizer's pad token id and vocabulary length are
# passed in and show up in the resulting model_config.
# NOTE(review): presumably tokenizer_length is used to resize the model's
# embeddings to match the tokenizer — confirm in get_model_config/load_model.
model_name, model_path, base_model_path, model_config = get_model_config(
    config,
    pad_token_id=tokenizer.pad_token_id,
    tokenizer_length=len(tokenizer),
)
# Bare trailing expression: displayed as the cell output for inspection.
model_config

{'force_download': False,
 'device_map': 'cuda:0',
 'use_cache': False,
 'pad_token_id': 32000,
 'tokenizer_length': 32002}

In [18]:
# Load the model using the values resolved by get_model_config above.
# NOTE(review): base_model_path is presumably the checkpoint an adapter is
# applied on top of — confirm in model.load_model.
model = load_model(
    model_string=model_name,
    model_path=model_path,
    base_model_path=base_model_path,
    model_config=model_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [19]:
# Re-read the same YAML file that was loaded earlier in the notebook.
# NOTE(review): presumably this picks up edits to the generation settings
# without reloading the model — confirm; otherwise this reload is redundant.
config = read_yaml('../configs/generate_config_llama_qlora.yaml')

In [20]:
# Start from the generation settings in the config, then copy the tokenizer's
# special-token ids into them (pad first, then eos).
generation_config = get_generation_config(config)
for _special_id_name in ("pad_token_id", "eos_token_id"):
    generation_config[_special_id_name] = getattr(tokenizer, _special_id_name)
# Optional manual override, left disabled:
# generation_config['max_new_tokens'] = 1024

In [21]:
# Read the prompt samples from the config; these are the inputs passed to
# generate() further down.
samples = get_generation_samples(config)
# Bare trailing expression: displayed as the cell output for inspection.
samples

['What is reinforcement learning?',
 'Explain black hole singularity.',
 'Describe the role of mitochondria.']

In [25]:
# prompt = "I'm writing a NeurIPS paper about a new model architecture for processing and generating long texts. Here are some facts about the paper:\n* The main trick is to replace some of the attention heads with an exponential moving average, where the decay rate is learned for each head. We call this architecture ExeMA.\n* On language modeling, the perplexity difference between our model and a vanilla transformer is negligible, but that's because next-token prediction is almost always a local task, so perplexity won't be sensitive enough to detect any improvements in long-range understanding.\n* However, on the SCROLLS benchmark, our model improves by 10% over the baseline.\n* We also have a new metric for measuring coherence in generated text (CoGnaTe), where our model generates text that is 43% more coherent than the baseline.\nHelp me write the paper's introduction."
# # prompt = "Plan a day trip in Tokyo. The spots need to be within walking distance to each other."
# prompt = "What medicine should I take when I get a cold?"
# # prompt = f"{prompt}{EOT_TOKEN}"

# Run generation over the config-provided prompt samples. The result is
# indexed per prompt and printed as text in the following cells.
# NOTE(review): the meaning of use_encode / use_eot_token is defined in
# model.generate and is not visible here — confirm before changing them.
outs = generate(
    model,
    tokenizer,
    # prompt_samples=prompt,
    prompt_samples=samples,
    generation_config=generation_config,
    use_encode=False,
    use_eot_token=False,
)

In [26]:
# Show the generation for the first prompt sample; print() renders the
# newlines in the text instead of showing an escaped string repr.
print(outs[0])

What is reinforcement learning?
Reinforcement learning is the branch of machine learning that helps machines learn from experiences.
Reinforcement learning is the branch of machine learning that helps machines learn from experiences. It is used to teach machines to make decisions by providing feedback about whether or not those decisions are good.
In reinforcement learning, the algorithm receives input from the environment, such as the position of a player in a video game or the temperature of a room. The algorithm then decides what action to take next based on that information and the goal of the game.
There are two types of reinforcement learning: supervised and unsupervised. Supervised learning is when the algorithm receives input from an expert or a teacher who provides feedback about whether or not its decisions are good. Unsupervised learning is when there is no expert or teacher, so the algorithm must figure out what is good and bad on its own.
Reinforcement learning has many ap

In [28]:
# Show the generation for the second prompt sample; print() renders the
# newlines in the text instead of showing an escaped string repr.
print(outs[1])

Explain black hole singularity.
I want to know about singularity and why we have to accept it?
Asked by Rishabh Kumar | 13th Mar, 2015, 04:06: PM
Singularities are points in space and time where physical quantities such as density or temperature diverge. This happens because of the extreme nature of the spacetime curvature at these points.
We cannot understand the behavior of matter at singularities because at this point the laws of physics breaks down. So we have to accept it.
