In [None]:
import os
import json
from tqdm import tqdm  # Import tqdm for progress tracking

import torch
from datasets import load_dataset
from transformers import (
    LlamaForCausalLM,
    BitsAndBytesConfig,
    LlamaTokenizer,
    GenerationConfig,
)

In [3]:
# Hugging Face Hub id of the base checkpoint; used for both the model and the tokenizer.
model_name = "meta-llama/Llama-2-7b-hf"

In [3]:
# bitsandbytes settings: 4-bit weights with the "nf4" quantization type,
# float16 as the compute dtype, and double quantization switched off.
bnb_dict = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype='float16',
    bnb_4bit_use_double_quant=False,
)
bnb_config = BitsAndBytesConfig(**bnb_dict)

# Load the quantized checkpoint onto the first CUDA device.
model = LlamaForCausalLM.from_pretrained(
    model_name,
    device_map='cuda:0',
    quantization_config=bnb_config,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Tokenizer is loaded from the same checkpoint id as the model so the
# vocabulary matches; the bare name on the last line displays its repr.
tokenizer = LlamaTokenizer.from_pretrained(
    model_name,
)
tokenizer

LlamaTokenizer(name_or_path='meta-llama/Llama-2-7b-hf', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}
)

In [11]:
# Which Hub dataset to pull and which of its splits to read.
split = "train"
dataset_name = "GAIR/lima"

dataset = load_dataset(dataset_name, split=split)

# Peek at the first record; each record exposes a "conversations" list.
dataset[0]

{'conversations': ['Can brain cells move? By movement I mean long distance migration (preferably within the brain only).',
  'The question is relatively broad and one should take into account that the brain not only consists of neurons, but also glial cells (supportive cells) and pre-mitotic neuronal stem cells. Furthermore, as critical fellow-scientists have indicated, developmental stage is very important, as the developing embryonic brain is very different from the adult brain.\nHowever, after sifting through various publications, the answer to the question is actually remarkably simple: Yes, brain cells migrate.\nIn  the adult brain glial cells migrate in the brain (Klämbt, 2009). Glial cells are involved in a myriad of functions, but a notable example of migrating glial cells are the oligodendrocytes that migrate relative long distances to find their target axons onto which they wrap themselves to form the insulating myelin sheath (Tsai and Miller, 2002).\nNeuronal stem cells migr

In [None]:
# Hold out 20% of the samples as a test set. The seed is fixed so that the
# same rows land in each split on every kernel restart; without it the
# shuffle (and therefore the split) changes from run to run.
# NOTE(review): the generation cell in this notebook iterates over the full
# `dataset`, not over `split_dataset` — confirm whether that is intended.
split_dataset = dataset.train_test_split(test_size=0.2, seed=42)
split_dataset['train']

In [6]:
# Decoding settings passed to model.generate() for every prompt.
generation_config = GenerationConfig(
    # Stochastic decoding: temperature-scaled nucleus sampling, no top-k filter.
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    top_k=None,
    # Single beam, i.e. no beam search.
    num_beams=1,
    # Penalize tokens that were already produced.
    repetition_penalty=1.2,
    # Length cap (max_new_tokens is left unset).
    max_length=2048,
)
generation_config

GenerationConfig {
  "do_sample": true,
  "max_length": 2048,
  "repetition_penalty": 1.2,
  "temperature": 0.7,
  "top_k": null,
  "top_p": 0.9
}

In [7]:
output_file = "./generated_outputs.json"

# Generate one completion per dataset sample and stream each result to disk as
# an element of a single JSON array, writing every entry as soon as it exists.
#
# The file is kept valid JSON in every outcome:
#   * the "," separator is written *before* each entry after the first
#     successful one, so failed samples (including the last ones) can never
#     leave a dangling trailing comma;
#   * each entry is serialized to a string first, so a serialization error
#     cannot leave a half-written object in the file;
#   * the closing "]" is written in a `finally`, so an interrupted run still
#     produces a parseable file containing the entries finished so far.
try:
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("[\n")  # Start of JSON array
        entries_written = 0  # Count of entries actually written to the file

        try:
            with tqdm(total=len(dataset), desc="Generating Outputs", unit="sample") as pbar:
                for idx, prompt in enumerate(dataset):
                    try:
                        # The first conversation turn is used as the prompt.
                        prompt_text = prompt["conversations"][0]

                        tokenized_prompt = tokenizer.encode(
                            prompt_text, return_tensors="pt"
                        ).to(device="cuda:0")

                        # generate() returns token ids (not logits).
                        generated_ids = model.generate(
                            tokenized_prompt, generation_config=generation_config
                        )
                        decoded_text = tokenizer.batch_decode(
                            generated_ids, skip_special_tokens=True
                        )[0]

                        # Create a JSON entry
                        output_entry = {
                            "index": idx,
                            "original_prompt": prompt_text,
                            "generated_output": decoded_text,
                        }

                        # Serialize fully before touching the file, then emit
                        # separator + entry in a single write.
                        serialized = json.dumps(
                            output_entry, indent=4, ensure_ascii=False
                        )
                        separator = ",\n" if entries_written else ""
                        f.write(separator + serialized)
                        entries_written += 1

                    except Exception as e:
                        # Best effort: report the failing sample and keep going.
                        print(f"Error generating output for index {idx}: {e}")

                    pbar.update(1)  # Update the progress bar after each sample
        finally:
            f.write("\n]")  # End of JSON array, even if the loop was interrupted

    # True when at least one entry was successfully written.
    success = entries_written > 0

    if not success:
        print(
            "No outputs were successfully generated. Please check your model and dataset."
        )
    else:
        print(f"Saved generated outputs dynamically to {output_file}")

except Exception as e:
    print(f"Fatal error: Unable to write to file {output_file}. Error: {e}")

Generating Outputs:   0%|          | 0/300 [00:00<?, ?sample/s]

Generating Outputs: 100%|██████████| 300/300 [1:13:13<00:00, 14.65s/sample]

Saved generated outputs dynamically to ./generated_outputs.json



