In [None]:
%%capture #Remove this if you're having problems, it just makes these not print output
!pip uninstall transformers --yes #You'll need a special, different transformers
!pip install -q -U datasets peft trl bitsandbytes transformers[torch] scipy tqdm 

In [None]:
import os
from dataclasses import dataclass, field
from typing import Optional

import torch
from datasets import load_dataset
from datasets import load_from_disk
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
)
from tqdm.notebook import tqdm

from trl import SFTTrainer
from huggingface_hub import interpreter_login

In [None]:
# Log in to the Hugging Face Hub from the notebook. Later cells pass
# use_auth_token=True, so presumably they rely on the credentials entered here.
interpreter_login()

In [None]:
# Quantization settings used when loading the base model: 4-bit weights with
# the NF4 quantization type and no double quantization.
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # Use the dtype object defined above rather than repeating it as a string,
    # so there is a single place to change the compute precision.
    # NOTE(review): the TrainingArguments cell trains with bf16=True while the
    # compute dtype here is float16 - confirm the mismatch is intended.
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)
# The empty-string key addresses the whole model; 0 is the target device index.
device_map = {"": 0}

In [None]:
# Download the Llama-2 7B base checkpoint and load it with the quantization
# settings and device placement configured in the previous cell.
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",
    use_auth_token=True,  # presumably picks up the token stored by the login cell
    device_map=device_map,
    quantization_config=bnb_config,
)

In [None]:
# Override the tensor-parallel setting recorded in the model config.
model.config.pretraining_tp = 1

# LoRA adapter hyperparameters for causal language modelling.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=32,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

In [None]:
# Load the tokenizer that matches the base model.
# NOTE(review): trust_remote_code=True allows code shipped in the Hub repo to
# run locally - confirm it is actually needed for this model.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", trust_remote_code=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
training_arguments = TrainingArguments(
    # Output location, checkpoint cadence and logging cadence.
    output_dir="./results",
    save_steps=500,  # raise this to save less often; saving more often costs disk space
    logging_steps=10,
    # Batching: one sample per device, gradients accumulated over 4 steps.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    group_by_length=True,
    # Optimizer, learning-rate schedule and run length.
    # NOTE(review): warmup_ratio is set together with a "constant" scheduler -
    # confirm the warmup is actually applied with this scheduler type.
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    max_grad_norm=0.3,
    max_steps=10000,
    # Mixed precision: bf16 enabled, fp16 disabled.
    bf16=True,
    fp16=False,
)

In [None]:
# Turn off the model's cache flag before training.
# NOTE(review): run_model() later calls generate() on this same config -
# confirm whether the flag should be re-enabled for inference.
model.config.use_cache = False

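The training data is a JSONL file (`data.jsonl`): one JSON object per line, each with a single `"text"` field holding the whole conversation, for example: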
{"text": "### Human: Can you write a short introduction about the relevance of the term \"monopsony\" in economics? Please use examples related to potential monopsonies in the labour market and cite relevant research.### Assistant: \"Monopsony\" refers to a market structure where there is only one buyer for a particular good or service. In economics, this term is particularly relevant in the labor market, where a monopsony employer has significant power over the wages and working conditions of their employees. The presence of a monopsony can result in lower wages and reduced employment opportunities for workers, as the employer has little incentive to increase wages or provide better working conditions.\n\nRecent research has identified potential monopsonies in industries such as retail and fast food, where a few large companies control a significant portion of the market (Bivens & Mishel, 2013). In these industries, workers often face low wages, limited benefits, and reduced bargaining power, leading to a situation where they are dependent on the employer for their livelihood. This dependence can result in further suppression of wages and a decline in working conditions.\n\nOverall, the concept of monopsony is essential to understanding the dynamics of labor markets and the impact of market power on workers. Further research is needed to understand the extent and impact of monopsonies on the economy and to develop policies to address this issue.\n\nReferences:\nBivens, J., & Mishel, L. (2013). The Pay of Corporate Executives and Financial Professionals as Evidence of Rents in Top 1 Percent Incomes. Journal of Economic Perspectives, 27(3), 57-78.### Human: Now explain it to a dog"}

In [None]:
# Read data.jsonl from the working directory as the training split.
# The trainer below reads each record's "text" field.
dataset = load_dataset("json", data_files="data.jsonl", split="train")

In [None]:
# Supervised fine-tuning trainer: combines the quantized base model, the LoRA
# settings, the tokenizer and the JSONL dataset.
# NOTE(review): these keyword arguments follow the trl API this notebook was
# written against - confirm they still exist in the installed trl version.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=training_arguments,
    train_dataset=dataset,
    dataset_text_field="text",  # field of each record that holds the training text
    max_seq_length=1024,
    packing=False,
)

In [None]:
# Start fine-tuning. Checkpoints go to the output_dir configured in
# training_arguments ("./results").
trainer.train()

# To resume an interrupted run, run this instead of the call above:
#trainer.train(resume_from_checkpoint=True)

In [None]:
# Upload the fine-tuned weights to the Hub. Replace the repo id with your own.
# Push trainer.model rather than the notebook-level `model`: the trainer was
# given peft_config, so the PEFT-wrapped model that owns the trained LoRA
# adapter is the one it holds, while `model` is the quantized base model.
trainer.model.push_to_hub("YOURUSERNAME/WHATEVER", use_auth_token=True)

In [None]:
# Attach the adapter saved in a training checkpoint to the base model.
# NOTE(review): the checkpoint path is hard-coded - point it at a checkpoint
# directory that actually exists under ./results.
# NOTE(review): this rebinds `model`, so re-running the cell wraps an
# already-wrapped model - reload the base model before running it again.
model = PeftModel.from_pretrained(model, "results/checkpoint-2000")

In [None]:
def make_prompt(entry):
    """Wrap ``entry`` in the "### Human: ... ### Assistant:" prompt template.

    The returned string ends right after "### Assistant:" so the model's
    continuation is the assistant's reply.
    """
    template = "### Human: YOUR INSTRUCTION HERE: {} ### Assistant:"
    return template.format(entry)

In [None]:
def run_model(entry, max_new_tokens=100):
    """Generate the model's reply to ``entry`` and return it as text.

    ``entry`` is wrapped by ``make_prompt``, tokenized with the notebook-level
    ``tokenizer`` and fed to the notebook-level ``model``. Only the tokens
    produced after the prompt are decoded.

    Args:
        entry: Instruction text to insert into the prompt template.
        max_new_tokens: Upper bound on generated tokens, forwarded to
            ``model.generate``. Defaults to 100, the previously fixed value.

    Returns:
        The decoded continuation, with special tokens stripped.
    """
    # Move the inputs to whatever device the model is on instead of assuming
    # a device named "cuda".
    model_input = tokenizer(
        make_prompt(entry),
        return_tensors="pt",
    ).to(model.device)

    # generate() returns prompt + continuation; remember where the prompt ends.
    prompt_length = len(model_input["input_ids"][0])

    model.eval()
    with torch.no_grad():
        full_tokens = model.generate(**model_input, max_new_tokens=max_new_tokens)[0]

    return tokenizer.decode(full_tokens[prompt_length:], skip_special_tokens=True)

In [None]:
# Try the model on a sample instruction; replace the string with your own
# input. The cell's output is the generated reply.
run_model("Hello! Here's my example of an input you might test...")