In [1]:
import os

import torch
from datasets import load_dataset, DatasetDict
from peft import LoraConfig, PeftModel, get_peft_model
from torch import cuda
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GPTQConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging
)
from trl import SFTTrainer

from config import HUGGING_FACE_TOKEN as token

# Debugging aid (disabled): uncomment to set CUDA_LAUNCH_BLOCKING for this process.
# os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

# Hub identifier of the base checkpoint, and the local directory later passed as
# `cache_dir` when the model weights are loaded.
model_name = "meta-llama/Llama-2-7b-chat-hf"
cache_dir = "models/"

# Fetch the Yahoo Answers Topics dataset; the "train" and "test" splits are used for fine-tuning below.
dataset = load_dataset("yahoo_answers_topics")

ImportError: cannot import name 'GPTQConfig' from 'transformers' (/home/exouser/anaconda3/envs/llama/lib/python3.10/site-packages/transformers/__init__.py)

In [None]:
# Build the tokenizer that belongs to the base checkpoint named in `model_name`.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=token,
    trust_remote_code=True,
)

# Pad on the right-hand side, reusing the end-of-sequence token as the padding token.
tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Quantization settings used to load the LLM with less GPU memory.
# `BitsAndBytesConfig` comes from the notebook's top import cell; without that
# import this cell fails with NameError on a fresh kernel.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # 4-bit quantization
    bnb_4bit_quant_type='nf4',  # Normalized float 4
    bnb_4bit_use_double_quant=True,  # Second quantization after the first
    bnb_4bit_compute_dtype="float16"  # Computation type
)

In [None]:
#!pip install peft
#!pip install trl

In [None]:
# Place the whole model on GPU 0 (the empty-string key covers every module).
device_map = {"": 0}

# Load the quantized causal language model.
# `num_labels` is intentionally not passed: the model is loaded through
# AutoModelForCausalLM, which has no classification head to size.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    token=token,
    device_map=device_map,
    trust_remote_code=True,
    cache_dir=cache_dir
)

# Turn off the generation cache for the training run.
model.config.use_cache = False
# More info: https://github.com/huggingface/transformers/pull/24906
model.config.pretraining_tp = 1

# LoRA adapter hyperparameters (rank 64, alpha 16, 10% dropout, no bias terms trained).
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the base model with the LoRA adapters and report how many parameters are trainable.
lora_model = get_peft_model(model, peft_config)
lora_model.print_trainable_parameters()

In [None]:
# Hyperparameters and bookkeeping for the fine-tuning run.
training_args = TrainingArguments(
    # Directory that receives checkpoints.
    output_dir="./results",
    # Optimisation settings.
    num_train_epochs=1,
    learning_rate=2e-4,
    optim="paged_adamw_32bit",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,
    # Logging, evaluation and checkpoint cadence, all expressed in steps.
    logging_steps=100,
    evaluation_strategy="steps",
    eval_steps=300,
    save_steps=300,
    # Keep every dataset column instead of dropping the ones the model does not consume.
    remove_unused_columns=False,
)

# Supervised fine-tuning trainer: trains on the "best_answer" text field of the
# train split and evaluates on the test split, without packing sequences.
trainer = SFTTrainer(
    model=lora_model,
    peft_config=peft_config,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    dataset_text_field="best_answer",
    max_seq_length=512,
    packing=False,
)

# Run the fine-tuning loop.
trainer.train()

In [None]:
# Persist the trained model in a "final_checkpoint" folder inside the Trainer's output directory.
# The path is derived from `training_args.output_dir` rather than from a bare `output_dir`
# variable (which no earlier cell defines), so the cell works on a fresh kernel and
# re-running it always yields the same path.
output_dir = os.path.join(training_args.output_dir, "final_checkpoint")
trainer.model.save_pretrained(output_dir)

In [None]:
# Define a function to get predictions
def predict_topic(text, model, tokenizer):
    """Return the argmax over dimension 1 of the model's logits for ``text``.

    Args:
        text: Input string (or batch of strings) to run through the model.
        model: Callable model that accepts the tokenizer's output as keyword
            arguments and returns an object with a ``logits`` tensor. If it
            exposes a ``device`` attribute, the inputs are moved there first.
        tokenizer: Callable tokenizer returning a mapping of PyTorch tensors.

    Returns:
        A tensor holding ``torch.argmax(logits, dim=1)``.

    Note:
        The result is a class index only when the logits are shaped
        ``(batch, num_labels)`` -- presumably a model with a classification
        head is expected here; TODO confirm against the model actually passed.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding='max_length', max_length=512)

    # The tokenizer output and the model can live on different devices;
    # move the inputs so the forward pass does not fail on a device mismatch.
    device = getattr(model, "device", None)
    if device is not None:
        inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

    # Inference only: skip autograd bookkeeping to save memory.
    with torch.no_grad():
        outputs = model(**inputs)

    predictions = torch.argmax(outputs.logits, dim=1)
    return predictions

# Example of predicting topics for unseen text.
# NOTE(review): `model` is created earlier via AutoModelForCausalLM, while predict_topic
# takes argmax over dim=1 of the logits. That presumably yields a topic id only for a
# model with a classification head -- confirm the intended model before trusting this output.
unseen_text = "Your unseen text goes here."
predicted_topic = predict_topic(unseen_text, model, tokenizer)
print(f"Predicted Topic: {predicted_topic}")


In [None]:
# Predict the topic of the text held in the variable `transcript`.
# NOTE(review): `transcript` is not assigned anywhere in the visible notebook; it must be
# defined in an earlier cell, otherwise this cell raises NameError on a fresh kernel.
predicted_topic = predict_topic(transcript, model, tokenizer)
print(f"Predicted Topic: {predicted_topic}")