In [4]:
!huggingface-cli login

/bin/bash: huggingface-cli: command not found


## Loading Llama2 7b tokenizer and model

In [16]:
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import init_empty_weights
from transformers import BitsAndBytesConfig
import torch

# Define the model name and cache directory
model_name = "meta-llama/Llama-2-7b-hf"  # LLaMA 2-7B model
cache_dir = "/scratch/gilbreth/anand173/model_cache"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Use 4-bit quantization
    bnb_4bit_use_double_quant=True,  # Enable double quantization for memory savings
    bnb_4bit_compute_dtype=torch.bfloat16,  # Use bfloat16 for computation
)

# Load the tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

# Load the model with 4-bit quantization and device map
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # Automatically allocate model layers across GPU/CPU
    cache_dir=cache_dir,
)

# Ensure the pad token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

print("Model and tokenizer loaded successfully!")

Loading tokenizer...
Loading model...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Model and tokenizer loaded successfully!


## Zero shot example

In [3]:
import time

# Example review to classify
review = "Item was delivered on time and was a direct replcement"

# Format prompt for classification
prompt = f"""### Instruction:
Classify the following review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:

### Input:
{review}

### Response:"""

# Start timing
start_time = time.time()

# Tokenize the input
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    padding=True,
).to("cuda")  # Send input tensors to GPU

# Generate the output
print("Generating classification response...")
outputs = model.generate(
    **inputs,
    max_new_tokens=10,           # Limit the response length
    eos_token_id=tokenizer.eos_token_id
)

# Decode and display the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
print(f"Predicted Label: {response}")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Generating classification response...
Predicted Label: ### Instruction:
Classify the following review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:

### Input:
Item was delivered on time and was a direct replcement

### Response:
Correct Size/Just Right

##
Runtime: 0.87 seconds


## Few Shot example

In [5]:
# Few-shot examples for the classification task
few_shot_examples = """
### Examples:
1. Review: "order came quickly and is working fine and is much better price than going to Lowe's or Home Depot to purchase."
   Classification -> No Comment
2. Review: "Perfect Fit - Ideal for when you don't need to replace an otherwise good OEM axle. Fits all FWD/AWD Volvo 850   S/V70 '93-'00"
   Classification -> Correct Size/Just Right
3. Review: "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery."
   Classification -> Wrong Size
"""

# Example review to classify
review = "Have not had to use it yet - but I know how handy it is to have it available. Thanks"

# Format prompt with few-shot examples
prompt = f"""### Instruction:
Classify the following autoparts review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:

{few_shot_examples}

### Input:
Review: "{review}"
### Response:
"""
# Start timing
start_time = time.time()

# Tokenize the input
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    padding=True,
).to("cuda")  # Send input tensors to GPU

# Generate the output
print("Generating classification response...")
outputs = model.generate(
    **inputs,
    max_new_tokens=10,           # Limit the response length
    eos_token_id=tokenizer.eos_token_id
)

# Decode and display the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
print(f"Predicted Label: {response}")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Generating classification response...
Predicted Label: ### Instruction:
Classify the following autoparts review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:


### Examples:
1. Review: "order came quickly and is working fine and is much better price than going to Lowe's or Home Depot to purchase."
   Classification -> No Comment
2. Review: "Perfect Fit - Ideal for when you don't need to replace an otherwise good OEM axle. Fits all FWD/AWD Volvo 850   S/V70 '93-'00"
   Classification -> Correct Size/Just Right
3. Review: "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery."
   Classification -> Wrong Size


### Input:
Review: "Have not had to use it yet - but I know how handy it is to have it available. Thanks"
### Response:
Correct Size/Just Right


##
Runtime: 0.76 seconds


## First 10 reviews prompt version 1

In [6]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_10.csv"

# Start timing
start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 10:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""### Instruction:
Classify the following review into one of the categories: "Correct Size/Just Right", "Wrong Size", or "No Comment". Please respond only with the category name.

### Categories:
- "Correct Size/Just Right": The product fits as expected and performs its intended function without issues.
- "Wrong Size": The product does not fit or requires modifications to work correctly.
- "No Comment": The review does not mention size or fitting issues.

### Examples:
1. Review: "order came quickly and is working fine and is much better price than going to Lowe's or Home Depot to purchase."
   Classification -> No Comment
2. Review: "Perfect Fit - Ideal for when you don't need to replace an otherwise good OEM axle. Fits all FWD/AWD Volvo 850   S/V70 '93-'00"
   Classification -> Correct Size/Just Right
3. Review: "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery."
   Classification -> Wrong Size

### Input:
Review: "{review}"
### Response:
"""

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=10,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract only the classification label cleanly
            if "Classification ->" in response:
                response = response.split("Classification ->")[-1].split("\n")[0].strip()
            else:
                # If no proper format, default to "No Comment" for robustness
                response = "No Comment"

            # Write the review and predicted label to the output CSV
            writer.writerow([review, response])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 10 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Predictions for the first 10 reviews saved to fit_predictions_first_10.csv.
Runtime: 8.13 seconds


In [7]:
import pandas as pd

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_10.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(10)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Comparison of Predicted vs. Actual:
                                          ReviewText                FINAL Fit  \
0  You will have to remove the window which is ve...               No Comment   
1   It does what it is supposed to! Sure it does ...               No Comment   
2  Item was delivered on time and was a direct re...               No Comment   
3   This was a really great part, shipped fast, a...               No Comment   
4  This puller worked getting off a stubborn wipe...               No Comment   
5   Works ok, really just a quick adapter as the ...               No Comment   
6  Shipped really fast.  I've had it about a mont...  Correct Size/Just Right   
7   I don't love this, it's just a battery.  But ...               No Comment   
8   It only last 1 year and i couldn't find where...               No Comment   
9   The vendor immediately phoned me, listened ca...  Correct Size/Just Right   

  PredictedLabel  Match  
0     Wrong Size  False  
1     Wrong Size  Fa

## First 10 reviews prompt v2

In [8]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_10_promptv2.csv"

# Start timing
start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 10:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""nstruction: Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
Examples:
1. "Fits perfectly and works well with my setup." -> Correct Size/Just Right
2. "I had to modify it to make it fit my device." -> Wrong Size
3. "Shipped on time and is good quality." -> No Comment
Review: {review}
Response:
"""

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=10,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract only the classification label cleanly
            if "Classification ->" in response:
                response = response.split("Classification ->")[-1].split("\n")[0].strip()
            else:
                # If no proper format, default to "No Comment" for robustness
                response = "No Comment"

            # Write the review and predicted label to the output CSV
            writer.writerow([review, response])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 10 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Predictions for the first 10 reviews saved to fit_predictions_first_10_promptv2.csv.
Runtime: 8.01 seconds


In [9]:
import pandas as pd

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_10_promptv2.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(10)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Comparison of Predicted vs. Actual:
                                          ReviewText                FINAL Fit  \
0  You will have to remove the window which is ve...               No Comment   
1   It does what it is supposed to! Sure it does ...               No Comment   
2  Item was delivered on time and was a direct re...               No Comment   
3   This was a really great part, shipped fast, a...               No Comment   
4  This puller worked getting off a stubborn wipe...               No Comment   
5   Works ok, really just a quick adapter as the ...               No Comment   
6  Shipped really fast.  I've had it about a mont...  Correct Size/Just Right   
7   I don't love this, it's just a battery.  But ...               No Comment   
8   It only last 1 year and i couldn't find where...               No Comment   
9   The vendor immediately phoned me, listened ca...  Correct Size/Just Right   

  PredictedLabel  Match  
0     No Comment   True  
1     No Comment   T

## First 50 reviews

In [10]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_50.csv"

start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 50:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""Instruction: Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
Examples:
1. "Fits perfectly and works well with my setup." -> Correct Size/Just Right
2. "I had to modify it to make it fit my device." -> Wrong Size
3. "Shipped on time and is good quality." -> No Comment
Review: {review}
Response:
"""

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=10,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract only the classification label cleanly
            if "Classification ->" in response:
                response = response.split("Classification ->")[-1].split("\n")[0].strip()
            else:
                # If no proper format, default to "No Comment" for robustness
                response = "No Comment"

            # Write the review and predicted label to the output CSV
            writer.writerow([review, response])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 50 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Predictions for the first 50 reviews saved to fit_predictions_first_50.csv.
Runtime: 35.33 seconds


In [11]:
import pandas as pd

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_50.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(50)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Comparison of Predicted vs. Actual:
                                           ReviewText  \
0   You will have to remove the window which is ve...   
1    It does what it is supposed to! Sure it does ...   
2   Item was delivered on time and was a direct re...   
3    This was a really great part, shipped fast, a...   
4   This puller worked getting off a stubborn wipe...   
5    Works ok, really just a quick adapter as the ...   
6   Shipped really fast.  I've had it about a mont...   
7    I don't love this, it's just a battery.  But ...   
8    It only last 1 year and i couldn't find where...   
9    The vendor immediately phoned me, listened ca...   
10  Battery fired up on the first try and couldn't...   
11  Everyone else was sold out... our kids love ri...   
12   If quality matters to you, and you are lookin...   
13   This battery does not have F2 terminals. They...   
14  Looked far and wide for this battery as a repl...   
15   Husband bought this for his motorcycle, and l..

## Few-Shot first 500 reviews

In [12]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_500.csv"

start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 500:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""Instruction: Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
Examples:
1. "Fits perfectly and works well with my setup." -> Correct Size/Just Right
2. "I had to modify it to make it fit my device." -> Wrong Size
3. "Shipped on time and is good quality." -> No Comment
Review: {review}
Response:
"""

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=1024
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=10,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract only the classification label cleanly
            if "Classification ->" in response:
                response = response.split("Classification ->")[-1].split("\n")[0].strip()
            else:
                # If no proper format, default to "No Comment" for robustness
                response = "No Comment"

            # Write the review and predicted label to the output CSV
            writer.writerow([review, response])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 50 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_500.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(500)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Predictions for the first 50 reviews saved to fit_predictions_first_500.csv.
Runtime: 359.05 seconds
Comparison of Predicted vs. Actual:
                                            ReviewText  \
0    You will have to remove the window which is ve...   
1     It does what it is supposed to! Sure it does ...   
2    Item was delivered on time and was a direct re...   
3     This was a really great part, shipped fast, a...   
4    This puller worked getting off a stubborn wipe...   
..                                                 ...   
495   If you have the rear defrost then this is not...   
496   It's just a bit noisier than the factory Bosc...   
497  This is the 3rd radiator my husband has ordere...   
498   This was very easy to install and a necessary...   
499   This liquid epoxy product repaired the crack ...   

                   FINAL Fit PredictedLabel  Match  
0                 No Comment     No Comment   True  
1                 No Comment     No Comment   True  
2      

## Training Approach

In [19]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, TaskType
from datasets import Dataset
import pandas as pd

# Load tokenizer and quantized model
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_use_double_quant=True)

# Configure LoRA
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=16,  # Rank of the LoRA adapter
    lora_alpha=32,
    lora_dropout=0.1,
)

# Attach LoRA adapters to the model
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # Verify which parameters are trainable

trainable params: 8,388,608 || all params: 6,746,804,224 || trainable%: 0.1243


In [20]:
# Load and preprocess dataset
data = pd.read_csv("fit.csv")

# Select a subset (e.g., 1,000 rows)
subset = data.head(100)

# Format the dataset for fine-tuning
subset["text"] = (
    "Instruction: Classify the following review into one of the categories: "
    '"Correct Size/Just Right," "Wrong Size," or "No Comment." '
    "Review: " + subset["ReviewText"]
)
subset["label"] = subset["FINAL Fit"]

# Convert to Hugging Face Dataset
hf_dataset = Dataset.from_pandas(subset[["text", "label"]])

# Tokenize the dataset
def preprocess_data(batch):
    inputs = tokenizer(batch["text"], truncation=True, padding="max_length", max_length=128, return_tensors="pt")
    inputs["labels"] = tokenizer(batch["label"], truncation=True, padding="max_length", max_length=128, return_tensors="pt")["input_ids"]
    return inputs

tokenized_dataset = hf_dataset.map(preprocess_data, batched=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset["text"] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  subset["label"] = subset["FINAL Fit"]


Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [23]:
from transformers import TrainingArguments, Trainer
from peft import PeftModel

from peft import get_peft_model, LoraConfig, TaskType

# Define LoRA configuration
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,  # Task type for causal language modeling
    inference_mode=False,          # Enable training mode
    r=8,                           # LoRA rank
    lora_alpha=16,                 # LoRA alpha
    lora_dropout=0.1,              # LoRA dropout
)

# Attach LoRA adapters to the model
model = get_peft_model(model, peft_config)

# Verify trainable parameters
print("Trainable Parameters:")
model.print_trainable_parameters()  # Only LoRA layers should be trainable

# Define training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,  # Adjust based on your GPU memory
    gradient_accumulation_steps=4,  # Accumulate gradients to simulate larger batch sizes
    num_train_epochs=3,             # Define number of epochs
    learning_rate=5e-5,             # Fine-tuning learning rate
    fp16=True,                      # Enable mixed precision for efficiency
    logging_dir="./logs",
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
)

# Initialize Trainer
trainer = Trainer(
    model=model,                   # Quantized model with LoRA adapters
    args=training_args,            # Training arguments
    train_dataset=tokenized_dataset,  # Tokenized dataset from Step 2
)

# Fine-tune the model
print("Starting fine-tuning...")
trainer.train()

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Trainable Parameters:
trainable params: 4,194,304 || all params: 6,742,609,920 || trainable%: 0.0622


ValueError: You cannot perform fine-tuning on purely quantized models. Please attach trainable adapters on top of the quantized model to correctly perform fine-tuning. Please see: https://huggingface.co/docs/transformers/peft for more details

## Stratified Approach

In [46]:
import pandas as pd

# Load dataset
file_path = "fit.csv"
data = pd.read_csv(file_path)

# Define categories
categories = ["Correct Size/Just Right", "Wrong Size", "No Comment"]

# Stratified sampling to pick 2 examples per category
few_shot_examples = {}
for category in categories:
    category_examples = data[data["FINAL Fit"] == category].sample(n=2, random_state=42)
    few_shot_examples[category] = category_examples

# Format examples for few-shot prompting
few_shot_prompt = ""
for category, examples in few_shot_examples.items():
    for _, row in examples.iterrows():
        review_text = row["ReviewText"]
        few_shot_prompt += f'Review: "{review_text}"\nClassification -> {category}\n\n'

# Format the final prompt structure
def format_prompt(review, few_shot_prompt):
    return f"""Instruction: Classify the following review into one of the categories: "Correct Size/Just Right", "Wrong Size", or "No Comment". Please respond only with the category name.
Few-shot Examples:
{few_shot_prompt}
Input:
Review: "{review}"
Response:
"""

# Define output file
output_file = "fit_predictions_with_few_shot.csv"

# Prepare to write predictions
with open(output_file, "w") as out_csv:
    out_csv.write("ReviewText,PredictedLabel\n")  # Write headers

    for _, row in data.iterrows():
        review_text = row["ReviewText"]

        # Generate the few-shot prompt
        prompt = format_prompt(review_text, few_shot_prompt)

        # Tokenize the input
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            padding=True,
        ).to("cuda")  # Ensure inputs are on GPU

        # Generate prediction
        outputs = model.generate(
            **inputs,
            max_new_tokens=10,
            eos_token_id=tokenizer.eos_token_id,
        )

        # Decode the response
        predicted_label = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # Write result to file
        out_csv.write(f'"{review_text}","{predicted_label}"\n')

print(f"Predictions saved to {output_file}.")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Predictions saved to fit_predictions_with_few_shot.csv.
