## Imports

In [4]:
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import init_empty_weights
from transformers import BitsAndBytesConfig
import torch
import csv
import pandas as pd

## Loading Model Llama-3.1-8B-Instruct and Tokenizer

In [2]:
# Define the model name and cache directory
model_name = "meta-llama/Llama-3.1-8B-Instruct"
cache_dir = "/scratch/gilbreth/anand173/model_cache"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Use 4-bit quantization
    bnb_4bit_use_double_quant=True,  # Enable double quantization for memory savings
    bnb_4bit_compute_dtype=torch.bfloat16,  # Use bfloat16 for computation
)

# Load the tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

# Load the model with 4-bit quantization and device map
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # Automatically allocate model layers across GPU/CPU
    cache_dir=cache_dir,
)

# Ensure the pad token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

print("Model and tokenizer loaded successfully!")

Loading tokenizer...
Loading model...




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model and tokenizer loaded successfully!


## Zero Shot Model Test

In [3]:
# Example review to classify
review = "Product was delivered on time, and works well with my Yamaha Vstar 1100CC. Fits right in had no issues starting."

# Format prompt for classification
prompt = f"""### Instruction:
Classify the following review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:

### Input:
{review}

### Response:"""

# Start timing
start_time = time.time()

# Update the eos_token and synchronize it with the model
tokenizer.eos_token = "</end>"
model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

# Tokenize the input
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    padding=True,
).to("cuda")  # Send input tensors to GPU


# Generate the output
print("Generating classification response...")
outputs = model.generate(
    **inputs,
    max_new_tokens=3,           # Limit the response length
    eos_token_id=tokenizer.eos_token_id
)

# Decode and display the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
print(f"Predicted Label: {response}")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Generating classification response...
Predicted Label: ### Instruction:
Classify the following review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:

### Input:
Product was delivered on time, and works well with my Yamaha Vstar 1100CC. Fits right in had no issues starting.

### Response: 
Correct Size
Runtime: 1.39 seconds


## Predicting fit

In [10]:
# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "results/fit_predictions_full_dataset.csv"

start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            review = row["ReviewText"]

            prompt = f"""### Instruction:
    Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
    Respond only with the category name.

    ### Categories:
    1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
    2. Wrong Size: The product does not fit or requires modifications to work correctly.
    3. No Comment: The review does not mention size or fitting issues.

    ### Examples:
    Correct Size/Just Right: I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it.
    Correct Size/Just Right: I have even used this to start my dodge 2500 which has a heavy duty battery for starting and it worked great. The light pulls out to expose a 12v car adapter socket.
    Correct Size/Just Right: Perfect fit and the price is right. So far so good, I expect it will last a long long time.
    Correct Size/Just Right: the belt  came in time and fit properly did not have any issues when installing it i would buy again
    Wrong Size: It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it.
    Wrong Size: two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery.
    Wrong Size: I ordered this product for my car   while Amazon.com says this will fit my 1996 Nissan Sentra GXE auto transmission, when the part arrived, I knew it wouldn't fit. I won't order parts online again.
    Wrong Size: they could be more specific as to which model its used for.
    No Comment: I would recommend this product.  It lasts long and works fine.  Did the job for me.  It was a good price.
    No Comment: Got the product and was as advertised. Have not yet had time to install. Does not look cheap and product was not damaged.
    No Comment: This Is a great product and its even stronger than de factory one my Jeep Grand Cherokee came with.... Recommend this product
    No Comment: Good Quality Great Price, unit seem to be a bit more sturdy than the Trico or Anco blades I replaced.  Like the price and quality.

    ### Review:
    {review}
    ### Response:
    """

            # Update the eos_token and synchronize it with the model
            tokenizer.eos_token = "</end>"
            model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=2000
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=3,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # print(response)

            # Extract the category after "### Response:"
            if "### Response:" in response:
                category = response.split("### Response:")[-1].strip()
            else:
                category = "Invalid Response"  # Fallback if the format is incorrect

            print(f"Review: {review}")
            print(f"Predicted Label: {category}")

            # Write the review and predicted label to the output CSV
            writer.writerow([review, category])

print(f"Predictions for the first 500 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Review: You will have to remove the window which is very easy and be very careful.  drop the window down to allow access to both screws holding the glass.  Lift the window up a bit and then drop the front of the windows down into the door slowly and then raise the rear of the glass up and you will start to lift the glass at a angle upwards and out of the door frame.  You will then be able to access the three screws to remove the door handle and you will only have to remove the cable pin from the door once you have access to the back of the handle.The process to remove the door handles is very very easy and you will need a P2 and P3 screw driver and a 10mm socket with extension.
Predicted Label: Wrong Size
Review:  It does what it is supposed to! Sure it does not come with any instructions, why take a star away for that? Once I got the power connected to it the correct way, the unit kicks on and off solidly. There is some audible  #34;clicking #34; when the relay switches on and off, so

Because of unequal category tokenization lengths, model predicts "Correct Size/" in place of "Correct Size/Just Right". This was done using max_new_tokens = 3 in the output parameter setting. Increasing this value made the model pad the response using the first few characteristics of the prompt itself making it difficult for accuracy comparison. It would help to standardize the input/output based on token length for ensuring consistency. To avoid this issue, I have limited the max new tokens and then I employ post processing to convert "Correct Size/" to "Correct Size/Just Right"

Post processing step can be seen below.

## Model Performance against Gold classification

In [None]:
import csv
import pandas as pd
import time

# Define the valid categories and their standardized replacements
valid_categories = {
    "Correct Size": "Correct Size/Just Right",
    "Wrong Size": "Wrong Size",
    "Wrong Size Correct": "Wrong Size",
    "No Comment": "No Comment"
}

# Function to standardize categories
def standardize_category(predicted_label):
    for category, replacement in valid_categories.items():
        if category in predicted_label:
            return replacement
    return "Invalid Response"  # Default for invalid categories

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_full_dataset.csv"  # File with model predictions
output_file = "fit_predictions_full_dataset_4_few_shot_cleaned.csv"  # Cleaned predictions file

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Standardize the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(standardize_category)

# Save the cleaned predictions to a new CSV file
# df_predictions.to_csv(output_file, index=False)
# print(f"Cleaned predictions saved to {output_file}.")

# Ensure only the first 500 rows are used for comparison
# df_input = df_input.head(500)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")