In [1]:
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import init_empty_weights
from transformers import BitsAndBytesConfig
import torch

## Loading Llama 3.1 8b instruct tokenizer and model

In [2]:
# Define the model name and cache directory
model_name = "meta-llama/Llama-3.1-8B-Instruct"
cache_dir = "/scratch/gilbreth/anand173/model_cache"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Use 4-bit quantization
    bnb_4bit_use_double_quant=True,  # Enable double quantization for memory savings
    bnb_4bit_compute_dtype=torch.bfloat16,  # Use bfloat16 for computation
)

# Load the tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

# Load the model with 4-bit quantization and device map
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # Automatically allocate model layers across GPU/CPU
    cache_dir=cache_dir,
)

# Ensure the pad token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

print("Model and tokenizer loaded successfully!")

Loading tokenizer...
Loading model...




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model and tokenizer loaded successfully!


## Zero shot example

In [20]:
import time

# Example review to classify
review = "Product was delivered on time, and works well with my Yamaha Vstar 1100CC. Fits right in had no issues starting."

# Format prompt for classification
prompt = f"""### Instruction:
Classify the following review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:

### Input:
{review}

### Response:"""

# Start timing
start_time = time.time()

# Update the eos_token and synchronize it with the model
tokenizer.eos_token = "</end>"
model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

# Tokenize the input
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    padding=True,
).to("cuda")  # Send input tensors to GPU


# Generate the output
print("Generating classification response...")
outputs = model.generate(
    **inputs,
    max_new_tokens=3,           # Limit the response length
    eos_token_id=tokenizer.eos_token_id
)

# Decode and display the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
print(f"Predicted Label: {response}")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Generating classification response...
Predicted Label: ### Instruction:
Classify the following review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:

### Input:
Product was delivered on time, and works well with my Yamaha Vstar 1100CC. Fits right in had no issues starting.

### Response: 
Correct Size
Runtime: 0.31 seconds


## Few Shot example

In [24]:
# Few-shot examples for the classification task
few_shot_examples = """
### Examples:
1. Review: "order came quickly and is working fine and is much better price than going to Lowe's or Home Depot to purchase."
   Classification -> No Comment
2. Review: "Perfect Fit - Ideal for when you don't need to replace an otherwise good OEM axle. Fits all FWD/AWD Volvo 850   S/V70 '93-'00"
   Classification -> Correct Size/Just Right
3. Review: "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery."
   Classification -> Wrong Size
"""

# Example review to classify
review = "The non-contact surfaces rust after just one month and look great if that's the look you're going for. Not for nice rims that show off your rust-colored rotors."

# Format prompt with few-shot examples
prompt = f"""### Instruction:
Classify the following autoparts review into "Correct Size/Just Right", "Wrong Size", "No Comment" with respect to fit of product. Please respond only with the category:

{few_shot_examples}

### Input:
Review: "{review}"
### Response:
"""

# Update the eos_token and synchronize it with the model
tokenizer.eos_token = "</end>"
model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

# Start timing
start_time = time.time()

# Tokenize the input
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    padding=True,
).to("cuda")  # Send input tensors to GPU

# Generate the output
print("Generating classification response...")
outputs = model.generate(
    **inputs,
    max_new_tokens=3,           # Limit the response length
    eos_token_id=tokenizer.eos_token_id
)

# Decode and display the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
print(f"Predicted Label: {response}")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Generating classification response...
Predicted Label: ### Instruction:
Classify the following autoparts review into "Correct Size/Just Right", "Wrong Size", "No Comment" with respect to fit of product. Please respond only with the category:


### Examples:
1. Review: "order came quickly and is working fine and is much better price than going to Lowe's or Home Depot to purchase."
   Classification -> No Comment
2. Review: "Perfect Fit - Ideal for when you don't need to replace an otherwise good OEM axle. Fits all FWD/AWD Volvo 850   S/V70 '93-'00"
   Classification -> Correct Size/Just Right
3. Review: "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery."
   Classification -> Wrong Size


### Input:
Review: "The non-contact surfaces rust after just one month and look great if that's the look you're going for. Not for nice rims that show off your rust-colored rotors."
### Response:
No Comment
Runtime: 0.43 seconds


## First 10 reviews prompt version 1

In [28]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_10.csv"

# Start timing
start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 10:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""### Instruction:
            You are an assistant tasked with classifying reviews into one of the categories: "Correct Size/Just Right", "Wrong Size", or "No Comment".
            Respond **only** with the category name: "Correct Size/Just Right", "Wrong Size", or "No Comment". Do not include any other text or explanation.

            ### Categories:
            1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
            2. Wrong Size: The product does not fit or requires modifications to work correctly.
            3. No Comment: The review does not mention size or fitting issues.

            ### Review:
            {review}
            ### Response:
            """

            # Update the eos_token and synchronize it with the model
            tokenizer.eos_token = "</end>"
            model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=3,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract the category after "### Response:"
            if "### Response:" in response:
                category = response.split("### Response:")[-1].strip()
            else:
                category = "Invalid Response"  # Fallback if the format is incorrect

            print(f"Review: {review}")
            print(f"Predicted Label: {category}")

            # Write the review and predicted label to the output CSV
            writer.writerow([review, category])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 10 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Review: You will have to remove the window which is very easy and be very careful.  drop the window down to allow access to both screws holding the glass.  Lift the window up a bit and then drop the front of the windows down into the door slowly and then raise the rear of the glass up and you will start to lift the glass at a angle upwards and out of the door frame.  You will then be able to access the three screws to remove the door handle and you will only have to remove the cable pin from the door once you have access to the back of the handle.The process to remove the door handles is very very easy and you will need a P2 and P3 screw driver and a 10mm socket with extension.
Predicted Label: Correct Size/
Review:  It does what it is supposed to! Sure it does not come with any instructions, why take a star away for that? Once I got the power connected to it the correct way, the unit kicks on and off solidly. There is some audible  #34;clicking #34; when the relay switches on and off,

In [32]:
import pandas as pd

# Define the valid categories and their standardized replacements
valid_categories = {
    "Correct Size": "Correct Size/Just Right",
    "Wrong Size": "Wrong Size",
    "No Comment": "No Comment"
}

# Function to clean and replace the PredictedLabel column
def clean_predicted_label(predicted_label):
    for category, replacement in valid_categories.items():
        if category in predicted_label:
            return replacement  # Return the standardized category if found
    return "Invalid Response"  # Default if no valid category is found

# File paths
input_file = "fit_predictions_first_10.csv"  # File with predictions
output_file = "fit_predictions_first_10_cleaned.csv"  # Cleaned output file

# Load the predictions file
df_predictions = pd.read_csv(input_file)

# Clean the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(clean_predicted_label)

# Save the cleaned predictions to a new CSV file
df_predictions.to_csv(output_file, index=False)

print(f"Cleaned predictions saved to {output_file}.")

Cleaned predictions saved to fit_predictions_first_10_cleaned.csv.


In [33]:
import pandas as pd

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_10_cleaned.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(10)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Comparison of Predicted vs. Actual:
                                          ReviewText                FINAL Fit  \
0  You will have to remove the window which is ve...               No Comment   
1   It does what it is supposed to! Sure it does ...               No Comment   
2  Item was delivered on time and was a direct re...               No Comment   
3   This was a really great part, shipped fast, a...               No Comment   
4  This puller worked getting off a stubborn wipe...               No Comment   
5   Works ok, really just a quick adapter as the ...               No Comment   
6  Shipped really fast.  I've had it about a mont...  Correct Size/Just Right   
7   I don't love this, it's just a battery.  But ...               No Comment   
8   It only last 1 year and i couldn't find where...               No Comment   
9   The vendor immediately phoned me, listened ca...  Correct Size/Just Right   

            PredictedLabel  Match  
0  Correct Size/Just Right  False  


## First 10 reviews prompt v2

In [49]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_10_promptv2.csv"

# Start timing
start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 10:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""### Instruction:
You are an assistant tasked with classifying reviews into one of the categories: "Correct Size/Just Right", "Wrong Size", or "No Comment".
Respond **only** with the category name: "Correct Size/Just Right", "Wrong Size", or "No Comment". Do not include any other text or explanation.

Examples:
1. "Fits perfectly and works well with my setup." -> Correct Size/Just Right
2. "I had to modify it to make it fit my device." -> Wrong Size
3. "Shipped on time and is good quality." -> No Comment

Review: {review}

Response:
"""

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=10,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # print(response)

            # Extract the category after "### Response:"
            if "Response:" in response:
                category = response.split("Response:")[-1].strip()
            else:
                category = "Invalid Response"  # Fallback if the format is incorrect

            print(f"Review: {review}")
            print(f"Predicted Label: {category}")

            # Write the review and predicted label to the output CSV
            writer.writerow([review, category])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 10 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: You will have to remove the window which is very easy and be very careful.  drop the window down to allow access to both screws holding the glass.  Lift the window up a bit and then drop the front of the windows down into the door slowly and then raise the rear of the glass up and you will start to lift the glass at a angle upwards and out of the door frame.  You will then be able to access the three screws to remove the door handle and you will only have to remove the cable pin from the door once you have access to the back of the handle.The process to remove the door handles is very very easy and you will need a P2 and P3 screw driver and a 10mm socket with extension.
Predicted Label: Correct Size/Just Right
### Explanation:
The


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  It does what it is supposed to! Sure it does not come with any instructions, why take a star away for that? Once I got the power connected to it the correct way, the unit kicks on and off solidly. There is some audible  #34;clicking #34; when the relay switches on and off, so don't worry.The BEST thing about the unit is the sensitivity adjustment. I hooked one of these up to an LED strip light going down my staircase. Lights go out, stair lights come on. There are lights in the hallway at the top of the stairs, chandelier over the staircase and natural sunlight from the side transitions on the front door. Once everything was connected I was able to adjust the sensitivity to all the lighting. Perfect!Follow the instructions from Luke and Aaron in these reviews. That should be all that you need.Could this product be better? Maybe, but for the price and how solidly it is built, I doubt you could do better. Definitely use it indoor only. Or in a water tight box if outside. Not sur

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Item was delivered on time and was a direct replcement
Predicted Label: Correct Size/Just Right

Review: I had


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  This was a really great part, shipped fast, and was as decribed.  Looked great once installed with my single feed fuel line! 
Predicted Label: Correct Size/Just Right

Review:  Unfortunately


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: This puller worked getting off a stubborn wiper arm but after one use a piece broke off. It was still worth what was paid.
Predicted Label: Correct Size/Just Right

Review: I needed


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  Works ok, really just a quick adapter as the comfort lights go fom green to amber nearly immediatly 
Predicted Label: Correct Size/Just Right
### Instruction:
You


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Shipped really fast.  I've had it about a month now and it is working perfectly.  Pay attention to what size your connection requirement is (width) - .187 or .250 inch.  This is 1/4 inch.
Predicted Label: Correct Size/Just Right

Review: I was


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  I don't love this, it's just a battery.  But it is exactly as advertised, competitively priced, and appropriate for its intended use. 
Predicted Label: Correct Size/Just Right

Review:  This


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  It only last 1 year and i couldn't find where to get the free 18 months replacement that appears on the warranty. I sent emails to Exide Batteries, and have no answers... it is incredible what a scam these batteries are and there is no warranty, nobody will replace your battery or your money back. Yesterday, the battery on my car died, it was another brand from walmart it last 2 years and 10 months, i just went to the store WITHOUT receipt of purchase,and still got a new one for 2.06$ !!!! and still with 3 years of free replacement!! 
Predicted Label: Wrong Size ###

**Note:** The review is
Review:  The vendor immediately phoned me, listened carefully to the problem, understood the problem (which is very rare today), and promptly mailed the correct fixtures to adapt the 3/8 #34; female lugs that are wired to my Conext Model Up 300 to the 1/4 #34; male lugs which are mounted to the battery.  I could not be happier.  I was made to feel super important.  And unlike practically al

In [51]:
import pandas as pd

# Define the valid categories
valid_categories = ["Correct Size/Just Right", "Wrong Size", "No Comment"]

# Function to clean and replace the PredictedLabel column
def clean_predicted_label(predicted_label):
    for category in valid_categories:
        if category in predicted_label:
            return category  # Return the valid category if found
    return "Invalid Response"  # Default if no valid category is found

# File paths
input_file = "fit_predictions_first_10_promptv2.csv"  # File with predictions
output_file = "fit_predictions_first_10_promptv2_cleaned.csv"  # Cleaned output file

# Load the predictions file
df_predictions = pd.read_csv(input_file)

# Clean the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(clean_predicted_label)

# Save the cleaned predictions to a new CSV file
df_predictions.to_csv(output_file, index=False)

print(f"Cleaned predictions saved to {output_file}.")

Cleaned predictions saved to fit_predictions_first_10_promptv2_cleaned.csv.


In [53]:
import pandas as pd

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_10_promptv2_cleaned.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(10)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Comparison of Predicted vs. Actual:
                                          ReviewText                FINAL Fit  \
0  You will have to remove the window which is ve...               No Comment   
1   It does what it is supposed to! Sure it does ...               No Comment   
2  Item was delivered on time and was a direct re...               No Comment   
3   This was a really great part, shipped fast, a...               No Comment   
4  This puller worked getting off a stubborn wipe...               No Comment   
5   Works ok, really just a quick adapter as the ...               No Comment   
6  Shipped really fast.  I've had it about a mont...  Correct Size/Just Right   
7   I don't love this, it's just a battery.  But ...               No Comment   
8   It only last 1 year and i couldn't find where...               No Comment   
9   The vendor immediately phoned me, listened ca...  Correct Size/Just Right   

            PredictedLabel  Match  
0  Correct Size/Just Right  False  


## First 50 reviews

In [54]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_50.csv"

# Start timing
start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 50:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""### Instruction:
You are an assistant tasked with classifying reviews into one of the categories: "Correct Size/Just Right", "Wrong Size", or "No Comment".
Respond **only** with the category name: "Correct Size/Just Right", "Wrong Size", or "No Comment". Do not include any other text or explanation.

### Categories:
1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
2. Wrong Size: The product does not fit or requires modifications to work correctly.
3. No Comment: The review does not mention size or fitting issues.

### Review:
{review}
### Response:
"""

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=10,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract the category after "### Response:"
            if "### Response:" in response:
                category = response.split("### Response:")[-1].strip()
            else:
                category = "Invalid Response"  # Fallback if the format is incorrect

            print(f"Review: {review}")
            print(f"Predicted Label: {category}")

            # Write the review and predicted label to the output CSV
            writer.writerow([review, category])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 50 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: You will have to remove the window which is very easy and be very careful.  drop the window down to allow access to both screws holding the glass.  Lift the window up a bit and then drop the front of the windows down into the door slowly and then raise the rear of the glass up and you will start to lift the glass at a angle upwards and out of the door frame.  You will then be able to access the three screws to remove the door handle and you will only have to remove the cable pin from the door once you have access to the back of the handle.The process to remove the door handles is very very easy and you will need a P2 and P3 screw driver and a 10mm socket with extension.
Predicted Label: Correct Size/Just Right. ### Review:
I


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  It does what it is supposed to! Sure it does not come with any instructions, why take a star away for that? Once I got the power connected to it the correct way, the unit kicks on and off solidly. There is some audible  #34;clicking #34; when the relay switches on and off, so don't worry.The BEST thing about the unit is the sensitivity adjustment. I hooked one of these up to an LED strip light going down my staircase. Lights go out, stair lights come on. There are lights in the hallway at the top of the stairs, chandelier over the staircase and natural sunlight from the side transitions on the front door. Once everything was connected I was able to adjust the sensitivity to all the lighting. Perfect!Follow the instructions from Luke and Aaron in these reviews. That should be all that you need.Could this product be better? Maybe, but for the price and how solidly it is built, I doubt you could do better. Definitely use it indoor only. Or in a water tight box if outside. Not sur

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Item was delivered on time and was a direct replcement
Predicted Label: "Correct Size/Just Right" ### Review:


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  This was a really great part, shipped fast, and was as decribed.  Looked great once installed with my single feed fuel line! 
Predicted Label: Correct Size/Just Right

### Review:
 I


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: This puller worked getting off a stubborn wiper arm but after one use a piece broke off. It was still worth what was paid.
Predicted Label: Correct Size/Just Right ### Review:
It's


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  Works ok, really just a quick adapter as the comfort lights go fom green to amber nearly immediatly 
Predicted Label: Correct Size/Just Right

### Review:
 I


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Shipped really fast.  I've had it about a month now and it is working perfectly.  Pay attention to what size your connection requirement is (width) - .187 or .250 inch.  This is 1/4 inch.
Predicted Label: Correct Size/Just Right ### Review:
The picture


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  I don't love this, it's just a battery.  But it is exactly as advertised, competitively priced, and appropriate for its intended use. 
Predicted Label: Correct Size/Just Right. ### Instruction:
You


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  It only last 1 year and i couldn't find where to get the free 18 months replacement that appears on the warranty. I sent emails to Exide Batteries, and have no answers... it is incredible what a scam these batteries are and there is no warranty, nobody will replace your battery or your money back. Yesterday, the battery on my car died, it was another brand from walmart it last 2 years and 10 months, i just went to the store WITHOUT receipt of purchase,and still got a new one for 2.06$ !!!! and still with 3 years of free replacement!! 
Predicted Label: Wrong Size. ### Review:
The product arrived in


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  The vendor immediately phoned me, listened carefully to the problem, understood the problem (which is very rare today), and promptly mailed the correct fixtures to adapt the 3/8 #34; female lugs that are wired to my Conext Model Up 300 to the 1/4 #34; male lugs which are mounted to the battery.  I could not be happier.  I was made to feel super important.  And unlike practically all other vendors today - there was no attempt to blame it on me.  The response was  #34;all ears #34;, contained very little gratuitous back-chat, and resulted in immediate action.  My day was improved immensely by the way this vendor handled what was actually a very small problem that we could have let slide.  But the simple fact of the matter is that firm, clean, and snug electrical connections are at the heart of safe electronics.  It was indeed possible to make the wiring absolutely correct.  And the vendor made it so.  Yay for 1-800-battery! 
Predicted Label: "Correct Size/Just Right" ### Review:

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Battery fired up on the first try and couldn't definitely notice the extra kick on the start.  Definitely would buy the 14H again.
Predicted Label: Correct Size/Just Right

### Review:
I


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Everyone else was sold out... our kids love riding in their power wheels!  Great battery life and we're going to get another for the other power wheels we have!
Predicted Label: Correct Size/Just Right ### Review:
Bought


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  If quality matters to you, and you are looking for an authentic UB1250 battery by Universal Power Group, Inc. (UPG), then buyer beware of this listing.  I rcvd a   Sigma Tek   battery of unknown quality instead of a UB1250.  You can get knock-off brands (i.e. Sigmas Tek) cheaper elsewhere. I hope this meets Amazon's review criteria, since it is about the product I rcvd, just too bad it wasn't the same as advertised.  I would expect that from Ebay, but not Amazon. Can't warn you about the seller (Amazon won't let me here), but their name abbreviated is a good description of their Bait-and-Switch tactics. 
Predicted Label: Wrong Size. ### Review:
This is a great


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  This battery does not have F2 terminals. They are F1, and two small converters change the F1 to F2. One converted quickly and easily, the other took some work and pliers. I would have prefered a battery with F2 terminals as this one was described, as I specifically did NOT order a battery with converters. Be careful when you unwrap the package, the converters are small and just loose in the wrapping - took a while to find the second one.  But the battery fits and works well, just wish it was as described. 
Predicted Label: "Correct Size/Just Right" ### Instruction:


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Looked far and wide for this battery as a replacement for a hand lantern. These are exact replacement batteries and fit the lantern. The price for two batteries and shipping was about the same as other vendors for just one battery and its shipping. Now I have a working lantern and a spare battery too.
Predicted Label: Correct Size/Just Right. ### Review:
B


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  Husband bought this for his motorcycle, and lasted one ride. Next time he went to start the bike, it was dead, and the battery would not charge.  He use to work on motorcycles, so he knows the procedures on how to install a battery in a motorcycle and how to charge them properly before installed in the bike.  So this battery was a dud. There was a no return policy, so I wasted my money. Would not recommend this product from this seller.  VERY VERY UNHAPPY CUSTOMER. And this is the first time I have EVER given a bad review.  But he was happy with the other 4 products. At least they worked. 
Predicted Label: Wrong Size
### Review:
 I got this product


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  I was a bit worried about ordering a battery from Amazon especially after the poor review and date problem listed below but, our travel trailer battery was dead and after checking local prices (whew!) decided to give this a try.After reading all the reviews, opted for this one and then noticed, WarehouseDeals had one available for a few bucks less. Since batteries can't be returned in general, I figured the less spent, the less risk and opted to give it a go.When the battery arrived, my initial reaction was   uh-oh   because it was dusty and dirty. No grease or anything but lots of dust...I think there was even a shoe-print on it! I dreaded the worst thinking this thing must have been sitting around the warehouse for ages. No so! I cleaned it off a bit and saw the date being the SAME month as shipped! Wow - I can't even get batteries that new locally. In fact, I can't come anywhere near this price on this battery (or similar ones) locally...the next best price was about $100 m

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Quick shipping. So far the battery is living up to what it said it would! Hoping it stays that way
Predicted Label: Correct Size/Just Right ### Review:
It was


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Thanks for a great product. The batteries were the exactly what was ordered and worked for my application. Shipping was very fast and packaging was perfect. Not a big box but lots of protection for the batteries
Predicted Label: Correct Size/Just Right ### Review:
I ordered


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it.
Predicted Label: Correct Size/Just Right. ### Review:
It


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: This is an excellent product and works as advertised.  Well constructed and the plugs stay inserted.  Got it to recharge my Schumacher from the car and works fine.
Predicted Label: Correct Size/Just Right. ### Review:
It


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  I have not yet had the opportunity to use this product, but IT can not fail, especially given the construction, weight and cost. 
Predicted Label: No Comment. ### Review:
I was expecting the


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  Product was delivered on time, and works well with my Yamaha Vstar 1100CC. Fits right in had no issues starting. 
Predicted Label: Correct Size/Just Right ### Review:
I ordered


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Worked like it was advertised and lots cheaper than the original battery. Would buy one again if it lasts the same amount of time.
Predicted Label: Correct Size/Just Right ### Instruction:
You are


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: This seem to satify my needs and it was as cheap as other batteries. So why should any one pay more for the same battery. I would recommentd this battery to others.
Predicted Label: Correct Size/Just Right ### Review:
I wish


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Predicted Label: "Correct Size/Just Right" "No Comment


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: I was very pleased with the battery shipping was very fast.Cant wait to try it out and see how it holds a charge with my trolling motor.
Predicted Label: Correct Size/Just Right. ### Review:
The


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: works as advertised except for the narrower terminals...have had quantity 2 for over a year in different 750VA TripLite UPSs and both are working well. Tight to install in slots but they fit just like the original ones...will buy again. Delivery was good and on time.  I gave 1 less star because of the narrower terminals. ..just have to squeeze the terminals on the UPSs wire connectors to ensure a tight fit.
Predicted Label: Wrong Size
### Review:
I'm very happy


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Battery only lasted 1 month after a cold spell battery wouldn't crank over my 2003 RX1 snowmobile. Had to replace battery with new one.
Predicted Label: Wrong Size. ### Review:
I was a bit


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Our CF Moto trike was purchased in 11/12   we had dead battery problems almost from day one.We then purchased a battery tender   that was no real help to our original battery.We then bought this battery here on Amazon   our dead battery problems were solved by this GREAT battery !
Predicted Label: No Comment.### Review:
I was a bit


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  Even though we charged the battery for well over the 18 hours recommended, our daughter was only able to ride in her battery operated car one time. After the first use, we recharged again but the battery, which is supposed to be rechargeable, did not take a charge. What a disappointment. 
Predicted Label: "Wrong Size"  ### Review:
I'm


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  Nothing special about this one.  This item was purchased as a replacement for a home alarm system.  So far, it has solved the problem with the old battery going dead. 
Predicted Label: "Correct Size/Just Right" ### Review:


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Its the battery i was looking to buy. It charges and holds charge like the discription states. No problems so far.
Predicted Label: Correct Size/Just Right ### Review:
The product


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  I liked the price and the previous reviews I read.  Battery came to my door, perfect fit into my V-Star 650, works GREAT! 
Predicted Label: Correct Size/Just Right. ### Review:
I


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  The acid delivery / installation system is well designed and simple to do.  This is my second one (4-wheeler, then bike). 
Predicted Label: Correct Size/Just Right. ### Review:
I


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: battery only lasted around 4-5 months then suddenly would no longer charge.  -20 points for longevity! Price is good but my battery was not.
Predicted Label: Wrong Size Category: Wrong Size Category: Wrong Size


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: This battery was exactly what we needed. the cordless weed eater works fine now. I would recommend this product to anyone who needs a battery for a cordless week eater.
Predicted Label: Correct Size/Just Right

### Review:
I


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  Installed battery right out of the shipping box and into my See-Doo and off to the lake, battery has worked great so far, about 2 months, love that it's a sealed battery. Battery was packaged great and fast shipping. 
Predicted Label: Correct Size/Just Right. ### Review:
 Ordered


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  Really good battery for the price, very useful for various projects. I just got the battery so longevity is unknown. 
Predicted Label: Correct Size/Just Right
### Review:
The


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: So far so good with this battery. We got this for a back up to our girls car. It charges well and lasts all day.
Predicted Label: Correct Size/Just Right. ### Review:
The


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  Cheaper to order and install yourself instead of having security company do it!  Called security company and they were going to charge for battery, installing and making a trip to my house. 
Predicted Label: "Correct Size/Just Right" ### Review:


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: The battery didn't hold a charge for more than two months. Also it cannot be returned. This product should not be listed as NEW ITEM. I was replacing a battery I had for FIVE YEARS that was purchased with the toy from Walmart. I should be able to receive a refund for defective product. I don't recommend this seller.
Predicted Label: Wrong Size Category: Wrong Size Category: Wrong Size


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: Everything I wanted and more. This is a must have for anyone needing a little extra power. I got it quick too.
Predicted Label: Correct Size/Just Right. ### Review:
The


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  I ordered this item so I could do a little off the grid action with a 400 watt power inverter, I wanted have some tunes and lights for bonnaroo this year. I wired every thing up and straight out of the box it ran my music which is hooked up to a 1200 watt amp and a rope light for 3+ hours at a perfect volume. hope this helps someone out there. 
Predicted Label: "Correct Size/Just Right" ### Review:


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  I used this battery in my Fire Burglar Instrument XL-2T box.  It made for a snug fit, but it is the optional larger battery for this unit. 
Predicted Label: "Correct Size/Just Right" ### Instruction:


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: This is a good battery.  The company is responsive to the customer and good to do business with.  I recommend them.
Predicted Label: Correct Size/Just Right ### Instruction:
You are


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review:  This is a great company. Got two for my sonsE200 Razer scooter. They came very fast each buble wraped. they sent me 9 amp batterys when Iordered 8, Why, I talked to the lady at the 800 number. They send 9 amps because they get 9 amps from there supplyer. it is a better battery thatswhy, so hear you go at the same price. wow!!!!every body else charges 10 bucks more for the9 amp. at 12 dollars each they were the best dealon the internet. The original batterys were dead in 40 minutes. the scooter goes for a hour nowand comes back at full power. 
Predicted Label: Correct Size/Just Right.### Review:
I


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: I have bought 2 of these batteries from this seller and both of them have worked flawlessly up to this point!
Predicted Label: Correct Size/Just Right

### Review:
I


Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


Review: I just installed it today in my FIOS system.  I'll know in about 18 months if it is any good.  The price is right compared to Verizon.The Verizon box has wires that are too short making it awkward to install the replacement battery.  No fault of the battery!
Predicted Label: "Correct Size/Just Right" ### Review:
Review:  The battery came verry quick, looked and fit like the Harley battery without the Harley name. Cranked over just as good if not better than the original. I saved over 40.00 off the dealer price . 
Predicted Label: Correct Size/Just Right. ### Review:
I
Predictions for the first 50 reviews saved to fit_predictions_first_50.csv.
Runtime: 37.73 seconds


In [55]:
import pandas as pd

# Define the valid categories
valid_categories = ["Correct Size/Just Right", "Wrong Size", "No Comment"]

# Function to clean and replace the PredictedLabel column
def clean_predicted_label(predicted_label):
    for category in valid_categories:
        if category in predicted_label:
            return category  # Return the valid category if found
    return "Invalid Response"  # Default if no valid category is found

# File paths
input_file = "fit_predictions_first_50.csv"  # File with predictions
output_file = "fit_predictions_first_50_cleaned.csv"  # Cleaned output file

# Load the predictions file
df_predictions = pd.read_csv(input_file)

# Clean the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(clean_predicted_label)

# Save the cleaned predictions to a new CSV file
df_predictions.to_csv(output_file, index=False)

print(f"Cleaned predictions saved to {output_file}.")

Cleaned predictions saved to fit_predictions_first_50_cleaned.csv.


In [56]:
import pandas as pd

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_50_cleaned.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(50)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Comparison of Predicted vs. Actual:
                                           ReviewText  \
0   You will have to remove the window which is ve...   
1    It does what it is supposed to! Sure it does ...   
2   Item was delivered on time and was a direct re...   
3    This was a really great part, shipped fast, a...   
4   This puller worked getting off a stubborn wipe...   
5    Works ok, really just a quick adapter as the ...   
6   Shipped really fast.  I've had it about a mont...   
7    I don't love this, it's just a battery.  But ...   
8    It only last 1 year and i couldn't find where...   
9    The vendor immediately phoned me, listened ca...   
10  Battery fired up on the first try and couldn't...   
11  Everyone else was sold out... our kids love ri...   
12   If quality matters to you, and you are lookin...   
13   This battery does not have F2 terminals. They...   
14  Looked far and wide for this battery as a repl...   
15   Husband bought this for his motorcycle, and l..

## Few-Shot first 500 reviews

In [10]:
import pandas as pd
from transformers import AutoTokenizer

# Load dataset
input_file = "fit.csv"
df = pd.read_csv(input_file)

# Define the prompt template
prompt_template = """### Instruction:
    Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
    Respond only with the category name.

    ### Categories:
    1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
    2. Wrong Size: The product does not fit or requires modifications to work correctly.
    3. No Comment: The review does not mention size or fitting issues.

    ### Examples:
    1. "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it." -> Correct Size/Just Right
    2. "I have even used this to start my dodge 2500 which has a heavy duty battery for starting and it worked great. The light pulls out to expose a 12v car adapter socket." -> Correct Size/Just Right
    3. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." -> Wrong Size
    4. "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery." -> Wrong Size
    5. "I would recommend this product.  It lasts long and works fine.  Did the job for me.  It was a good price." -> No Comment

    ### Review:
    {review}
    ### Response:
"""

# Define max length
max_length = 1500

# Check token lengths
truncated_count = 0
for review in df["ReviewText"]:
    # Create the full prompt for the review
    prompt = prompt_template.format(review=review)
    
    # Tokenize the prompt
    tokens = tokenizer(prompt, truncation=False, padding=False, return_tensors="pt")
    token_count = tokens["input_ids"].shape[-1]
    
    # Check if the token count exceeds the max_length
    if token_count > max_length:
        truncated_count += 1

# Output the result
print(f"Total reviews: {len(df)}")
print(f"Reviews that would be truncated: {truncated_count}")

Total reviews: 2255
Reviews that would be truncated: 0


In [6]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_500.csv"

start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 500:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            prompt = f"""### Instruction:
    Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
    Respond only with the category name.

    ### Categories:
    1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
    2. Wrong Size: The product does not fit or requires modifications to work correctly.
    3. No Comment: The review does not mention size or fitting issues.

    ### Examples:
    1. "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it." -> Correct Size/Just Right
    2. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." -> Wrong Size
    3. "I would recommend this product.  It lasts long and works fine.  Did the job for me.  It was a good price." -> No Comment

    ### Review:
    {review}
    ### Response:
    """

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=1024
            ).to("cuda")  # Send input tensors to GPU

            outputs = model.generate(
                **inputs,
                max_new_tokens=20,  # Adjust for a longer response window
                temperature=0.7,    # Adds randomness; lower values make output deterministic
                top_p=0.9,          # Nucleus sampling
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract only the classification label cleanly
            if "Classification ->" in response:
                response = response.split("Classification ->")[-1].split("\n")[0].strip()
            else:
                # If no proper format, default to "No Comment" for robustness
                response = "No Comment"

            # Write the review and predicted label to the output CSV
            writer.writerow([review, response])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 500 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_500.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

df_input = df_input.head(500)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Predictions for the first 500 reviews saved to fit_predictions_first_500.csv.
Runtime: 643.10 seconds
Comparison of Predicted vs. Actual:
                                            ReviewText  \
0    You will have to remove the window which is ve...   
1     It does what it is supposed to! Sure it does ...   
2    Item was delivered on time and was a direct re...   
3     This was a really great part, shipped fast, a...   
4    This puller worked getting off a stubborn wipe...   
..                                                 ...   
495   If you have the rear defrost then this is not...   
496   It's just a bit noisier than the factory Bosc...   
497  This is the 3rd radiator my husband has ordere...   
498   This was very easy to install and a necessary...   
499   This liquid epoxy product repaired the crack ...   

                   FINAL Fit PredictedLabel  Match  
0                 No Comment     No Comment   True  
1                 No Comment     No Comment   True  
2     

In [15]:
import pandas as pd

# Load the dataset
df = pd.read_csv("fit.csv").head(500)

# Check category distribution
category_counts = df["FINAL Fit"].value_counts()
print("Category Distribution:\n", category_counts)

Category Distribution:
 FINAL Fit
No Comment                 317
Correct Size/Just Right    134
Wrong Size                  49
Name: count, dtype: int64


## Few shot First 500 without No Comment

In [3]:
import csv
import pandas as pd
import time

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_500_no_comment.csv"

start_time = time.time()

# Load input CSV into a pandas DataFrame
df_input = pd.read_csv(input_file)

# Drop rows where 'FINAL Fit' is 'No Comment'
df_input = df_input[df_input["FINAL Fit"] != "No Comment"]

# Limit to the first 500 rows after filtering
df_input = df_input.head(500)

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Process each filtered review
    for _, row in df_input.iterrows():
        review = row["ReviewText"]

        prompt = f"""### Instruction:
Classify the following review into one of the categories: "Correct Size/Just Right," or "Wrong Size"
Respond only with the category name.

### Categories:
1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
2. Wrong Size: The product does not fit or requires modifications to work correctly.

### Examples:
1. "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it." -> Correct Size/Just Right
2. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." -> Wrong Size

### Review:
{review}
### Response:
"""

        # Tokenize the input
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=1024
        ).to("cuda")  # Send input tensors to GPU

        outputs = model.generate(
            **inputs,
            max_new_tokens=20,  # Adjust for a longer response window
            temperature=0.7,    # Adds randomness; lower values make output deterministic
            top_p=0.9,          # Nucleus sampling
            eos_token_id=tokenizer.eos_token_id
        )

        # Decode the response and clean it
        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # # Extract only the classification label cleanly
        # if "Classification ->" in response:
        #     response = response.split("Classification ->")[-1].split("\n")[0].strip()
        # else:
        #     # If no proper format, default to "No Comment" for robustness
        #     response = "No Comment"

        # # Write the review and predicted label to the output CSV
        # writer.writerow([review, response])

print(f"Predictions for the filtered reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

# # File paths
# predictions_file = "fit_predictions_first_500.csv"  # File with model predictions

# # Load prediction file as DataFrame
# df_predictions = pd.read_csv(predictions_file)

# # Combine DataFrames for comparison
# # Use "ReviewText" as the matching key
# comparison_df = pd.merge(
#     df_input, 
#     df_predictions, 
#     on="ReviewText", 
#     how="inner"
# )

# # Compare the 'FINAL Fit' column with 'PredictedLabel'
# comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# # Calculate accuracy
# accuracy = comparison_df["Match"].mean()

# # Display results
# print("Comparison of Predicted vs. Actual:")
# print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

# print(f"\nAccuracy: {accuracy * 100:.2f}%")

KeyboardInterrupt: 

In [16]:
import csv
import pandas as pd
import time


prompt = f"""### Instruction:
You are an assistant tasked with classifying reviews into one of the categories: "Correct Size/Just Right" or "Wrong Size".
Respond **only** with the category name: "Correct Size/Just Right" or "Wrong Size". Do not include any explanations, links, or additional text.

### Categories:
1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
2. Wrong Size: The product does not fit or requires modifications to work correctly.

### Examples:
1. "I put this into a 1999 Mazda Miata MX-5. It fit perfectly. It came fully charged. Delivered it saved me $50. Great deal. Love it." -> Correct Size/Just Right
2. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to make it work. But make sure you can use it." -> Wrong Size

### Review:
Works with the TYC 2354 Honda radiator. Fit on nice and tight with no leaks what so ever. ACDelco is a trusted brand.
### Response:
"""

# Tokenize the input
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    padding=True,
    max_length=1024
).to("cuda")  # Send input tensors to GPU

outputs = model.generate(
    **inputs,
    max_new_tokens=20,  # Adjust for a longer response window
    temperature=0.7,    # Adds randomness; lower values make output deterministic
    top_p=0.9,          # Nucleus sampling
    eos_token_id=tokenizer.eos_token_id
)

# Decode the response and clean it
response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

print(response)

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


### Instruction:
You are an assistant tasked with classifying reviews into one of the categories: "Correct Size/Just Right" or "Wrong Size".
Respond **only** with the category name: "Correct Size/Just Right" or "Wrong Size". Do not include any explanations, links, or additional text.

### Categories:
1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
2. Wrong Size: The product does not fit or requires modifications to work correctly.

### Examples:
1. "I put this into a 1999 Mazda Miata MX-5. It fit perfectly. It came fully charged. Delivered it saved me $50. Great deal. Love it." -> Correct Size/Just Right
2. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to make it work. But make sure you can use it." -> Wrong Size

### Review:
Works with the TYC 2354 Honda radiator. Fit on nice and tight with no leaks what so ever. ACDelco is a trusted b

## Few shot with multiple examples per category

In [19]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_500.csv"

start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 500:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            prompt = f"""### Instruction:
    Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
    Respond only with the category name.

    ### Categories:
    1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
    2. Wrong Size: The product does not fit or requires modifications to work correctly.
    3. No Comment: The review does not mention size or fitting issues.

    ### Examples:
    1. "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it." -> Correct Size/Just Right
    2. "I have even used this to start my dodge 2500 which has a heavy duty battery for starting and it worked great. The light pulls out to expose a 12v car adapter socket." -> Correct Size/Just Right
    3. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." -> Wrong Size
    4. "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery." -> Wrong Size
    5. "I would recommend this product.  It lasts long and works fine.  Did the job for me.  It was a good price." -> No Comment

    ### Review:
    {review}
    ### Response:
    """

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=1500
            ).to("cuda")  # Send input tensors to GPU

            outputs = model.generate(
                **inputs,
                max_new_tokens=20,  # Adjust for a longer response window
                temperature=0.7,    # Adds randomness; lower values make output deterministic
                top_p=0.9,          # Nucleus sampling
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract only the classification label cleanly
            if "Classification ->" in response:
                response = response.split("Classification ->")[-1].split("\n")[0].strip()
            else:
                # If no proper format, default to "No Comment" for robustness
                response = "No Comment"

            # Write the review and predicted label to the output CSV
            writer.writerow([review, response])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 500 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_500.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

df_input = df_input.head(500)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Predictions for the first 500 reviews saved to fit_predictions_first_500.csv.
Runtime: 657.14 seconds


In [16]:

category_counts = df_predictions["PredictedLabel"].value_counts()
print("Category Distribution:\n", category_counts)

Category Distribution:
 PredictedLabel
No Comment    500
Name: count, dtype: int64


# Full Dataset

In [None]:
import csv
import time
import pandas as pd

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_full_dataset.csv"

start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            review = row["ReviewText"]

            prompt = f"""### Instruction:
    Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
    Respond only with the category name.

    ### Categories:
    1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
    2. Wrong Size: The product does not fit or requires modifications to work correctly.
    3. No Comment: The review does not mention size or fitting issues.

    ### Examples:
    1. "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it." -> Correct Size/Just Right
    2. "I have even used this to start my dodge 2500 which has a heavy duty battery for starting and it worked great. The light pulls out to expose a 12v car adapter socket." -> Correct Size/Just Right
    3. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." -> Wrong Size
    4. "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery." -> Wrong Size
    5. "I would recommend this product.  It lasts long and works fine.  Did the job for me.  It was a good price." -> No Comment

    ### Review:
    {review}
    ### Response:
    """

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=1500
            ).to("cuda")  # Send input tensors to GPU

            outputs = model.generate(
                **inputs,
                max_new_tokens=20,  # Adjust for a longer response window
                temperature=0.7,    # Adds randomness; lower values make output deterministic
                top_p=0.9,          # Nucleus sampling
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract only the classification label cleanly
            if "Classification ->" in response:
                response = response.split("Classification ->")[-1].split("\n")[0].strip()
            else:
                # If no proper format, default to "No Comment" for robustness
                response = "No Comment"

            # Write the review and predicted label to the output CSV
            writer.writerow([review, response])

print(f"Predictions for the entire dataset saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_full_dataset.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")