In [1]:
import time
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import init_empty_weights
from transformers import BitsAndBytesConfig
import torch

## Loading Llama 3.1 8b instruct tokenizer and model

In [2]:
# Define the model name and cache directory
model_name = "meta-llama/Llama-3.1-8B-Instruct"
cache_dir = "/scratch/gilbreth/anand173/model_cache"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,  # Use 4-bit quantization
    bnb_4bit_use_double_quant=True,  # Enable double quantization for memory savings
    bnb_4bit_compute_dtype=torch.bfloat16,  # Use bfloat16 for computation
)

# Load the tokenizer
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

# Load the model with 4-bit quantization and device map
print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",  # Automatically allocate model layers across GPU/CPU
    cache_dir=cache_dir,
)

# Ensure the pad token is set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id

print("Model and tokenizer loaded successfully!")

Loading tokenizer...
Loading model...




Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model and tokenizer loaded successfully!


## Zero shot example

In [20]:
import time

# Example review to classify
review = "Product was delivered on time, and works well with my Yamaha Vstar 1100CC. Fits right in had no issues starting."

# Format prompt for classification
prompt = f"""### Instruction:
Classify the following review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:

### Input:
{review}

### Response:"""

# Start timing
start_time = time.time()

# Update the eos_token and synchronize it with the model
tokenizer.eos_token = "</end>"
model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

# Tokenize the input
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    padding=True,
).to("cuda")  # Send input tensors to GPU


# Generate the output
print("Generating classification response...")
outputs = model.generate(
    **inputs,
    max_new_tokens=3,           # Limit the response length
    eos_token_id=tokenizer.eos_token_id
)

# Decode and display the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
print(f"Predicted Label: {response}")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Generating classification response...
Predicted Label: ### Instruction:
Classify the following review into "Correct Size/Just Right", "Wrong Size", "No Comment". Please respond only with the category:

### Input:
Product was delivered on time, and works well with my Yamaha Vstar 1100CC. Fits right in had no issues starting.

### Response: 
Correct Size
Runtime: 0.31 seconds


## Few Shot example

In [24]:
# Few-shot examples for the classification task
few_shot_examples = """
### Examples:
1. Review: "order came quickly and is working fine and is much better price than going to Lowe's or Home Depot to purchase."
   Classification -> No Comment
2. Review: "Perfect Fit - Ideal for when you don't need to replace an otherwise good OEM axle. Fits all FWD/AWD Volvo 850   S/V70 '93-'00"
   Classification -> Correct Size/Just Right
3. Review: "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery."
   Classification -> Wrong Size
"""

# Example review to classify
review = "The non-contact surfaces rust after just one month and look great if that's the look you're going for. Not for nice rims that show off your rust-colored rotors."

# Format prompt with few-shot examples
prompt = f"""### Instruction:
Classify the following autoparts review into "Correct Size/Just Right", "Wrong Size", "No Comment" with respect to fit of product. Please respond only with the category:

{few_shot_examples}

### Input:
Review: "{review}"
### Response:
"""

# Update the eos_token and synchronize it with the model
tokenizer.eos_token = "</end>"
model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

# Start timing
start_time = time.time()

# Tokenize the input
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    truncation=True,
    padding=True,
).to("cuda")  # Send input tensors to GPU

# Generate the output
print("Generating classification response...")
outputs = model.generate(
    **inputs,
    max_new_tokens=3,           # Limit the response length
    eos_token_id=tokenizer.eos_token_id
)

# Decode and display the response
response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
print(f"Predicted Label: {response}")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Generating classification response...
Predicted Label: ### Instruction:
Classify the following autoparts review into "Correct Size/Just Right", "Wrong Size", "No Comment" with respect to fit of product. Please respond only with the category:


### Examples:
1. Review: "order came quickly and is working fine and is much better price than going to Lowe's or Home Depot to purchase."
   Classification -> No Comment
2. Review: "Perfect Fit - Ideal for when you don't need to replace an otherwise good OEM axle. Fits all FWD/AWD Volvo 850   S/V70 '93-'00"
   Classification -> Correct Size/Just Right
3. Review: "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery."
   Classification -> Wrong Size


### Input:
Review: "The non-contact surfaces rust after just one month and look great if that's the look you're going for. Not for nice rims that show off your rust-colored rotors."
### Response:
No Comment
Runtime: 0.43 seconds


## First 10 reviews prompt version 1

In [28]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_10.csv"

# Start timing
start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 10:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""### Instruction:
            You are an assistant tasked with classifying reviews into one of the categories: "Correct Size/Just Right", "Wrong Size", or "No Comment".
            Respond **only** with the category name: "Correct Size/Just Right", "Wrong Size", or "No Comment". Do not include any other text or explanation.

            ### Categories:
            1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
            2. Wrong Size: The product does not fit or requires modifications to work correctly.
            3. No Comment: The review does not mention size or fitting issues.

            ### Review:
            {review}
            ### Response:
            """

            # Update the eos_token and synchronize it with the model
            tokenizer.eos_token = "</end>"
            model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=3,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract the category after "### Response:"
            if "### Response:" in response:
                category = response.split("### Response:")[-1].strip()
            else:
                category = "Invalid Response"  # Fallback if the format is incorrect

            print(f"Review: {review}")
            print(f"Predicted Label: {category}")

            # Write the review and predicted label to the output CSV
            writer.writerow([review, category])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 10 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Review: You will have to remove the window which is very easy and be very careful.  drop the window down to allow access to both screws holding the glass.  Lift the window up a bit and then drop the front of the windows down into the door slowly and then raise the rear of the glass up and you will start to lift the glass at a angle upwards and out of the door frame.  You will then be able to access the three screws to remove the door handle and you will only have to remove the cable pin from the door once you have access to the back of the handle.The process to remove the door handles is very very easy and you will need a P2 and P3 screw driver and a 10mm socket with extension.
Predicted Label: Correct Size/
Review:  It does what it is supposed to! Sure it does not come with any instructions, why take a star away for that? Once I got the power connected to it the correct way, the unit kicks on and off solidly. There is some audible  #34;clicking #34; when the relay switches on and off,

In [32]:
import pandas as pd

# Define the valid categories and their standardized replacements
valid_categories = {
    "Correct Size": "Correct Size/Just Right",
    "Wrong Size": "Wrong Size",
    "No Comment": "No Comment"
}

# Function to clean and replace the PredictedLabel column
def clean_predicted_label(predicted_label):
    for category, replacement in valid_categories.items():
        if category in predicted_label:
            return replacement  # Return the standardized category if found
    return "Invalid Response"  # Default if no valid category is found

# File paths
input_file = "fit_predictions_first_10.csv"  # File with predictions
output_file = "fit_predictions_first_10_cleaned.csv"  # Cleaned output file

# Load the predictions file
df_predictions = pd.read_csv(input_file)

# Clean the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(clean_predicted_label)

# Save the cleaned predictions to a new CSV file
df_predictions.to_csv(output_file, index=False)

print(f"Cleaned predictions saved to {output_file}.")

Cleaned predictions saved to fit_predictions_first_10_cleaned.csv.


In [33]:
import pandas as pd

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_10_cleaned.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(10)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Comparison of Predicted vs. Actual:
                                          ReviewText                FINAL Fit  \
0  You will have to remove the window which is ve...               No Comment   
1   It does what it is supposed to! Sure it does ...               No Comment   
2  Item was delivered on time and was a direct re...               No Comment   
3   This was a really great part, shipped fast, a...               No Comment   
4  This puller worked getting off a stubborn wipe...               No Comment   
5   Works ok, really just a quick adapter as the ...               No Comment   
6  Shipped really fast.  I've had it about a mont...  Correct Size/Just Right   
7   I don't love this, it's just a battery.  But ...               No Comment   
8   It only last 1 year and i couldn't find where...               No Comment   
9   The vendor immediately phoned me, listened ca...  Correct Size/Just Right   

            PredictedLabel  Match  
0  Correct Size/Just Right  False  


## First 10 reviews prompt v2

In [35]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_10_promptv2.csv"

# Start timing
start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 10:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""### Instruction:
You are an assistant tasked with classifying reviews into one of the categories: "Correct Size/Just Right", "Wrong Size", or "No Comment" with respect to fit of product. Do not include any other text or explanation.

Examples:
1. "Fits perfectly and works well with my setup." -> Correct Size/Just Right
2. "I had to modify it to make it fit my device." -> Wrong Size
3. "Shipped on time and is good quality." -> No Comment

Review: {review}

Response:
"""
            # Update the eos_token and synchronize it with the model
            tokenizer.eos_token = "</end>"
            model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=3,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # print(response)

            # Extract the category after "### Response:"
            if "Response:" in response:
                category = response.split("Response:")[-1].strip()
            else:
                category = "Invalid Response"  # Fallback if the format is incorrect

            print(f"Review: {review}")
            print(f"Predicted Label: {category}")

            # Write the review and predicted label to the output CSV
            writer.writerow([review, category])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 10 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Review: You will have to remove the window which is very easy and be very careful.  drop the window down to allow access to both screws holding the glass.  Lift the window up a bit and then drop the front of the windows down into the door slowly and then raise the rear of the glass up and you will start to lift the glass at a angle upwards and out of the door frame.  You will then be able to access the three screws to remove the door handle and you will only have to remove the cable pin from the door once you have access to the back of the handle.The process to remove the door handles is very very easy and you will need a P2 and P3 screw driver and a 10mm socket with extension.
Predicted Label: Correct Size/
Review:  It does what it is supposed to! Sure it does not come with any instructions, why take a star away for that? Once I got the power connected to it the correct way, the unit kicks on and off solidly. There is some audible  #34;clicking #34; when the relay switches on and off,

In [38]:
import pandas as pd

# Define the valid categories and their standardized replacements
valid_categories = {
    "Correct Size": "Correct Size/Just Right",
    "Wrong Size": "Wrong Size",
    "No Comment": "No Comment"
}

# Function to clean and replace the PredictedLabel column
def clean_predicted_label(predicted_label):
    for category, replacement in valid_categories.items():
        if category in predicted_label:
            return replacement  # Return the standardized category if found
    return "Invalid Response"  # Default if no valid category is found

# File paths
input_file = "fit_predictions_first_10_promptv2.csv"  # File with predictions
output_file = "fit_predictions_first_10_promptv2_cleaned.csv"  # Cleaned output file

# Load the predictions file
df_predictions = pd.read_csv(input_file)

# Clean the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(clean_predicted_label)

# Save the cleaned predictions to a new CSV file
df_predictions.to_csv(output_file, index=False)

print(f"Cleaned predictions saved to {output_file}.")

Cleaned predictions saved to fit_predictions_first_10_promptv2_cleaned.csv.


In [39]:
df_predictions

Unnamed: 0,ReviewText,PredictedLabel
0,You will have to remove the window which is ve...,Correct Size/Just Right
1,It does what it is supposed to! Sure it does ...,No Comment
2,Item was delivered on time and was a direct re...,No Comment
3,"This was a really great part, shipped fast, a...",Correct Size/Just Right
4,This puller worked getting off a stubborn wipe...,Wrong Size
5,"Works ok, really just a quick adapter as the ...",Wrong Size
6,Shipped really fast. I've had it about a mont...,Wrong Size
7,"I don't love this, it's just a battery. But ...",No Comment
8,It only last 1 year and i couldn't find where...,Wrong Size
9,"The vendor immediately phoned me, listened ca...",Correct Size/Just Right


In [40]:
import pandas as pd

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_10_promptv2_cleaned.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(10)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Comparison of Predicted vs. Actual:
                                          ReviewText                FINAL Fit  \
0  You will have to remove the window which is ve...               No Comment   
1   It does what it is supposed to! Sure it does ...               No Comment   
2  Item was delivered on time and was a direct re...               No Comment   
3   This was a really great part, shipped fast, a...               No Comment   
4  This puller worked getting off a stubborn wipe...               No Comment   
5   Works ok, really just a quick adapter as the ...               No Comment   
6  Shipped really fast.  I've had it about a mont...  Correct Size/Just Right   
7   I don't love this, it's just a battery.  But ...               No Comment   
8   It only last 1 year and i couldn't find where...               No Comment   
9   The vendor immediately phoned me, listened ca...  Correct Size/Just Right   

            PredictedLabel  Match  
0  Correct Size/Just Right  False  


## First 50 reviews - Zero shot but more context

In [43]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_50.csv"

# Start timing
start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 50:  # Process only the first 10 reviews
                break

            review = row["ReviewText"]

            # Format the prompt for each review
            prompt = f"""### Instruction:
You are an assistant tasked with classifying autoparts reviews into one of the categories: "Correct Size/Just Right", "Wrong Size", or "No Comment" with respect to fit of product. Do not include any other text or explanation.

### Categories:
Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
Wrong Size: The product does not fit or requires modifications to work correctly.
No Comment: The review does not mention size or fitting issues.

### Review:
{review}
### Response:
"""
            # Update the eos_token and synchronize it with the model
            tokenizer.eos_token = "</end>"
            model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=512
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=3,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract the category after "### Response:"
            if "### Response:" in response:
                category = response.split("### Response:")[-1].strip()
            else:
                category = "Invalid Response"  # Fallback if the format is incorrect

            print(f"Review: {review}")
            print(f"Predicted Label: {category}")

            # Write the review and predicted label to the output CSV
            writer.writerow([review, category])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 50 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Review: You will have to remove the window which is very easy and be very careful.  drop the window down to allow access to both screws holding the glass.  Lift the window up a bit and then drop the front of the windows down into the door slowly and then raise the rear of the glass up and you will start to lift the glass at a angle upwards and out of the door frame.  You will then be able to access the three screws to remove the door handle and you will only have to remove the cable pin from the door once you have access to the back of the handle.The process to remove the door handles is very very easy and you will need a P2 and P3 screw driver and a 10mm socket with extension.
Predicted Label: No Comment.
Review:  It does what it is supposed to! Sure it does not come with any instructions, why take a star away for that? Once I got the power connected to it the correct way, the unit kicks on and off solidly. There is some audible  #34;clicking #34; when the relay switches on and off, s

In [44]:
import pandas as pd

# Define the valid categories and their standardized replacements
valid_categories = {
    "Correct Size": "Correct Size/Just Right",
    "Wrong Size": "Wrong Size",
    "No Comment": "No Comment"
}

# Function to clean and replace the PredictedLabel column
def clean_predicted_label(predicted_label):
    for category, replacement in valid_categories.items():
        if category in predicted_label:
            return replacement  # Return the standardized category if found
    return "Invalid Response"  # Default if no valid category is found

# File paths
input_file = "fit_predictions_first_50.csv"  # File with predictions
output_file = "fit_predictions_first_50_cleaned.csv"  # Cleaned output file

# Load the predictions file
df_predictions = pd.read_csv(input_file)

# Clean the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(clean_predicted_label)

# Save the cleaned predictions to a new CSV file
df_predictions.to_csv(output_file, index=False)

print(f"Cleaned predictions saved to {output_file}.")

Cleaned predictions saved to fit_predictions_first_50_cleaned.csv.


In [56]:
import pandas as pd

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_50_cleaned.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Ensure only the first 10 rows are used for comparison
df_input = df_input.head(50)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Comparison of Predicted vs. Actual:
                                           ReviewText  \
0   You will have to remove the window which is ve...   
1    It does what it is supposed to! Sure it does ...   
2   Item was delivered on time and was a direct re...   
3    This was a really great part, shipped fast, a...   
4   This puller worked getting off a stubborn wipe...   
5    Works ok, really just a quick adapter as the ...   
6   Shipped really fast.  I've had it about a mont...   
7    I don't love this, it's just a battery.  But ...   
8    It only last 1 year and i couldn't find where...   
9    The vendor immediately phoned me, listened ca...   
10  Battery fired up on the first try and couldn't...   
11  Everyone else was sold out... our kids love ri...   
12   If quality matters to you, and you are lookin...   
13   This battery does not have F2 terminals. They...   
14  Looked far and wide for this battery as a repl...   
15   Husband bought this for his motorcycle, and l..

## Few-Shot first 500 reviews

In [45]:
import pandas as pd
from transformers import AutoTokenizer

# Load dataset
input_file = "fit.csv"
df = pd.read_csv(input_file)

# Define the prompt template
prompt_template = """### Instruction:
    Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment." with respect to fit of product
    Respond only with the category name.

    ### Categories:
    1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
    2. Wrong Size: The product does not fit or requires modifications to work correctly.
    3. No Comment: The review does not mention size or fitting issues.

    ### Examples:
    1. "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it." -> Correct Size/Just Right
    2. "I have even used this to start my dodge 2500 which has a heavy duty battery for starting and it worked great. The light pulls out to expose a 12v car adapter socket." -> Correct Size/Just Right
    3. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." -> Wrong Size
    4. "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery." -> Wrong Size
    5. "I would recommend this product.  It lasts long and works fine.  Did the job for me.  It was a good price." -> No Comment

    ### Review:
    {review}
    ### Response:
"""

# Define max length
max_length = 1500

# Check token lengths
truncated_count = 0
for review in df["ReviewText"]:
    # Create the full prompt for the review
    prompt = prompt_template.format(review=review)
    
    # Tokenize the prompt
    tokens = tokenizer(prompt, truncation=False, padding=False, return_tensors="pt")
    token_count = tokens["input_ids"].shape[-1]
    
    # Check if the token count exceeds the max_length
    if token_count > max_length:
        truncated_count += 1

# Output the result
print(f"Total reviews: {len(df)}")
print(f"Reviews that would be truncated: {truncated_count}")

Total reviews: 2255
Reviews that would be truncated: 0


In [56]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_500.csv"

start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 500:
                break

            review = row["ReviewText"]

            prompt = f"""### Instruction:
    Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment." with respect to fit of product.
    Respond only with the category name.

    ### Categories:
    1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
    2. Wrong Size: The product does not fit or requires modifications to work correctly.
    3. No Comment: The review does not mention size or fitting issues.

    ### Examples:
    1. "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it." -> Correct Size/Just Right
    2. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." -> Wrong Size
    3. "I would recommend this product.  It lasts long and works fine.  Did the job for me.  It was a good price." -> No Comment

    ### Review:
    {review}
    ### Response:
    """
            # Update the eos_token and synchronize it with the model
            tokenizer.eos_token = "</end>"
            model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=1024
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=3,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # print(response)

            # Extract the category after "### Response:"
            if "### Response:" in response:
                category = response.split("### Response:")[-1].strip()
            else:
                category = "Invalid Response"  # Fallback if the format is incorrect

            print(f"Review: {review}")
            print(f"Predicted Label: {category}")

            # Write the review and predicted label to the output CSV
            writer.writerow([review, category])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 500 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Review: You will have to remove the window which is very easy and be very careful.  drop the window down to allow access to both screws holding the glass.  Lift the window up a bit and then drop the front of the windows down into the door slowly and then raise the rear of the glass up and you will start to lift the glass at a angle upwards and out of the door frame.  You will then be able to access the three screws to remove the door handle and you will only have to remove the cable pin from the door once you have access to the back of the handle.The process to remove the door handles is very very easy and you will need a P2 and P3 screw driver and a 10mm socket with extension.
Predicted Label: Wrong Size
Review:  It does what it is supposed to! Sure it does not come with any instructions, why take a star away for that? Once I got the power connected to it the correct way, the unit kicks on and off solidly. There is some audible  #34;clicking #34; when the relay switches on and off, so

In [57]:
import csv
import pandas as pd
import time

# Define the valid categories and their standardized replacements
valid_categories = {
    "Correct Size": "Correct Size/Just Right",
    "Wrong Size": "Wrong Size",
    "No Comment": "No Comment"
}

# Function to standardize categories
def standardize_category(predicted_label):
    for category, replacement in valid_categories.items():
        if category in predicted_label:
            return replacement
    return "Invalid Response"  # Default for invalid categories

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_500.csv"  # File with model predictions
output_file = "fit_predictions_first_500_cleaned.csv"  # Cleaned predictions file

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Standardize the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(standardize_category)

# Save the cleaned predictions to a new CSV file
df_predictions.to_csv(output_file, index=False)
print(f"Cleaned predictions saved to {output_file}.")

# Ensure only the first 500 rows are used for comparison
df_input = df_input.head(500)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Cleaned predictions saved to fit_predictions_first_500_cleaned.csv.
Comparison of Predicted vs. Actual:
                                            ReviewText  \
0    You will have to remove the window which is ve...   
1     It does what it is supposed to! Sure it does ...   
2    Item was delivered on time and was a direct re...   
3     This was a really great part, shipped fast, a...   
4    This puller worked getting off a stubborn wipe...   
..                                                 ...   
495   If you have the rear defrost then this is not...   
496   It's just a bit noisier than the factory Bosc...   
497  This is the 3rd radiator my husband has ordere...   
498   This was very easy to install and a necessary...   
499   This liquid epoxy product repaired the crack ...   

                   FINAL Fit           PredictedLabel  Match  
0                 No Comment               Wrong Size  False  
1                 No Comment  Correct Size/Just Right  False  
2         

## Few shot First 500 without No Comment

In [4]:
import csv
import pandas as pd
import time

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_500_no_comment.csv"

start_time = time.time()

# Load input CSV into a pandas DataFrame
df_input = pd.read_csv(input_file)

# Drop rows where 'FINAL Fit' is 'No Comment'
df_input = df_input[df_input["FINAL Fit"] != "No Comment"]

# Limit to the first 500 rows after filtering
# df_input = df_input.head(500)

review_count = 0

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Process each filtered review
    for _, row in df_input.iterrows():
        review = row["ReviewText"]

        prompt = f"""### Instruction:
Classify the following review into one of the categories: "Correct Size/Just Right," or "Wrong Size"
Respond only with the category name.

### Categories:
Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
Wrong Size: The product does not fit or requires modifications to work correctly.

### Examples:
Correct Size/Just Right: "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it." -> Correct Size/Just Right
Wrong size:  "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." -> Wrong Size

### Review:
{review}
### Response:
"""

        # Update the eos_token and synchronize it with the model
        tokenizer.eos_token = "</end>"
        model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

        # Tokenize the input
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=1024
        ).to("cuda")  # Send input tensors to GPU

        # Generate the output
        outputs = model.generate(
            **inputs,
            max_new_tokens=3,  # Limit the response length
            eos_token_id=tokenizer.eos_token_id
        )

        # Decode the response and clean it
        response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        # print(response)

        # Extract the category after "### Response:"
        if "### Response:" in response:
            category = response.split("### Response:")[-1].strip()
        else:
            category = "Invalid Response"  # Fallback if the format is incorrect

        print(f"Review: {review}")
        print(f"Predicted Label: {category}")

        # Write the review and predicted label to the output CSV
        writer.writerow([review, category])

        # Increment the counter
        review_count += 1

print(f"Predictions for the first 500 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Review: Shipped really fast.  I've had it about a month now and it is working perfectly.  Pay attention to what size your connection requirement is (width) - .187 or .250 inch.  This is 1/4 inch.
Predicted Label: Wrong Size
Review:  The vendor immediately phoned me, listened carefully to the problem, understood the problem (which is very rare today), and promptly mailed the correct fixtures to adapt the 3/8 #34; female lugs that are wired to my Conext Model Up 300 to the 1/4 #34; male lugs which are mounted to the battery.  I could not be happier.  I was made to feel super important.  And unlike practically all other vendors today - there was no attempt to blame it on me.  The response was  #34;all ears #34;, contained very little gratuitous back-chat, and resulted in immediate action.  My day was improved immensely by the way this vendor handled what was actually a very small problem that we could have let slide.  But the simple fact of the matter is that firm, clean, and snug electri

In [6]:
import csv
import pandas as pd
import time

# Define the valid categories and their standardized replacements
valid_categories = {
    "Correct Size": "Correct Size/Just Right",
    "Wrong Size": "Wrong Size",
    "No Comment": "No Comment"
}

# Function to standardize categories
def standardize_category(predicted_label):
    for category, replacement in valid_categories.items():
        if category in predicted_label:
            return replacement
    return "Invalid Response"  # Default for invalid categories

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_500_no_comment.csv"  # File with model predictions
output_file = "fit_predictions_first_500_cleaned_no_comment.csv"  # Cleaned predictions file

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_input = df_input[df_input["FINAL Fit"] != "No Comment"]
df_predictions = pd.read_csv(predictions_file)

# Standardize the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(standardize_category)

# Save the cleaned predictions to a new CSV file
df_predictions.to_csv(output_file, index=False)
print(f"Cleaned predictions saved to {output_file}.")

# Ensure only the first 500 rows are used for comparison
# df_input = df_input.head(500)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]].to_csv("no_comments_comparison.csv")

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Cleaned predictions saved to fit_predictions_first_500_cleaned_no_comment.csv.
Comparison of Predicted vs. Actual:
                                            ReviewText  \
0    Shipped really fast.  I've had it about a mont...   
1     The vendor immediately phoned me, listened ca...   
2     This battery does not have F2 terminals. They...   
3    Looked far and wide for this battery as a repl...   
4    I put this into a 1999 Mazda Miata MX-5.  It f...   
..                                                 ...   
875  Does not work well on the rear window of my 20...   
876   Have to wait for rain, but as best I can tell...   
877   I bought 2 pairs of these for use with a Ford...   
878   A perfect fit in the oem frames on my 08 Fron...   
879  these are heavy duty extra wide wiper refills....   

                   FINAL Fit           PredictedLabel  Match  
0    Correct Size/Just Right               Wrong Size  False  
1    Correct Size/Just Right  Correct Size/Just Right   True  

## Few shot with 2 examples per category

In [11]:
import csv

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_first_500_multi_few_shot.csv"

start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Initialize a counter
    review_count = 0

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            if review_count >= 500:
                break

            review = row["ReviewText"]

            prompt = f"""### Instruction:
    Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
    Respond only with the category name.

    ### Categories:
    1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
    2. Wrong Size: The product does not fit or requires modifications to work correctly.
    3. No Comment: The review does not mention size or fitting issues.

    ### Examples:
    Correct Size/Just Right: "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it."
    Correct Size/Just Right: "I have even used this to start my dodge 2500 which has a heavy duty battery for starting and it worked great. The light pulls out to expose a 12v car adapter socket."
    Wrong Size: "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." 
    Wrong Size: "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery."
    No Comment: "I would recommend this product.  It lasts long and works fine.  Did the job for me.  It was a good price."
    No Comment: This Is a great product and its even stronger than de factory one my Jeep Grand Cherokee came with.... Recommend this product

    ### Review:
    {review}
    ### Response:
    """

            # Update the eos_token and synchronize it with the model
            tokenizer.eos_token = "</end>"
            model.config.eos_token_id = tokenizer.convert_tokens_to_ids(tokenizer.eos_token)

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=1024
            ).to("cuda")  # Send input tensors to GPU

            # Generate the output
            outputs = model.generate(
                **inputs,
                max_new_tokens=3,  # Limit the response length
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # print(response)

            # Extract the category after "### Response:"
            if "### Response:" in response:
                category = response.split("### Response:")[-1].strip()
            else:
                category = "Invalid Response"  # Fallback if the format is incorrect

            print(f"Review: {review}")
            print(f"Predicted Label: {category}")

            # Write the review and predicted label to the output CSV
            writer.writerow([review, category])

            # Increment the counter
            review_count += 1

print(f"Predictions for the first 500 reviews saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

Review: You will have to remove the window which is very easy and be very careful.  drop the window down to allow access to both screws holding the glass.  Lift the window up a bit and then drop the front of the windows down into the door slowly and then raise the rear of the glass up and you will start to lift the glass at a angle upwards and out of the door frame.  You will then be able to access the three screws to remove the door handle and you will only have to remove the cable pin from the door once you have access to the back of the handle.The process to remove the door handles is very very easy and you will need a P2 and P3 screw driver and a 10mm socket with extension.
Predicted Label: Wrong Size
Review:  It does what it is supposed to! Sure it does not come with any instructions, why take a star away for that? Once I got the power connected to it the correct way, the unit kicks on and off solidly. There is some audible  #34;clicking #34; when the relay switches on and off, so

In [12]:
import csv
import pandas as pd
import time

# Define the valid categories and their standardized replacements
valid_categories = {
    "Correct Size": "Correct Size/Just Right",
    "Wrong Size": "Wrong Size",
    "No Comment": "No Comment"
}

# Function to standardize categories
def standardize_category(predicted_label):
    for category, replacement in valid_categories.items():
        if category in predicted_label:
            return replacement
    return "Invalid Response"  # Default for invalid categories

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_first_500_multi_few_shot.csv"  # File with model predictions
output_file = "fit_predictions_first_500_multi_few_shot_cleaned.csv"  # Cleaned predictions file

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Standardize the PredictedLabel column
df_predictions["PredictedLabel"] = df_predictions["PredictedLabel"].apply(standardize_category)

# Save the cleaned predictions to a new CSV file
df_predictions.to_csv(output_file, index=False)
print(f"Cleaned predictions saved to {output_file}.")

# Ensure only the first 500 rows are used for comparison
# df_input = df_input.head(500)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Cleaned predictions saved to fit_predictions_first_500_multi_few_shot_cleaned.csv.
Comparison of Predicted vs. Actual:
                                            ReviewText  \
0    You will have to remove the window which is ve...   
1     It does what it is supposed to! Sure it does ...   
2    Item was delivered on time and was a direct re...   
3     This was a really great part, shipped fast, a...   
4    This puller worked getting off a stubborn wipe...   
..                                                 ...   
495   If you have the rear defrost then this is not...   
496   It's just a bit noisier than the factory Bosc...   
497  This is the 3rd radiator my husband has ordere...   
498   This was very easy to install and a necessary...   
499   This liquid epoxy product repaired the crack ...   

                   FINAL Fit           PredictedLabel  Match  
0                 No Comment               Wrong Size  False  
1                 No Comment  Correct Size/Just Right  Fal

In [16]:

category_counts = df_predictions["PredictedLabel"].value_counts()
print("Category Distribution:\n", category_counts)

Category Distribution:
 PredictedLabel
No Comment    500
Name: count, dtype: int64


# Full Dataset

In [None]:
import csv
import time
import pandas as pd

# Define the file path for the input reviews
input_file = "fit.csv"
output_file = "fit_predictions_full_dataset.csv"

start_time = time.time()

# Prepare to write results to a new CSV file
with open(output_file, mode="w", newline="") as out_csv:
    writer = csv.writer(out_csv)
    writer.writerow(["ReviewText", "PredictedLabel"])  # Write headers

    # Read and process each review from the input CSV file
    with open(input_file, mode="r") as in_csv:
        reader = csv.DictReader(in_csv)
        for row in reader:
            review = row["ReviewText"]

            prompt = f"""### Instruction:
    Classify the following review into one of the categories: "Correct Size/Just Right," "Wrong Size," or "No Comment."
    Respond only with the category name.

    ### Categories:
    1. Correct Size/Just Right: The product fits as expected and performs its intended function without issues.
    2. Wrong Size: The product does not fit or requires modifications to work correctly.
    3. No Comment: The review does not mention size or fitting issues.

    ### Examples:
    1. "I put this into a 1999 Mazda Miata MX-5.  It fit perfectly.  It came fully charged.  Delivered it saved me $50.  Great deal.  Love it." -> Correct Size/Just Right
    2. "I have even used this to start my dodge 2500 which has a heavy duty battery for starting and it worked great. The light pulls out to expose a 12v car adapter socket." -> Correct Size/Just Right
    3. "It was not the exact match. I had to rewire the battery in order to make it work. It was a toy for my Lil man. I am glad that I was able to to make it work.  But make sure you can iuse it." -> Wrong Size
    4. "two different ends on cables. doesn't make sense. had to change the end on one side to fit it to the battery." -> Wrong Size
    5. "I would recommend this product.  It lasts long and works fine.  Did the job for me.  It was a good price." -> No Comment

    ### Review:
    {review}
    ### Response:
    """

            # Tokenize the input
            inputs = tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                padding=True,
                max_length=1500
            ).to("cuda")  # Send input tensors to GPU

            outputs = model.generate(
                **inputs,
                max_new_tokens=20,  # Adjust for a longer response window
                temperature=0.7,    # Adds randomness; lower values make output deterministic
                top_p=0.9,          # Nucleus sampling
                eos_token_id=tokenizer.eos_token_id
            )

            # Decode the response and clean it
            response = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

            # Extract only the classification label cleanly
            if "Classification ->" in response:
                response = response.split("Classification ->")[-1].split("\n")[0].strip()
            else:
                # If no proper format, default to "No Comment" for robustness
                response = "No Comment"

            # Write the review and predicted label to the output CSV
            writer.writerow([review, response])

print(f"Predictions for the entire dataset saved to {output_file}.")

# End timing
end_time = time.time()

# Print runtime
runtime = end_time - start_time
print(f"Runtime: {runtime:.2f} seconds")

# File paths
input_file = "fit.csv"  # Original file with actual labels
predictions_file = "fit_predictions_full_dataset.csv"  # File with model predictions

# Load input and prediction files as DataFrames
df_input = pd.read_csv(input_file)
df_predictions = pd.read_csv(predictions_file)

# Combine DataFrames for comparison
# Use "ReviewText" as the matching key
comparison_df = pd.merge(
    df_input, 
    df_predictions, 
    on="ReviewText", 
    how="inner"
)

# Compare the 'FINAL Fit' column with 'PredictedLabel'
comparison_df["Match"] = comparison_df["FINAL Fit"] == comparison_df["PredictedLabel"]

# Calculate accuracy
accuracy = comparison_df["Match"].mean()

# Display results
print("Comparison of Predicted vs. Actual:")
print(comparison_df[["ReviewText", "FINAL Fit", "PredictedLabel", "Match"]])

print(f"\nAccuracy: {accuracy * 100:.2f}%")

Challenges:

- Imbalanced dataset
- End token issue
- Token size - output being padded with prompt : Model good at generating text, but does not perform tasks that well. It is a model, not an assistant.