# BART BASE FINE-TUNED ON CUSTOM AMAZON DATASET - TEST FILE

In [1]:
!pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25ldone
[?25h  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24954 sha256=d5602a5a977556a4dcc51e93ccf88fa5a201377fb672855df164aaedaaa74fe4
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [2]:
import torch
import json
import pandas as pd
import os
import numpy as np
import re
from rouge_score import rouge_scorer



## Test Set

In [3]:
path_meta = "/kaggle/input/test-file-cell-bart/Cell_Phones_and_Accessories_testflie.json"

with open(path_meta,'r') as file:
    test_data = json.load(file)
len(test_data)

20

In [4]:
from huggingface_hub import login
login(token= "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")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## BART BASE testing

In [5]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("Vinayak1699/bart-base-ecom-qa")
model = AutoModelForSeq2SeqLM.from_pretrained("Vinayak1699/bart-base-ecom-qa")

Downloading (…)okenizer_config.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.74k [00:00<?, ?B/s]

caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so: undefined symbol: _ZN3tsl6StatusC1EN10tensorflow5error4CodeESt17basic_string_viewIcSt11char_traitsIcEENS_14SourceLocationE']
caused by: ['/opt/conda/lib/python3.10/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: undefined symbol: _ZTVN10tensorflow13GcsFileSystemE']


Downloading pytorch_model.bin:   0%|          | 0.00/558M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/292 [00:00<?, ?B/s]

In [6]:
# function extracts answers, questions, and contexts from the dataset
def data_prep(data):
    contexts = []
    questions = []
    answers = []
    for prod in data:
        context = prod['context']
        for i in range(len(prod['qas'])):
            question = prod['qas'][i]['question']
            answer = prod['qas'][i]['answer']['text']
            contexts.append(context)
            questions.append(question)
            answers.append(answer)
    return contexts,questions,answers

test_contexts,test_questions,test_answers = data_prep(test_data)

In [7]:
# function tokenizes the questions and contexts together and labels seprately
def encode_prep(questions,contexts,answers):
    encode_qa = tokenizer(questions,contexts,truncation=True,padding="max_length",max_length = 512,pad_to_max_length=True,
                          add_special_tokens=True)
    encode_ans = tokenizer(answers,truncation=True,padding="max_length",max_length = 25,pad_to_max_length=True,
                          add_special_tokens=True)
    labels = encode_ans["input_ids"]
    encode_qa.update({'labels':labels,"decoder_attention_mask":encode_ans["attention_mask"]})

    return encode_qa

test_embedding = encode_prep(test_questions,test_contexts,test_answers)

In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
batch = torch.tensor(test_embedding['input_ids']).to(device)
batch.shape

torch.Size([139, 512])

## Generating Answers

In [9]:
output = model.generate(input_ids=batch)
pred_answers = tokenizer.batch_decode(output, skip_special_tokens=True)



In [10]:
# fucntion calculates average Exact Match, F1 score
def metrics(predictions, answers):
    EM_ls = []
    f1_ls = []
    for prediction,answer in zip(predictions,answers):
        EM = 0
        if prediction.lower() == answer.lower():
            EM = 1
            EM_ls.append(EM)
        else:
            EM_ls.append(EM)
        prediction_tokens = prediction.lower().split()
        answer_tokens = answer.lower().split()

        common_tokens = set(prediction_tokens) & set(answer_tokens)

        if len(prediction_tokens) == 0 or len(answer_tokens) == 0:
            f1 = 0
            f1_ls.append(f1)
        else:
            precision = len(common_tokens) / len(prediction_tokens)
            recall = len(common_tokens) / len(answer_tokens)

            if precision + recall == 0:
                f1 = 0
                f1_ls.append(f1)
            else:
                f1 = (2 * precision * recall) / (precision + recall)
                f1_ls.append(f1)
    f1_scores = sum(f1_ls)/len(f1_ls)
    EM_scores = sum(EM_ls)/len(EM_ls)
    return f1_scores,EM_scores

In [11]:
# fucntion calculates F1 score
def f1_score_metric(prediction, answer):
    prediction_tokens = prediction.lower().split()
    answer_tokens = answer.lower().split()

    common_tokens = set(prediction_tokens) & set(answer_tokens)

    if len(prediction_tokens) == 0 or len(answer_tokens) == 0:
        return 0

    precision = len(common_tokens) / len(prediction_tokens)
    recall = len(common_tokens) / len(answer_tokens)

    if precision + recall == 0:
        return 0

    f1 = (2 * precision * recall) / (precision + recall)
    return f1

## Metric Calculations on Test Set

In [12]:
f1_score,exact_match = metrics(pred_answers,test_answers)
print("F1 score on test set: ",f1_score)
print("Exact Match on test set: ",exact_match)

F1 score on test set:  0.839130540780985
Exact Match on test set:  0.697841726618705


In [13]:
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

## Examples

In [14]:
print("Context:",'\n',test_contexts[0])
print("\n")
print("Question: ",test_questions[0])
print("Generated Answer: ",pred_answers[0])
print("True Answer: ",test_answers[0])
rouge_score = scorer.score(pred_answers[0],test_answers[0])
EM_SC = int(pred_answers[0] == test_answers[0])
F1_SC =  f1_score_metric(pred_answers[0],test_answers[0])
print("F1 score: ",F1_SC)
print("Exact Match: ",EM_SC)
print("Rouge score: ",rouge_score)

Context: 
 It is a EMPIRE product. This Samsung Galaxy S i pink butterfly case cover provides excellent protection from dust scratches and unwanted blemishes. The Samsung Galaxy S i pink butterfly case cover also allows for full functionality of your phone with openings for all buttons ports jacks and speakers. Provide your phone with excellent protection and give it a fashionable and attractive look at the same time with this Samsung Galaxy S i pink butterfly. Safe case removal tool included. Premium high quality snap on hard cover case protector. Designed to fit phone perfectly. Snaps right over your phone and gives it a new look while providing great protection. Openings for full phone functionality. Constructed for strong durability and scratch resistance. EMPIRE TM is a registered trademark with the USPTO. Categories of product are Cell Phones Accessories Cases Holsters Sleeves Basic Cases


Question:  What is the name of the product?
Generated Answer:  EMPIRE
True Answer:  EMPIRE

In [15]:
print("Context:",'\n',test_contexts[7])
print("\n")
print("Question: ",test_questions[7])
print("Generated Answer: ",pred_answers[7])
print("True Answer: ",test_answers[7])
rouge_score = scorer.score(pred_answers[7],test_answers[7])
EM_SC = int(pred_answers[7] == test_answers[7])
F1_SC =  f1_score_metric(pred_answers[7],test_answers[7])
print("F1 score: ",F1_SC)
print("Exact Match: ",EM_SC)
print("Rouge score: ",rouge_score)

Context: 
 It is a EMPIRE product. This Samsung Galaxy S i pink butterfly case cover provides excellent protection from dust scratches and unwanted blemishes. The Samsung Galaxy S i pink butterfly case cover also allows for full functionality of your phone with openings for all buttons ports jacks and speakers. Provide your phone with excellent protection and give it a fashionable and attractive look at the same time with this Samsung Galaxy S i pink butterfly. Safe case removal tool included. Premium high quality snap on hard cover case protector. Designed to fit phone perfectly. Snaps right over your phone and gives it a new look while providing great protection. Openings for full phone functionality. Constructed for strong durability and scratch resistance. EMPIRE TM is a registered trademark with the USPTO. Categories of product are Cell Phones Accessories Cases Holsters Sleeves Basic Cases


Question:  Openings for what?
Generated Answer:  full phone functionality.
True Answer:  f

In [16]:
print("Context:",'\n',test_contexts[32])
print("\n")
print("Question: ",test_questions[32])
print("Generated Answer: ",pred_answers[32])
print("True Answer: ",test_answers[32])
rouge_score = scorer.score(pred_answers[32],test_answers[32])
EM_SC = int(pred_answers[32] == test_answers[32])
F1_SC =  f1_score_metric(pred_answers[32],test_answers[32])
print("F1 score: ",F1_SC)
print("Exact Match: ",EM_SC)
print("Rouge score: ",rouge_score)

Context: 
 It is a Toggle product. Mobile SIM card operate with GSM technology. Prepaid SIM card so there is no need for a contract. Enjoy cheap rate calling from local to local or to international destination. Lowest rates at SMS GPRS and other Data service. Please contact corresponding service provider or contact us before purchasing this product to know rates and Terms Conditions. SIM Card for The Netherlands UK Australia Denmark Norway Spain and Sweden National calls EUR min Internatonal Calls starting at EUR min Incl EUR call credit Categories of product are Cell Phones Accessories SIM Cards Prepaid Minutes SIM Cards


Question:  What is the name of the product?
Generated Answer:  Toggle
True Answer:  Toggle
F1 score:  1.0
Exact Match:  1
Rouge score:  {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rouge2': Score(precision=0.0, recall=0.0, fmeasure=0.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}


In [17]:
print("Context:",'\n',test_contexts[50])
print("\n")
print("Question: ",test_questions[50])
print("Generated Answer: ",pred_answers[50])
print("True Answer: ",test_answers[50])
rouge_score = scorer.score(pred_answers[50],test_answers[50])
EM_SC = int(pred_answers[50] == test_answers[50])
F1_SC =  f1_score_metric(pred_answers[50],test_answers[50])
print("F1 score: ",F1_SC)
print("Exact Match: ",EM_SC)
print("Rouge score: ",rouge_score)

Context: 
 It is a Milante product. Brand new Milante messina universal case carry your phone in style with this Milante messina cell phone pouch that features a ratcheting belt clip. Use with small flip type phones. Secures with closure flap. Black with gray piping messina universal case with ratcheting belt clip compatible with audiovox CDM CDM CDM Kyocera KX SLIDER SOHO LG AX AX CG CG LG UX VX VX VX Motorola C T C T G T C V V V V V V V V V i V V V V V V i GSM V s V V Nextel i i Pantech PN Samsung U U D X X X M . Sanyo SCP . Siemens CF T Sony Ericsson Z Z A many more compatible with all similar sized phones not listed above phone not included. Ratcheting belt clip Use with small flip type phones Secures with closure flap Brand new Milante messina universal case Categories of product are Cell Phones Accessories Cases Holsters Sleeves Holsters


Question:  What are the categories of product?
Generated Answer:  Cell Phones Accessories Cases Holsters Sleeves
True Answer:  Cell Phones Acc

In [18]:
print("Context:",'\n',test_contexts[130])
print("\n")
print("Question: ",test_questions[130])
print("Generated Answer: ",pred_answers[130])
print("True Answer: ",test_answers[130])
rouge_score = scorer.score(pred_answers[130],test_answers[130])
EM_SC = int(pred_answers[130] == test_answers[130])
F1_SC =  f1_score_metric(pred_answers[130],test_answers[130])
print("F1 score: ",F1_SC)
print("Exact Match: ",EM_SC)
print("Rouge score: ",rouge_score)

Context: 
 It is a Generic product. This premium skin case provides your Huawei M the maximum protection against scratches and scuffs enabling you to keep your Huawei M in a new condition and preserving its looks and features. Made with grade A silica gel this case is not only durable and long lasting but also equips the Huawei M with a soft and comfortable surface. Furthermore all the openings for the dock connector charger side buttons speaker headphone jack and camera were precisely cut to allow full access to all of the Huawei M s functions. It is user friendly and super easy to install. A case like this has all the combined beauty with functionality by utilizing high quality materials and fine craftsmanship. This is truly the perfect case for your valuable Huawei M . Provides your Huawei M the maximum protection against scratches and scuffs. Made with grade A silica gel this case is not only durable and long lasting but also equips the Huawei M with a soft and comfortable surface.

## User Input Test

In [19]:
context = str(input("Put your Product Description Here: "))
question = str(input("Ask Question related product: "))
context = re.sub(r"[^a-zA-Z0-9.!?]+", r" ", context)
question = re.sub(r"[^a-zA-Z0-9.!?]+", r" ", question)
inp = question+' '+context
input_ids = tokenizer.encode(inp, return_tensors='pt')
ans = model.generate(input_ids.to(device))
final_ans = tokenizer.batch_decode(ans, skip_special_tokens = True)
print("\n")
print("Question: ",question)
print("Generated Answer: ",final_ans[0])

Put your Product Description Here:  Google Tensor G2 makes Pixel 7 faster, more efficient and more secure than previous Pixel phones.[1] And it delivers even more helpful features and the best photo and video quality yet on a Pixel.It's easy to see what's on your Pixel screen. It adjusts to high brightness in sunshine, and automatically dims in the dark.The Pixel 7 display is super sharp, with rich vivid colours. Andit's fast and response for smooth gaming, scrolling and moving between apps.[2][1]Compared to Pixel 6. Speed and efficiency claims based on internal testing on pre-production devices.[2]Based on the use of Smooth Display (not available for all apps or content). Full-screen 160.5-mm (6.3-in)1 display
Ask Question related product:  How is Pixel 7 display?




Question:  How is Pixel 7 display?
Generated Answer:  super sharp
