In [None]:
# Required libraries
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer, util

In [2]:
# Load the question/answer table from df.xlsx into a DataFrame.
# result() later filters this frame on its 'asin' column and reads the
# 'question' and 'answer' columns.
data_frame = pd.read_excel('df.xlsx')
# Bare expression as the last line so the notebook renders the frame.
data_frame

Unnamed: 0,asin,question,answer
0,B00004U9JP,I have a 9 year old Badger 1 that needs replac...,I replaced my old one with this without a hitch.
1,B00004U9JP,model number,This may help InSinkErator Model BADGER-1: Bad...
2,B00004U9JP,can I replace Badger 1 1/3 with a Badger 5 1/2...,Plumbing connections will vary with different ...
3,B00004U9JP,Does this come with power cord and dishwasher ...,It does not come with a power cord. It does co...
4,B00004U9JP,loud noise inside when turned on. sounds like ...,Check if you dropped something inside.Usually ...
...,...,...,...
9003,B00L07RWJE,How come it is compatible with all brands?,The Woder 5K Inline Water Filter for Ice ice m...
9004,B00L07RWJE,Is it compatible to replace my Maytag UKF8001 ...,"Yes, of course. The Woder Fridge Filter fits b..."
9005,B00L07RWJE,I cannot find any official reference to testin...,'Woder' is a Clearbrook LLC / Sunrise Solution...
9006,B00L07RWJE,"Can it be installed inside the refrigerator, a...",I can't say for certain but it would require c...


In [3]:
# Load the stored encodings from amz_encode.xlsx.
# result() filters this frame on its 'asin' column and reads the columns
# labelled 0..383 as the vector for each row.
# NOTE(review): presumably one row per row of data_frame, in the same order — confirm.
embeddings_dataframe = pd.read_excel("amz_encode.xlsx")

In [None]:
# Fetch the tokenizer and the encoder for the same HuggingFace Hub checkpoint.
# The identifier is kept in one place so the two loads cannot drift apart.
checkpoint = 'sentence-transformers/all-MiniLM-L6-v2'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

In [5]:
# Mean pooling: average the token embeddings, weighting each by its attention mask
def mean_pooling(model_output, attention_mask):
    """Return the mask-weighted mean of the token embeddings.

    Parameters
    ----------
    model_output : sequence
        Only ``model_output[0]`` is read; it is treated as the tensor of
        token embeddings.
    attention_mask : torch.Tensor
        Mask with one fewer trailing dimension than the token embeddings;
        it is broadcast over the last axis and cast to float.

    Returns
    -------
    torch.Tensor
        Sum of the masked embeddings over dimension 1 divided by the mask
        total over dimension 1. The divisor is clamped to at least 1e-9 so
        a fully masked row does not divide by zero.
    """
    hidden_states = model_output[0]
    # Stretch the mask to the embeddings' shape so it can be multiplied element-wise.
    weights = attention_mask.unsqueeze(-1).expand(hidden_states.size()).float()
    masked_total = torch.sum(hidden_states * weights, 1)
    weight_total = torch.clamp(weights.sum(1), min=1e-9)
    return masked_total / weight_total


In [6]:
def result(asin, query):
    """Rank one product's stored question/answer pairs against a query.

    The query is tokenized, run through ``model``, mean-pooled and
    L2-normalized, then compared by cosine similarity with the stored
    vectors for the product.

    Parameters
    ----------
    asin
        Value compared for equality with the ``asin`` column of both
        ``data_frame`` and ``embeddings_dataframe``.
    query
        Text passed to ``tokenizer``.

    Returns
    -------
    list of tuple
        ``(score, question, answer)`` triples sorted by decreasing score.
        An empty list is returned when the product has no question rows or
        no stored vectors.
    """
    # Questions and answers recorded for the requested product.
    product_qa = data_frame[data_frame['asin'] == asin][['question', 'answer']]
    question_list = product_qa['question'].values.tolist()
    answer_list = product_qa['answer'].values.tolist()

    # Stored vectors for the same product, read from the columns labelled 0..383.
    # NOTE(review): 384 presumably matches the encoder's embedding width, and the
    # rows are assumed to be in the same order as the question rows — confirm.
    product_vectors = embeddings_dataframe[embeddings_dataframe['asin'] == asin]
    stored_embeddings = product_vectors[list(range(384))].values.tolist()

    # Nothing to rank: skip the model call instead of letting cos_sim fail
    # on an empty matrix.
    if not question_list or not stored_embeddings:
        return []

    # Tokenize the query.
    encoded_input = tokenizer(query, padding=True, truncation=True, return_tensors='pt')

    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        model_output = model(**encoded_input)

        # Pool token embeddings into one vector, then L2-normalize it.
        sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
        query_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

    # Cosine similarity between the query and every stored vector.
    scores = util.cos_sim(query_embeddings, stored_embeddings)[0].cpu().tolist()

    # Attach each score to its question/answer pair, best match first.
    cosine_pairs = list(zip(scores, question_list, answer_list))
    cosine_pairs = sorted(cosine_pairs, key=lambda x: x[0], reverse=True)

    return cosine_pairs


In [None]:
import os

from flask import Flask, render_template, request, jsonify


# Directory holding the HTML templates. It can be overridden with the
# QA_TEMPLATE_FOLDER environment variable so the app is not tied to one
# machine; when the variable is unset the original path is used.
TEMPLATE_FOLDER = os.environ.get(
    'QA_TEMPLATE_FOLDER',
    r'C:\Users\divya\Desktop\Team A11\code\fend\templates',
)

app = Flask(__name__, template_folder=TEMPLATE_FOLDER)

# Root URL: serve the HTML front-end page
@app.route('/')
def index():
    """Render the ``index.html`` template and return it as the response."""
    page = render_template('index.html')
    return page

# POST endpoint: run the similarity search for the submitted form
@app.route('/process', methods=['POST'])
def process():
    """Pass the submitted form fields to ``result`` and return its output as JSON.

    The form field ``input1`` is passed as the first argument of ``result``
    (its ``asin`` parameter) and ``input2`` as the second (its ``query``
    parameter).
    """
    form = request.form
    asin = form['input1']
    query = form['input2']

    # Serialize the ranked results as the JSON response body.
    return jsonify(result(asin, query))

if __name__ == '__main__':
    # Start Flask's built-in server on port 5000 with debug mode off.
    # host='0.0.0.0' makes it listen on all network interfaces.
    app.run(host='0.0.0.0', port=5000, debug=False)