In [1]:
import os


os.chdir('..')
[x for x in os.listdir() if not x.startswith('.')]

['data', 'img', 'models', 'notebooks']

## Load the Models

In [2]:
from models.model_hub import models

for model_name, model_obj in models.items():
    print(model_name, model_obj)

roberta <transformers.pipelines.question_answering.QuestionAnsweringPipeline object at 0x0000019FC56F8040>
deberta <transformers.pipelines.question_answering.QuestionAnsweringPipeline object at 0x0000019FC5C27F70>
distilbert <transformers.pipelines.question_answering.QuestionAnsweringPipeline object at 0x0000019FC39C0400>


## Load the Data

In [3]:
import pandas as pd
from glob import glob

data = glob('data/*.csv')

syrovarnya = pd.read_csv([x for x in data if 'syrovarnya' in x][0])
print(syrovarnya.shape)
print(syrovarnya.columns)
syrovarnya.head(3)

(204831, 25)
Index(['Unnamed: 0', 'noun', 'comments', 'rated_indiv', 'date_post',
       'reviewer', 'number_of_reveiws', 'loc_reviewer', 'restaurant_name',
       'review_translated', 'review_full', 'tokenized_text', 'adjectives',
       'value', 'Sentiment_adj1_pos', 'Sentiment_adj1_neutral',
       'Sentiment_adj1_neg', 'Bigram', 'Sentiment_bigram_pos',
       'Sentiment_bigram_neutral', 'Sentiment_bigram_neg',
       'Sentiment_text_pos', 'Sentiment_text_neutral', 'Sentiment_text_neg',
       'noun_grouped'],
      dtype='object')


Unnamed: 0.1,Unnamed: 0,noun,comments,rated_indiv,date_post,reviewer,number_of_reveiws,loc_reviewer,restaurant_name,review_translated,...,Sentiment_adj1_neutral,Sentiment_adj1_neg,Bigram,Sentiment_bigram_pos,Sentiment_bigram_neutral,Sentiment_bigram_neg,Sentiment_text_pos,Sentiment_text_neutral,Sentiment_text_neg,noun_grouped
0,0,*,Mi stupisco come alcuni abbiamo dato 5* a ques...,10,"February 12, 2023",['enstrop'],['5 reviews'],[],Lavash,I am amazed how some have given 5* to this res...,...,1.0,0.0,NA *,0.0,1.0,0.0,0.262,0.738,0.0,*
1,1,restaurant,Mi stupisco come alcuni abbiamo dato 5* a ques...,10,"February 12, 2023",['enstrop'],['5 reviews'],[],Lavash,I am amazed how some have given 5* to this res...,...,1.0,0.0,NA restaurant,0.0,1.0,0.0,0.262,0.738,0.0,food
2,2,food,Mi stupisco come alcuni abbiamo dato 5* a ques...,10,"February 12, 2023",['enstrop'],['5 reviews'],[],Lavash,I am amazed how some have given 5* to this res...,...,1.0,0.0,NA food,0.0,1.0,0.0,0.0,0.639,0.361,food


In [4]:
columns_dev = ['date_post', 'review_full', 'value', 'Sentiment_text_pos', 'Sentiment_text_neutral', 'Sentiment_text_neg']
dev = syrovarnya[columns_dev].copy()
dev.drop_duplicates(inplace=True)
dev.head()

Unnamed: 0,date_post,review_full,value,Sentiment_text_pos,Sentiment_text_neutral,Sentiment_text_neg
0,"February 12, 2023",i am amazed how some have given 5* to this res...,,0.262,0.738,0.0
2,"February 12, 2023",i am amazed how some have given 5* to this res...,,0.0,0.639,0.361
3,"February 9, 2023","we were there twice, incredibly good tasty foo...",good,0.262,0.738,0.0
4,"February 9, 2023","we were there twice, incredibly good tasty foo...",reasonable,0.262,0.738,0.0
5,"February 9, 2023","we were there twice, incredibly good tasty foo...",,0.0,1.0,0.0


In [5]:
with open('models/QA_template.txt', 'r') as template:
    questions = template.readlines()
questions

['what are the reasons that the reviewer like the restaraunt?\n',
 'what are the reasons that make the reviewer like the restaraunt?\n',
 'why the client liked this restaraunt (the reasons)?\n',
 'what things/products the reviewer like about the restaraunt?\n',
 'what things, products, anything the reviewer like about the restaraunt?']

## Prepare the Reviews (Test set)

***Define (manually  or with code) your desired sample of reviews (else use the below code snippet that selects randomly (uniform)***

In [6]:
number_of_reviews = 10   # change if neccasssary
reviews = dev.sample(number_of_reviews).review_full.tolist()
for review in reviews:
    print(review, end='\n\n')

great place, delicious food, beautiful presentation, nice staff. there is food for vegetarians. large selection of dishes. desserts separate love

not only was the food delicious and the service was remarkable .... but they provided so many traditional armenian foods for free!! this was great! a must visit!!!! amazing coffee and dessert 😍😍😍😍

we were looking for a tasty meal in yerevan, one of our armenian friends recommended this restaurant. it's very atmospheric, everything is home-style, very, very tasty! and the money is very affordable!

cosy, friendly atmosphere, delicious armenian food and exceptional service. staff is very professional. this is definitely one of the best restaurants in yerevan (among yerevan tavern and sherep), which i'll recommend to all.

i saw in the comments and articles that the khinkali here is the best in yerevan. it may be because of the holiday. there was no seat for the first time in the evening. we still ordered to eat at noon the next day. the khink

## Test the Q&A Models

In [7]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from models.inference import run_pipeline


cols = ['Review', 'Question/Template', 'Model', 'Answer']
empty_df = np.zeros([len(reviews)*len(questions)*len(models), len(cols)])
metadata = pd.DataFrame(empty_df, columns = cols, index=range(empty_df.__len__()))


row = 0
for review in tqdm(reviews):
    for template in questions:
        for model_name, model in models.items():
            ans = run_pipeline(model, template, review)
            metadata.iloc[row, :] = [review, template, model_name, ans]
            row += 1

metadata.head(3)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:33<00:00,  3.30s/it]


Unnamed: 0,Review,Question/Template,Model,Answer
0,"great place, delicious food, beautiful present...",what are the reasons that the reviewer like th...,roberta,"great place, delicious food, beautiful present..."
1,"great place, delicious food, beautiful present...",what are the reasons that the reviewer like th...,deberta,"delicious food, beautiful presentation, nice ..."
2,"great place, delicious food, beautiful present...",what are the reasons that the reviewer like th...,distilbert,"great place, delicious food, beautiful present..."


### Evaluation

#### Let's examine a particular review

In [12]:
target_review = reviews[1]  # random id
target_review

'not only was the food delicious and the service was remarkable .... but they provided so many traditional armenian foods for free!! this was great! a must visit!!!! amazing coffee and dessert 😍😍😍😍'

In [16]:
def get_eval_data(target: str, verbose: bool = False) -> pd.DataFrame:
    """Returns the filtered (by review) dataframe"""
    if verbose:
        print(target_review, '\n')
    sub_data = metadata[metadata.Review==target_review]
    eval_data = sub_data.iloc[:, 1:]  # first column is the Review column: all the values are the same == target_review
    return eval_data

get_eval_data(target_review)

Unnamed: 0,Question/Template,Model,Answer
15,what are the reasons that the reviewer like th...,roberta,amazing coffee and dessert
16,what are the reasons that the reviewer like th...,deberta,they provided so many traditional armenian fo...
17,what are the reasons that the reviewer like th...,distilbert,they provided so many traditional armenian foo...
18,what are the reasons that make the reviewer li...,roberta,amazing coffee and dessert
19,what are the reasons that make the reviewer li...,deberta,they provided so many traditional armenian fo...
20,what are the reasons that make the reviewer li...,distilbert,they provided so many traditional armenian foo...
21,why the client liked this restaraunt (the reas...,roberta,they provided so many traditional armenian foo...
22,why the client liked this restaraunt (the reas...,deberta,the food delicious and the service was remark...
23,why the client liked this restaraunt (the reas...,distilbert,they provided so many traditional armenian foo...
24,what things/products the reviewer like about t...,roberta,amazing coffee and dessert


In [17]:
target_review = reviews[2]
get_eval_data(target_review, True)

we were looking for a tasty meal in yerevan, one of our armenian friends recommended this restaurant. it's very atmospheric, everything is home-style, very, very tasty! and the money is very affordable! 



Unnamed: 0,Question/Template,Model,Answer
30,what are the reasons that the reviewer like th...,roberta,the money is very affordable
31,what are the reasons that the reviewer like th...,deberta,"it's very atmospheric, everything is home-style,"
32,what are the reasons that the reviewer like th...,distilbert,money is very affordable
33,what are the reasons that make the reviewer li...,roberta,the money is very affordable
34,what are the reasons that make the reviewer li...,deberta,"it's very atmospheric, everything is home-style,"
35,what are the reasons that make the reviewer li...,distilbert,money is very affordable
36,why the client liked this restaraunt (the reas...,roberta,the money is very affordable
37,why the client liked this restaraunt (the reas...,deberta,"it's very atmospheric, everything is home-style,"
38,why the client liked this restaraunt (the reas...,distilbert,the money is very affordable
39,what things/products the reviewer like about t...,roberta,everything is home-style


In [18]:
# syrovarnya.sample(1000).to_csv('small_syrovarnya.csv')