In [77]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import tensorflow
import pickle
import os


In [178]:

#path_full_dataset = os.path.join(os.path.dirname(__file__),'data/scrapping_cleaned_sentences.csv')
#path_model = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'model')
#path_embed=os.path.join(os.path.dirname(os.path.dirname(__file__)),'embeddings.pkl')

df_full = pd.read_csv("/Users/manoharan/code/fobokiller/fobokiller/data/scrapping_cleaned_sentences.csv",index_col=0)
model = SentenceTransformer("/Users/manoharan/code/fobokiller/model")

df_all_resto = df_full.groupby('alias').agg({ 'rate':'mean',
                                                'review':'nunique'
                                            })

def load_embedding():
    with open("/Users/manoharan/code/fobokiller/embeddings.pkl", 'rb') as file:
        embedding = pickle.load(file)
    return embedding['embeddings']


def compute_sim_df(text, embedding, n_prox=None, min_review=0):
    input_encoded = model.encode(text)
    similarities = util.cos_sim(input_encoded, np.array(embedding))

    df_sim = df_full.assign(sim=similarities.T)

    if n_prox:
        df_sentences = df_sim.sort_values('sim', ascending=False)[:n_prox]
    else:
        df_sentences = df_sim.sort_values('sim', ascending=False)

    df_agg = df_sentences.groupby('alias').agg({
        'rate': 'mean',
        'review': 'nunique',
        #'review_sentences':'first',
        #'review_clean':lambda txt: ' // '.join(txt),
        'sim':'mean'
    })

    df_resto = df_agg.merge(df_all_resto,
                             on='alias',
                             how='left',
                             suffixes=('_filtered', '_all')).reset_index()

    df_resto['ratio'] = df_resto['review_filtered'] / df_resto['review_all']

    df_resto = df_resto.sort_values('ratio')
    df_resto = df_resto[df_resto['review_all'] > min_review]

    df_final = df_sentences.merge(df_resto,
                                 on='alias',
                                 how='left',
                                 suffixes=('_s', '_r'))

    df_final['metric']=df_final['ratio']*df_final['sim_r']*df_final['rate_filtered']/5

    df_final.drop(columns=['review'],inplace=True)

    return df_final


In [179]:
result = compute_sim_df("I want to eat close to eiffel tower",embedding,3000,10)


In [191]:
result.sort_values("metric", ascending = False)

Unnamed: 0,alias,date,rate,review_clean,review_sentences,sim_s,rate_filtered,review_filtered,sim_r,rate_all,review_all,ratio,metric
41,6-new-york-paris,2018-09-15,5,we stumbled upon this restaurant while explori...,we stumbled upon this restaurant while explori...,0.811306,4.8,5.0,0.716983,4.352941,11.0,0.454545,0.312865
407,6-new-york-paris,2015-09-30,4,lovely!! came here for my bday to have the mys...,"the restaurant itself is nice, with a view of...",0.718040,4.8,5.0,0.716983,4.352941,11.0,0.454545,0.312865
479,6-new-york-paris,2016-09-13,5,delicious food and a view of the eiffel tower....,delicious food and a view of the eiffel tower,0.711247,4.8,5.0,0.716983,4.352941,11.0,0.454545,0.312865
912,6-new-york-paris,2015-09-06,5,"a good friend recommended the spot, and since ...","a good friend recommended the spot, and since ...",0.682693,4.8,5.0,0.716983,4.352941,11.0,0.454545,0.312865
1452,6-new-york-paris,2018-03-18,5,we had our first dinner in paris in this resta...,the place was awesome and we had a view of ei...,0.661631,4.8,5.0,0.716983,4.352941,11.0,0.454545,0.312865
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1996,le-bœuf-sur-le-toit-paris-2,2017-06-21,3,nice place close to ce where you have the expe...,nice place close to ce where you have the expe...,0.647401,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000
1057,là-haut-paris,2018-12-21,5,this food is so good! was pleasantly surprised...,i would recommend this place to anyone stayin...,0.675670,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000
365,crêperie-framboise-passy-trocadéro-paris-4,2018-11-10,5,small creperie steps away from the eiffel towe...,"if you're hanging around the eiffel tower, do...",0.723445,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000
2222,l-entredgeu-paris,2013-05-24,5,this is by far my favorite place to eat in par...,i would recommend this place to anyone visiti...,0.643674,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.000000


In [180]:
result[["alias","rate_filtered","ratio","metric","sim_r"]]

Unnamed: 0,alias,rate_filtered,ratio,metric,sim_r
0,le-bosquet-paris,4.200000,0.219512,0.130236,0.706308
1,firmine-paris,4.366071,0.276882,0.171649,0.709947
2,firmin-le-barbier-paris,4.666667,0.340909,0.234892,0.738233
3,la-gourmandise-paris-5,4.333333,0.171429,0.105928,0.712976
4,le-castel-café-paris-3,3.000000,0.235772,0.099975,0.706717
...,...,...,...,...,...
2995,la-cantine-du-troquet-dupleix-paris,4.666667,0.088235,0.054008,0.655807
2996,firmine-paris,4.366071,0.276882,0.171649,0.709947
2997,le-bailli-de-suffren-paris,1.789474,0.103030,0.025604,0.694371
2998,le-temps-des-cerises-paris-5,4.916667,0.043165,0.027678,0.652075


In [181]:
result.head(10)

Unnamed: 0,alias,date,rate,review_clean,review_sentences,sim_s,rate_filtered,review_filtered,sim_r,rate_all,review_all,ratio,metric
0,le-bosquet-paris,2017-08-17,4,amazing grill whole seabass fish to perfection...,good place to eat near the eiffle tower,0.882026,4.2,36.0,0.706308,3.848438,164.0,0.219512,0.130236
1,firmine-paris,2017-08-11,4,great place to eat if you want to remain close...,great place to eat if you want to remain close...,0.870985,4.366071,103.0,0.709947,3.90325,372.0,0.276882,0.171649
2,firmin-le-barbier-paris,2017-03-10,5,my wife and i ate here for our last night in p...,"if you're in the eiffel tower area, stop in a...",0.861907,4.666667,15.0,0.738233,4.30445,44.0,0.340909,0.234892
3,la-gourmandise-paris-5,2017-06-02,5,as you read this know that i used their free h...,if your looking for some delicious meal close...,0.86099,4.333333,6.0,0.712976,4.130252,35.0,0.171429,0.105928
4,le-castel-café-paris-3,2018-09-06,2,tourist trap cafe. they would not bring a cara...,do yourself a favor and get further from the ...,0.859872,3.0,29.0,0.706717,2.235955,123.0,0.235772,0.099975
5,samaya-paris-3,2019-11-24,5,my husband and i came here both for dinner one...,definitely a good place to eat when you're ne...,0.859075,4.4,5.0,0.718684,4.307692,15.0,0.333333,0.210814
6,monsieur-bleu-paris,2016-09-04,5,the food here was amazing and the outside pati...,go here if you are looking for a nice meal ne...,0.854015,4.1,27.0,0.698729,3.951006,137.0,0.19708,0.112919
7,café-de-mars-paris-3,2016-03-17,4,fantastic lunch spot and like the other review...,would recommend for a lunch near the eiffel t...,0.853944,4.846154,12.0,0.717027,4.555911,85.0,0.141176,0.098113
8,café-beaujolais-paris,2017-05-26,4,a good place to dine nearby eiffel tower. the ...,a good place to dine nearby eiffel tower,0.85275,4.121212,30.0,0.708957,3.680098,115.0,0.26087,0.15244
9,le-castel-café-paris-3,2018-11-19,1,it was cold and dreary in eiffel tower and we ...,pls guys dont be deceived to eat here coz its...,0.850791,3.0,29.0,0.706717,2.235955,123.0,0.235772,0.099975


In [182]:
result[result['alias']=='firmine-paris'].isna().sum()

alias               0
date                0
rate                0
review_clean        0
review_sentences    0
sim_s               0
rate_filtered       0
review_filtered     0
sim_r               0
rate_all            0
review_all          0
ratio               0
metric              0
dtype: int64

In [183]:
k = summary_reviews(result, 10)
k.shape

(587, 7)

In [184]:

def summary_reviews(result,n_best):
    result.fillna(0,inplace=True)

    # select n_best first restaurants with higher sim_r
    higher_sim_r = sorted(result['metric'].unique())#[-n_best - 1:]
    best_sim_r = result[result['metric'] > higher_sim_r[0]]

    reviews = best_sim_r.groupby('alias').agg({
        'review_clean': [set,'count'],
        'review_filtered':'first',
        'metric':'mean',
        "rate_filtered":"first"
    })

    reviews.rename(columns={'set':'reviews',
                            'count':'nb_sentences',
                            'first':'nb_review',
                            'mean':'metric sim_ratio',
                           "first":"rate_filtered"},inplace=True)

    reviews = reviews.droplevel(level=0, axis=1)

    reviews['sentences_pond'] = reviews['nb_sentences']/reviews['nb_sentences'].sum()
    reviews['metric_pond'] = reviews['sentences_pond'] * reviews[
        'metric sim_ratio']



    return reviews.sort_values('metric sim_ratio', ascending = False)


if __name__ == '__main__':
    load_embedding()


In [185]:
higher_sim_r = sorted(result['metric'].unique())[-n_best - 1:]

In [186]:
best_sim_r = result[result['metric'] > higher_sim_r[0]]

In [187]:
reviews = best_sim_r.groupby('alias').agg({
        'review_clean': [set,'count'],
        'review_filtered':'first',
        'metric':'mean'
    })


In [188]:
reviews

Unnamed: 0_level_0,review_clean,review_clean,review_filtered,metric
Unnamed: 0_level_1,set,count,first,mean
alias,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
6-new-york-paris,{we stumbled upon this restaurant while explor...,5,5.0,0.312865
au-bon-accueil-paris-3,{we came here for my boyfriend's bday dinner. ...,28,24.0,0.232446
aux-cerises-paris-07,"{last night, i attended a special, private din...",13,12.0,0.200761
chez-ming-paris,"{we've been traveling europe for a week, and t...",13,12.0,0.22762
firmin-le-barbier-paris,{read the reviews about this place and had to ...,15,15.0,0.234892
il-sorrentino-paris,{this was the best restaurant i went to while ...,20,17.0,0.224531
le-casse-noix-paris-15,{if you are looking for a fantastic dining exp...,10,8.0,0.211209
restaurant-de-la-tour-paris,{lovely cozy restaurant within a stone's throw...,13,12.0,0.19872
samaya-paris-3,{my husband and i came here both for dinner on...,5,5.0,0.210814
su-misura-paris,{the staff was very welcoming on christmas day...,22,21.0,0.221625


In [113]:
reviews

Unnamed: 0_level_0,review_clean,review_clean,review_filtered,metric
Unnamed: 0_level_1,set,count,first,mean
alias,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
58-tour-eiffel-paris,"{if you want good food, a nice view, and great...",31,115.0,0.269968
6-new-york-paris,{we stumbled upon this restaurant while explor...,2,5.0,0.287386
firmin-le-barbier-paris,{read the reviews about this place and had to ...,5,15.0,0.25167


In [106]:
result.fillna(0,inplace=True)


In [87]:
result = compute_sim_df("I want to eat close to eiffel tower",embedding,3000,10)

In [115]:
result.fillna(0,inplace=True)

    # select n_best first restaurants with higher sim_r
higher_sim_r = sorted(result['metric'].unique())[-n_best - 1:]
best_sim_r = result[result['metric'] > higher_sim_r[0]]

reviews = higher_sim_r.groupby('alias').agg({
        'review_clean': [set,'count'],
        'review_filtered':'first',
        'metric':'mean'
    })


In [124]:
result['metric'].unique().shape

(397,)

In [130]:
higher_sim_r

[0.248415701529559,
 0.25006916412373176,
 0.2516703104430979,
 0.25247231606839177,
 0.2548597611375964,
 0.2607212283394553,
 0.26214146859867055,
 0.2652622003677023,
 0.26851296262711044,
 0.2730640297902818,
 0.31405057049979845]

In [71]:
result = compute_sim_df("I want to eat close to eiffel tower",embedding,3000,10)

In [68]:
summary_reviews(result,10)

Unnamed: 0_level_0,reviews,nb_sentences,nb_review,metric sim_ratio,sentences_pond,metric_pond
alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
6-new-york-paris,{we stumbled upon this restaurant while explor...,5,5.0,0.312865,0.034722,0.010863
firmin-le-barbier-paris,{read the reviews about this place and had to ...,15,15.0,0.234892,0.104167,0.024468
au-bon-accueil-paris-3,{we came here for my boyfriend's bday dinner. ...,28,24.0,0.232446,0.194444,0.045198
chez-ming-paris,"{we've been traveling europe for a week, and t...",13,12.0,0.22762,0.090278,0.020549
il-sorrentino-paris,{this was the best restaurant i went to while ...,20,17.0,0.224531,0.138889,0.031185
su-misura-paris,{the staff was very welcoming on christmas day...,22,21.0,0.221625,0.152778,0.033859
le-casse-noix-paris-15,{if you are looking for a fantastic dining exp...,10,8.0,0.211209,0.069444,0.014667
samaya-paris-3,{my husband and i came here both for dinner on...,5,5.0,0.210814,0.034722,0.00732
aux-cerises-paris-07,"{last night, i attended a special, private din...",13,12.0,0.200761,0.090278,0.018124
restaurant-de-la-tour-paris,{lovely cozy restaurant within a stone's throw...,13,12.0,0.19872,0.090278,0.01794


In [66]:
result.groupby('alias').first().sort_values('metric', ascending = False).head(10)

Unnamed: 0_level_0,date,rate,review_clean,review_sentences,sim_s,rate_filtered,review_filtered,sim_r,rate_all,review_all,ratio,metric
alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
6-new-york-paris,2018-09-15,5,we stumbled upon this restaurant while explori...,we stumbled upon this restaurant while explori...,0.811306,4.8,5.0,0.716983,4.352941,11.0,0.436364,0.312865
firmin-le-barbier-paris,2017-03-10,5,my wife and i ate here for our last night in p...,"if you're in the eiffel tower area, stop in a...",0.861907,4.666667,15.0,0.738233,4.30445,44.0,0.318182,0.234892
au-bon-accueil-paris-3,2015-10-07,5,best meal we have had in paris on our entire s...,please avoid the tourist traps near the eiffe...,0.804549,4.678571,24.0,0.703844,4.521295,68.0,0.330252,0.232446
chez-ming-paris,2019-08-19,4,"good location, great food, good service. the w...",my husband and i were looking for something s...,0.798889,4.692308,12.0,0.707427,4.214286,35.0,0.321758,0.22762
il-sorrentino-paris,2015-06-11,5,took my girl firend here for her birthday dinn...,view of the eiffel tower if you're eating out...,0.778124,4.75,17.0,0.68124,4.248705,49.0,0.329592,0.224531
su-misura-paris,2017-11-14,5,amazing italian food! i got the pasta with arr...,the restaurant and a lovely atmosphere close ...,0.82687,4.590909,21.0,0.724123,4.532995,63.0,0.306061,0.221625
le-casse-noix-paris-15,2018-04-04,4,bustling bistro fare near the eiffel tower. wa...,bustling bistro fare near the eiffel tower,0.765234,4.8,8.0,0.687529,4.284519,25.0,0.3072,0.211209
samaya-paris-3,2019-11-24,5,my husband and i came here both for dinner one...,definitely a good place to eat when you're ne...,0.859075,4.4,5.0,0.718684,4.307692,15.0,0.293333,0.210814
aux-cerises-paris-07,2019-07-04,5,best quiche of my life!! highly recommend this...,this is great for folks looking for a good qu...,0.846447,4.923077,12.0,0.747624,4.371324,44.0,0.268531,0.200761
restaurant-de-la-tour-paris,2013-08-16,4,always have fond memories of this place. close...,"close to eiffel tower, the food is simple a d...",0.806853,4.769231,12.0,0.711811,4.719626,41.0,0.279174,0.19872


In [27]:
higher_sim_r = sorted(result['metric'].unique())[-n_best - 1:]
higher_sim_r

[0.4614033102989197, 0.46380746364593506, 0.5576086044311523]

In [28]:
higher_sim_r = sorted(result['metric'])[-n_best - 1:]
higher_sim_r

[0.46380746364593506, 0.46380746364593506, 0.5576086044311523]

In [64]:
summary_reviews(result,2)

Unnamed: 0_level_0,reviews,nb_sentences,nb_review,metric sim_ratio,sentences_pond,metric_pond
alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
le-casse-noix-paris-15,{took a short hike from nearby pullman hotel t...,10,10.0,0.217307,0.666667,0.144871
l-encrier-paris,"{in a few words, this was great: * really nice...",5,5.0,0.175846,0.333333,0.058615


In [12]:
summary_reviews(result, 10)

Unnamed: 0_level_0,reviews,nb_sentences,nb_review,metric sim_ratio,sentences_pond,metric_pond
alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
el-fares-paris-8,"{very very low quality food, maybe the worst r...",1,1,0.557609,0.012821,0.007149
l-eglantine-paris,"{thanks to l'eglantine, i've gotten off to a g...",2,1,0.463807,0.025641,0.011892
les-zygomates-paris-3,{this quaint french restaurant was absolutely ...,3,1,0.461403,0.038462,0.017746
noynoy-paris,"{although when in france, you should eat as mu...",3,1,0.41353,0.038462,0.015905
les-trois-marmites-paris,{a lovely corner cafe just around the corner f...,9,2,0.387063,0.115385,0.044661
khun-akorn-paris-2,{love the place since years. fresh delicious f...,17,3,0.345993,0.217949,0.075409
zoe-bouillon-paris,{wonderful place to pop in for a cup of soup! ...,7,2,0.333246,0.089744,0.029907
sans-gêne-paris,{first night of our paris trip and the meal at...,3,1,0.327929,0.038462,0.012613
irène-et-bernard-paris-4,{nice neighborhood cafe! i was staying at the ...,21,4,0.319803,0.269231,0.086101
sukhothai-paris,{this is the best restaurant i had in paris! s...,12,3,0.31939,0.153846,0.049137


In [76]:
result = compute_sim_df("I want to eat close to eiffel tower",embedding,3000,10)
resul2 = summary_reviews(result,10)
resul2

Unnamed: 0_level_0,reviews,nb_sentences,rate_filtered,metric sim_ratio,rate_filtered,sentences_pond,metric_pond
alias,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
6-new-york-paris,{we stumbled upon this restaurant while explor...,5,5.0,0.312865,4.8,0.034722,0.010863
firmin-le-barbier-paris,{read the reviews about this place and had to ...,15,15.0,0.234892,4.666667,0.104167,0.024468
au-bon-accueil-paris-3,{we came here for my boyfriend's bday dinner. ...,28,24.0,0.232446,4.678571,0.194444,0.045198
chez-ming-paris,"{we've been traveling europe for a week, and t...",13,12.0,0.22762,4.692308,0.090278,0.020549
il-sorrentino-paris,{this was the best restaurant i went to while ...,20,17.0,0.224531,4.75,0.138889,0.031185
su-misura-paris,{the staff was very welcoming on christmas day...,22,21.0,0.221625,4.590909,0.152778,0.033859
le-casse-noix-paris-15,{if you are looking for a fantastic dining exp...,10,8.0,0.211209,4.8,0.069444,0.014667
samaya-paris-3,{my husband and i came here both for dinner on...,5,5.0,0.210814,4.4,0.034722,0.00732
aux-cerises-paris-07,"{last night, i attended a special, private din...",13,12.0,0.200761,4.923077,0.090278,0.018124
restaurant-de-la-tour-paris,{lovely cozy restaurant within a stone's throw...,13,12.0,0.19872,4.769231,0.090278,0.01794
