## In this notebook

* We build the skeleton code required for the Flask API calls.

NOTE: 
- Ensure that reviews are ranked by length and recency, along with the given attribute scores.

References (if required):
- https://stackoverflow.com/questions/4928054/postgresql-wildcard-like-for-any-of-a-list-of-words

- https://www.postgresql.org/docs/current/functions-array.html
    - https://www.postgresql.org/docs/current/arrays.html
    - array_length ( anyarray, integer ) → integer

In [1]:
import io
import json
import os
import re

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [2]:
# Raise pandas' display limits (columns, column width, rows) for inline inspection.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 200)
pd.set_option('display.max_rows', 1000)

In [3]:
# Build the SQLAlchemy engine and open the single connection that the rest of
# the notebook passes to pd.read_sql.
# The connection URL is taken from the GABBY_DB_URL environment variable so that
# credentials can be supplied without editing the notebook; the literal below is
# only the fallback, kept so existing local setups keep working unchanged.
# NOTE(review): the fallback still embeds a password in source - drop it once
# every environment sets GABBY_DB_URL.
conn_string = os.environ.get(
    'GABBY_DB_URL',
    'postgresql+psycopg2://gabbydbuser:gabbyDBpass@localhost:5432/gabbyDB',
)
db = create_engine(conn_string)
conn = db.connect()

## Getting attributes/phrases

In [78]:
# Net-negative phrases (n_negative > n_positive): the inner query keeps the 50
# rows of key_phrase_scores with the most negative mentions, the outer query
# joins in the phrase text and orders the result by reviewer count.
negative_attributes_query = (
    '''SELECT  P.key_phrase_id, P.phrase, S.n_positive, S.n_negative, S.reviewer_idf, S.n_reviews, S.n_reviewers
    FROM key_phrase_root P, 
    (SELECT * 
    FROM key_phrase_scores 
    WHERE  n_positive - n_negative < 0 
    ORDER BY n_negative DESC LIMIT 50) S
    WHERE P.key_phrase_id=S.key_phrase_id 
    ORDER BY n_reviewers DESC
    '''
)
negative_phrases = pd.read_sql(sql=negative_attributes_query, con=conn)

In [79]:
negative_phrases

Unnamed: 0,key_phrase_id,phrase,n_positive,n_negative,reviewer_idf,n_reviews,n_reviewers
0,4327,the issue,148,167,5.119226,276,272
1,7184,no way,128,164,5.145298,272,265
2,3258,this issue,128,130,5.313382,232,224
3,2323,this problem,103,111,5.467533,199,192
4,8322,warranty,58,125,5.688075,162,154
5,1697,your money,41,133,5.701147,162,152
6,5208,the replacement,72,91,5.790554,144,139
7,18154,a dead pixel,67,71,5.921007,126,122
8,3569,a refund,25,109,5.937536,122,120
9,2841,junk,32,98,5.971438,120,116


In [80]:
# Net-positive phrases (n_positive > n_negative): the inner query keeps the 50
# rows of key_phrase_scores with the most positive mentions, the outer query
# joins in the phrase text and orders the result by positive-mention count.
positive_attributes_query = (
    '''SELECT  P.key_phrase_id, P.phrase, S.n_positive, S.n_negative, S.reviewer_idf, S.n_reviews, S.n_reviewers
    FROM key_phrase_root P, 
    (SELECT * 
    FROM key_phrase_scores 
    WHERE  n_positive - n_negative > 0 
    ORDER BY n_positive DESC LIMIT 50) S
    WHERE P.key_phrase_id=S.key_phrase_id 
    ORDER BY n_positive DESC
    '''
)
positive_phrases = pd.read_sql(sql=positive_attributes_query, con=conn)

In [81]:
positive_phrases

Unnamed: 0,key_phrase_id,phrase,n_positive,n_negative,reviewer_idf,n_reviews,n_reviewers
0,3,this monitor,10892,2788,1.776702,8259,7695
1,29,the monitor,9363,3182,1.961287,6819,6398
2,26,the price,4541,644,2.3219,4611,4461
3,9,the screen,4272,1855,2.420286,4265,4043
4,181,2,2554,839,2.893808,2618,2518
5,108,samsung,2048,911,3.519393,1441,1347
6,2066,the stand,2033,476,3.221187,1896,1815
7,18621,4k,1892,439,3.716523,1154,1106
8,104,second,1857,557,3.145349,2023,1958
9,2007,the box,1695,453,3.239536,1866,1782


In [86]:
# Stack the positive and negative phrase tables into one frame with a fresh 0..n-1 index.
attributes = pd.concat([positive_phrases, negative_phrases], ignore_index=True)

In [107]:



def filter_phrases_containing_brand_model_terms(df, brand_model_terms):
    """
    drop rows whose phrase mentions any of the given brand/model terms

    a term counts as mentioned when it occurs anywhere in the phrase as a whole
    word (case-insensitive): 'the samsung stand' is dropped for the term
    'Samsung', while 'acerbic' is kept for the term 'Acer'

    df: DataFrame with a 'phrase' column of strings (missing phrases are kept)
    brand_model_terms: iterable of literal terms; None, non-string and blank
        entries are ignored
    returns: the rows of df whose phrase mentions none of the terms; every row
        when no usable term is supplied
    """
    # terms are literal text, not regular expressions: escape metacharacters
    # such as '.', '+' or '(' that occur in real brand names
    escaped_terms = [
        re.escape(term.strip())
        for term in brand_model_terms
        if isinstance(term, str) and term.strip()
    ]
    if not escaped_terms:
        # an empty alternation would match every phrase and drop all rows
        return df.copy()

    # (?<!\w) / (?!\w) act as word boundaries that also work for terms which
    # start or end with a non-word character (e.g. 'C++', 'Inc.')
    pattern = r'(?<!\w)(?:' + '|'.join(escaped_terms) + r')(?!\w)'
    mentions_term = df['phrase'].str.contains(pattern, case=False, regex=True, na=False)
    return df[~mentions_term]

def is_alpha_numeric(df):
    """
    keep only the rows whose phrase consists entirely of alphanumeric tokens

    the phrase is split on whitespace and every token must pass str.isalnum(),
    so a single token containing punctuation causes the row to be dropped
    """
    def every_token_is_alnum(phrase):
        for token in phrase.split():
            if not token.isalnum():
                return False
        return True

    keep_mask = df['phrase'].apply(every_token_is_alnum)
    return df[keep_mask]

def drop_numeric_phrases(df):
    """
    discard rows whose phrase is a single, purely numeric token (e.g. '2')
    """
    def is_lone_number(phrase):
        if len(phrase.split()) != 1:
            return False
        return phrase.isnumeric()

    numeric_mask = df['phrase'].apply(is_lone_number)
    return df[~numeric_mask]

    

In [90]:
drop_numeric_phrases(is_alpha_numeric(attributes))

Unnamed: 0,key_phrase_id,phrase,n_positive,n_negative,reviewer_idf,n_reviews,n_reviewers
0,3,this monitor,10892,2788,1.776702,8259,7695
1,29,the monitor,9363,3182,1.961287,6819,6398
2,26,the price,4541,644,2.3219,4611,4461
3,9,the screen,4272,1855,2.420286,4265,4043
5,108,samsung,2048,911,3.519393,1441,1347
6,2066,the stand,2033,476,3.221187,1896,1815
7,18621,4k,1892,439,3.716523,1154,1106
8,104,second,1857,557,3.145349,2023,1958
9,2007,the box,1695,453,3.239536,1866,1782
10,597,a lot,1631,453,3.276694,1804,1717


In [93]:
# Distinct brands of products whose title mentions both 'inch' and 'monitor'
# (case-insensitive). '%%' is an escaped literal '%' for the DB driver.
monitor_brands_query = (
    '''SELECT DISTINCT(brand)
        FROM baseline_products 
        WHERE title ILIKE '%%inch%%' 
        AND title ILIKE '%%monitor%%' 
    '''
)
monitor_brands = pd.read_sql(sql=monitor_brands_query, con=conn)

In [97]:
monitor_brands.head()

Unnamed: 0,brand
0,
1,101 Audio Video Inc.
2,1byone
3,AFUNTA
4,AMOCAM


In [108]:
# Clean the attribute list in three steps: keep alphanumeric-only phrases, drop
# bare numbers, then drop phrases matching a brand name (brands of length <= 1,
# e.g. the empty brand, are excluded from the term list).
attributes_filtered = (
    attributes
    .pipe(is_alpha_numeric)
    .pipe(drop_numeric_phrases)
    .pipe(
        filter_phrases_containing_brand_model_terms,
        monitor_brands.loc[monitor_brands['brand'].str.len() > 1, 'brand'].tolist(),
    )
)

In [109]:
attributes.shape

(100, 7)

In [110]:
attributes_filtered.shape

(89, 7)

In [112]:
attributes_filtered

Unnamed: 0,key_phrase_id,phrase,n_positive,n_negative,reviewer_idf,n_reviews,n_reviewers
0,3,this monitor,10892,2788,1.776702,8259,7695
1,29,the monitor,9363,3182,1.961287,6819,6398
2,26,the price,4541,644,2.3219,4611,4461
3,9,the screen,4272,1855,2.420286,4265,4043
6,2066,the stand,2033,476,3.221187,1896,1815
7,18621,4k,1892,439,3.716523,1154,1106
8,104,second,1857,557,3.145349,2023,1958
9,2007,the box,1695,453,3.239536,1866,1782
10,597,a lot,1631,453,3.276694,1804,1717
12,2383,the colors,1525,369,3.371946,1646,1561


In [116]:
attributes_filtered[['key_phrase_id', 'phrase']].to_json(orient='records')

'[{"key_phrase_id":3,"phrase":"this monitor"},{"key_phrase_id":29,"phrase":"the monitor"},{"key_phrase_id":26,"phrase":"the price"},{"key_phrase_id":9,"phrase":"the screen"},{"key_phrase_id":2066,"phrase":"the stand"},{"key_phrase_id":18621,"phrase":"4k"},{"key_phrase_id":104,"phrase":"second"},{"key_phrase_id":2007,"phrase":"the box"},{"key_phrase_id":597,"phrase":"a lot"},{"key_phrase_id":2383,"phrase":"the colors"},{"key_phrase_id":41,"phrase":"amazon"},{"key_phrase_id":57,"phrase":"a monitor"},{"key_phrase_id":48,"phrase":"the picture"},{"key_phrase_id":2351,"phrase":"hdmi"},{"key_phrase_id":2132,"phrase":"the display"},{"key_phrase_id":6286,"phrase":"windows"},{"key_phrase_id":2444,"phrase":"great monitor"},{"key_phrase_id":109,"phrase":"gaming"},{"key_phrase_id":880,"phrase":"speakers"},{"key_phrase_id":2414,"phrase":"colors"},{"key_phrase_id":2122,"phrase":"games"},{"key_phrase_id":3660,"phrase":"work"},{"key_phrase_id":21,"phrase":"the speakers"},{"key_phrase_id":294,"phrase":"

# Getting reviews given attributes

In [118]:
attributes_filtered.sample(5)

Unnamed: 0,key_phrase_id,phrase,n_positive,n_negative,reviewer_idf,n_reviews,n_reviewers
87,7338,2 weeks,39,51,6.853827,49,48
79,7884,doa,36,48,6.630683,60,60
36,250,movies,904,166,3.911583,946,910
95,27875,meh,17,34,7.259292,32,32
45,494,quality,767,174,3.988061,868,843


In [None]:
# Scratch SQL kept for reference. It is not Python, so it must stay commented
# out or this cell raises a SyntaxError on Restart & Run All:
# select * from table where value ~* 'the price|the stand|2 weeks|quality'

In [152]:
# Look up the key_phrase_id of each example query phrase.
phrase_ids_query = (
    '''SELECT key_phrase_id, phrase 
        FROM key_phrase_root 
        WHERE phrase IN ('the price', 'the stand', '2 weeks', 'quality')
    '''
)
query_phrases = pd.read_sql(sql=phrase_ids_query, con=conn)

In [153]:
query_phrases

Unnamed: 0,key_phrase_id,phrase
0,26,the price
1,494,quality
2,2066,the stand
3,7338,2 weeks


In [154]:
# Fetch every (key_phrase_id, review_id) pair for the example query phrases.
review_results_query = (
    '''SELECT key_phrase_id, review_id 
        FROM key_phrase_reviews 
        WHERE key_phrase_id IN 
        (SELECT key_phrase_id 
        FROM key_phrase_root 
        WHERE phrase IN ('the price', 'the stand', '2 weeks', 'quality'))
    '''
)
review_ids_for_query = pd.read_sql(sql=review_results_query, con=conn)

In [155]:
review_ids_for_query

Unnamed: 0,key_phrase_id,review_id
0,26,139343
1,26,360959
2,26,721407
3,26,721408
4,26,360976
...,...,...
7419,7338,970614
7420,7338,628984
7421,7338,178426
7422,7338,1379579


In [158]:
# Attach the phrase text to each (key_phrase_id, review_id) pair.
review_ids_for_query = pd.merge(
    review_ids_for_query,
    query_phrases,
    how='left',
    on='key_phrase_id',
)

In [159]:
# Collapse to one row per review: the list of matched phrases and how many there are.
review_ids_for_query = (
    review_ids_for_query
    .groupby('review_id')['phrase']
    .apply(list)
    .reset_index()
    .assign(n_matches=lambda grouped: grouped['phrase'].map(len))
)

In [160]:
# Keep the 25 reviews that match the most query phrases.
top_matched_reviews = (
    review_ids_for_query
    .sort_values(by='n_matches', ascending=False)
    .iloc[:25]
)
top_matched_reviews

Unnamed: 0,review_id,phrase,n_matches
2317,473934,"[the price, quality, the stand]",3
4682,878360,"[the price, quality, the stand]",3
1104,246770,"[the price, quality, the stand]",3
5632,1118343,"[the price, quality, the stand]",3
1561,346497,"[the price, quality, the stand]",3
5244,983402,"[the price, quality, the stand]",3
949,227216,"[the price, quality, the stand]",3
4508,847415,"[the price, quality, the stand]",3
637,173319,"[the price, quality, the stand]",3
2851,572342,"[the price, quality, the stand]",3


In [161]:
# Pull the full review rows for the top matched review ids; the ids are
# interpolated into the IN (...) list as a comma-separated string.
fetch_matched_reviews_query = (
    f'''SELECT *
        FROM baseline_reviews
        WHERE review_id IN (
            {','.join(map(str, top_matched_reviews['review_id']))}
        )
        
    '''
)
matched_reviews = pd.read_sql(sql=fetch_matched_reviews_query, con=conn)

In [162]:
matched_reviews

Unnamed: 0,review_id,rating,sentiment,vote,verified,reviewerID,asin,reviewText,reviewTitle,reviewTime
0,173319,3.0,negative,5.0,True,A1WT047CVF256C,B001LYPNFQ,"Overall a nice monitor. Yes, it has tint problems out of the box, but you should calibrate every monitor you get anyway, regardless of quality. It can make a huge difference, even if you're not ed...",I would give 3.5 stars if it were possible,2010-11-02
1,112406,5.0,positive,0.0,True,A28GZ52326W7ET,B00139S3U6,"The latest generation of Hewlett Packard widescreen computer monitors sets a new standard for quality and value, and this model is probably just about right for your average desktop user. At 22inc...",HP Monitor Beats The Competition,2008-12-20
2,352163,5.0,positive,0.0,True,A1LDB0HGS322GA,B005HPSFWI,I wanted to declutter my desk so I bought this stand to hold my 2 24 inch monitors. The price of the stand was good and it looks nice on my desk. The monitors are very stable. When mounting the mo...,"Good value, looks nice on the desk",2013-10-01
3,227216,5.0,positive,0.0,True,A3NM39O6R8H3A5,B0039648BO,"I'm going to start this review of with what I feel are this monitors shortcomings, because there really aren't that many of them.\nAlso keep in mind that, my opinion of a pro or con, feature wise,...",Best purchase I have made this year,2012-11-28
4,209586,5.0,positive,12.0,False,A25A7C826KIX2R,B002MT6SDU,"I bought this to replace an older 4:3 Princeton LCD. Worked fine right out of the box with my Lenovo T61 laptop and its built in NVidia graphics card. My previous experience with 15-17"" widescre...",Excellent Monitor,2009-12-29
5,222049,5.0,positive,0.0,True,A3376ZXV304DQV,B002ZVCGXQ,"This has to be the best monitor you can buy for the price.\n\nPros:\nPrice - for under two hundred, a bargain.\nQuality - the best monitor I've ever owned. No dead pixels, scratches, or anything. ...",Great Product,2010-07-23
6,246770,5.0,positive,0.0,True,A37863TC1Q7059,B003Y3BJ7S,"Finally deciding to retire a 21"" CRT Viewsonic mainly because it was making my computer room very hot in the summer. I stayed with Viewsonic because of the positive experience I had for the past ...","Plenty of screen for photo, video and gaming",2013-01-31
7,346497,4.0,positive,94.0,False,A2AY4YUOX2N1BQ,B005FNH9RE,"*REVIEW UPDATED!*\n\nI recently built myself a desktop, so of course it was time for a new monitor. I knew I wanted LED for the power savings alone (and the arguably better blacks/contrast) and LG...",For the price - excellent!,2012-03-07
8,473333,4.0,positive,0.0,True,A2GS7CV57Y7UT2,B0098Y77U0,"<div id=""video-block-R9POUR97PUQWU"" class=""a-section a-spacing-small a-spacing-top-mini video-block""></div><input type=""hidden"" name="""" value=""https://images-na.ssl-images-amazon.com/images/I/D1yx...","Tilt difficult, NOT impossible",2016-10-05
9,473934,5.0,positive,0.0,True,A11Z60P0Z6CHFO,B0098Y77U0,I bought this monitor in early 2015 and I'm actually back to buy another for a dual monitor setup which brings me to this review I forgot to leave earlier. This monitor is great for my general pur...,A year later and still running like new!,2016-03-13


In [164]:
# Add the matched-phrase list and match count to each fetched review.
matched_reviews = pd.merge(matched_reviews, top_matched_reviews, on='review_id')

In [165]:
matched_reviews

Unnamed: 0,review_id,rating,sentiment,vote,verified,reviewerID,asin,reviewText,reviewTitle,reviewTime,phrase,n_matches
0,173319,3.0,negative,5.0,True,A1WT047CVF256C,B001LYPNFQ,"Overall a nice monitor. Yes, it has tint problems out of the box, but you should calibrate every monitor you get anyway, regardless of quality. It can make a huge difference, even if you're not ed...",I would give 3.5 stars if it were possible,2010-11-02,"[the price, quality, the stand]",3
1,112406,5.0,positive,0.0,True,A28GZ52326W7ET,B00139S3U6,"The latest generation of Hewlett Packard widescreen computer monitors sets a new standard for quality and value, and this model is probably just about right for your average desktop user. At 22inc...",HP Monitor Beats The Competition,2008-12-20,"[the price, quality]",2
2,352163,5.0,positive,0.0,True,A1LDB0HGS322GA,B005HPSFWI,I wanted to declutter my desk so I bought this stand to hold my 2 24 inch monitors. The price of the stand was good and it looks nice on my desk. The monitors are very stable. When mounting the mo...,"Good value, looks nice on the desk",2013-10-01,"[the price, the stand]",2
3,227216,5.0,positive,0.0,True,A3NM39O6R8H3A5,B0039648BO,"I'm going to start this review of with what I feel are this monitors shortcomings, because there really aren't that many of them.\nAlso keep in mind that, my opinion of a pro or con, feature wise,...",Best purchase I have made this year,2012-11-28,"[the price, quality, the stand]",3
4,209586,5.0,positive,12.0,False,A25A7C826KIX2R,B002MT6SDU,"I bought this to replace an older 4:3 Princeton LCD. Worked fine right out of the box with my Lenovo T61 laptop and its built in NVidia graphics card. My previous experience with 15-17"" widescre...",Excellent Monitor,2009-12-29,"[the price, quality, the stand]",3
5,222049,5.0,positive,0.0,True,A3376ZXV304DQV,B002ZVCGXQ,"This has to be the best monitor you can buy for the price.\n\nPros:\nPrice - for under two hundred, a bargain.\nQuality - the best monitor I've ever owned. No dead pixels, scratches, or anything. ...",Great Product,2010-07-23,"[the price, quality, the stand]",3
6,246770,5.0,positive,0.0,True,A37863TC1Q7059,B003Y3BJ7S,"Finally deciding to retire a 21"" CRT Viewsonic mainly because it was making my computer room very hot in the summer. I stayed with Viewsonic because of the positive experience I had for the past ...","Plenty of screen for photo, video and gaming",2013-01-31,"[the price, quality, the stand]",3
7,346497,4.0,positive,94.0,False,A2AY4YUOX2N1BQ,B005FNH9RE,"*REVIEW UPDATED!*\n\nI recently built myself a desktop, so of course it was time for a new monitor. I knew I wanted LED for the power savings alone (and the arguably better blacks/contrast) and LG...",For the price - excellent!,2012-03-07,"[the price, quality, the stand]",3
8,473333,4.0,positive,0.0,True,A2GS7CV57Y7UT2,B0098Y77U0,"<div id=""video-block-R9POUR97PUQWU"" class=""a-section a-spacing-small a-spacing-top-mini video-block""></div><input type=""hidden"" name="""" value=""https://images-na.ssl-images-amazon.com/images/I/D1yx...","Tilt difficult, NOT impossible",2016-10-05,"[the price, the stand]",2
9,473934,5.0,positive,0.0,True,A11Z60P0Z6CHFO,B0098Y77U0,I bought this monitor in early 2015 and I'm actually back to buy another for a dual monitor setup which brings me to this review I forgot to leave earlier. This monitor is great for my general pur...,A year later and still running like new!,2016-03-13,"[the price, quality, the stand]",3


# Getting products given attributes (and reviews)

In [167]:
# Re-fetch every (key_phrase_id, review_id) pair for the example query phrases,
# this time as the starting point for ranking products.
review_results_query = (
    '''SELECT key_phrase_id, review_id 
        FROM key_phrase_reviews 
        WHERE key_phrase_id IN 
        (SELECT key_phrase_id 
        FROM key_phrase_root 
        WHERE phrase IN ('the price', 'the stand', '2 weeks', 'quality'))
    '''
)
review_ids_for_query = pd.read_sql(sql=review_results_query, con=conn)

In [168]:
# Attach phrase text, then collapse to one row per review with the list of
# matched phrases and the number of matches.
review_ids_for_query = (
    pd.merge(review_ids_for_query, query_phrases, how='left', on='key_phrase_id')
    .groupby('review_id')['phrase']
    .apply(list)
    .reset_index()
    .assign(n_matches=lambda grouped: grouped['phrase'].map(len))
)

In [171]:
review_ids_for_query.shape

(6870, 3)

In [172]:
# Pull the full review rows for every matched review id (no top-N cut here);
# the ids are interpolated into the IN (...) list as a comma-separated string.
fetch_matched_reviews_query = (
    f'''SELECT *
        FROM baseline_reviews
        WHERE review_id IN (
            {','.join(map(str, review_ids_for_query['review_id']))}
        )
        
    '''
)
matched_reviews = pd.read_sql(sql=fetch_matched_reviews_query, con=conn)

In [175]:
# Add the matched-phrase list and match count to each fetched review.
matched_reviews = pd.merge(matched_reviews, review_ids_for_query, on='review_id')

In [176]:
matched_reviews.shape

(6870, 12)

In [177]:
matched_reviews.head()

Unnamed: 0,review_id,rating,sentiment,vote,verified,reviewerID,asin,reviewText,reviewTitle,reviewTime,phrase,n_matches
0,42179,5.0,positive,25.0,True,A24DGMBY82E69H,B0002X8TVW,"I thought this monitor would be too good to be true for the price. It is fast, clear and I have not touched a control since getting it out of the box. It has been a joy to play video games on. Its...",Great Value,2005-03-02,[the price],1
1,54659,4.0,positive,0.0,True,A1A27N3E2A4PVT,B000A5S926,Awesome sound for the price.,Awesome sound for the price.,2017-07-26,[the price],1
2,34776,5.0,positive,72.0,False,A1QJ8RKPWKXI7O,B00025EKO0,"I'll echo the other comment. Educate yourself about the benefits and drawbacks of an LCD before opening trap. LCDs have one major limitation, and that is the image quality at anything other than n...",Make sure of your knowledge and needs before buying,2004-09-02,[the price],1
3,40298,5.0,positive,0.0,True,AERARUWRCEA3C,B0002M7FCC,Awesome sub for the price. Now my movies and music have a nice hint of bass. Would highly recommend this,Awesome sub for the price,2015-10-18,[the price],1
4,40306,5.0,positive,0.0,True,A3IVIGQK3E3ZNT,B0002M7FCC,"works very nicely; easy to set up. Provides excellent bass for the price. It is a big larger than I expected though, be sure to review the dimensions.",works very nicely; easy to set up,2015-06-17,[the price],1


In [187]:
from collections import Counter

In [241]:
# Aggregate the matched reviews per product (asin), then rank products by
# number of matching reviews, mean rating, total matches, verified count and votes.
product_ranking = (
    matched_reviews
    .groupby(['asin'])
    .agg({
        'n_matches': 'sum',
        'rating': 'mean',
        'verified': 'sum',
        'vote': 'sum',
        'review_id': 'count',
        'phrase': 'sum',  # summing lists concatenates them: all matched phrases per product
    })
    .sort_values(by=['review_id', 'rating', 'n_matches', 'verified', 'vote'], ascending=False)
    .reset_index()
)

In [242]:
product_ranking.head()

Unnamed: 0,asin,n_matches,rating,verified,vote,review_id,phrase
0,B0098Y77U0,339,4.273616,288,322.0,307,"[the price, the price, the stand, the price, the price, the price, the price, the price, quality, the stand, the price, the price, the price, the price, the stand, the price, quality, the price, q..."
1,B015WCV70W,226,4.539906,199,675.0,213,"[the price, the price, the price, the price, the stand, the price, the stand, the price, the price, the price, the price, the price, the price, the price, the price, the price, the price, the pric..."
2,B003Y3BJ7S,159,4.544218,139,1188.0,147,"[the stand, the price, the price, the price, quality, the price, the price, the price, the price, the price, quality, the price, the price, the price, the price, the price, the price, the price, q..."
3,B00EZSUWFG,131,4.336066,114,463.0,122,"[the price, the price, the price, the stand, quality, the price, the price, the price, the price, the price, quality, the stand, the price, the price, the price, the price, the price, quality, the..."
4,B00C8T5KOW,123,4.330435,106,143.0,115,"[the price, quality, the price, the price, the price, the price, the price, the price, the price, quality, the price, the stand, the price, the stand, the price, the price, the price, the price, t..."


In [243]:
# Turn each product's concatenated phrase list into per-phrase occurrence counts.
product_ranking['phrase'] = product_ranking['phrase'].apply(Counter)

In [244]:
# After the 'count' aggregation the review_id column holds the number of reviews; name it so.
product_ranking = product_ranking.rename({'review_id': 'n_reviews'}, axis='columns')
product_ranking.head(10)

Unnamed: 0,asin,n_matches,rating,verified,vote,n_reviews,phrase
0,B0098Y77U0,339,4.273616,288,322.0,307,"{'the price': 232, 'the stand': 82, 'quality': 25}"
1,B015WCV70W,226,4.539906,199,675.0,213,"{'the price': 162, 'the stand': 46, 'quality': 16, '2 weeks': 2}"
2,B003Y3BJ7S,159,4.544218,139,1188.0,147,"{'the stand': 21, 'the price': 120, 'quality': 18}"
3,B00EZSUWFG,131,4.336066,114,463.0,122,"{'the price': 93, 'the stand': 25, 'quality': 13}"
4,B00C8T5KOW,123,4.330435,106,143.0,115,"{'the price': 95, 'quality': 12, 'the stand': 16}"
5,B00081NX5U,108,4.509434,100,86.0,106,"{'the price': 99, 'quality': 9}"
6,B000A5S926,107,4.40566,100,85.0,106,"{'the price': 94, 'quality': 12, '2 weeks': 1}"
7,B00CLZ047Q,118,4.298077,90,575.0,104,"{'quality': 12, 'the price': 71, 'the stand': 34, '2 weeks': 1}"
8,B00D601UC8,107,4.306931,94,584.0,101,"{'the price': 73, 'the stand': 24, 'quality': 10}"
9,B00IKDFL4O,101,4.59375,86,395.0,96,"{'the price': 76, 'the stand': 19, 'quality': 6}"


In [245]:
product_ranking.head(10).to_json(orient='records')

'[{"asin":"B0098Y77U0","n_matches":339,"rating":4.2736156352,"verified":288,"vote":322.0,"n_reviews":307,"phrase":{"the price":232,"the stand":82,"quality":25}},{"asin":"B015WCV70W","n_matches":226,"rating":4.5399061033,"verified":199,"vote":675.0,"n_reviews":213,"phrase":{"the price":162,"the stand":46,"quality":16,"2 weeks":2}},{"asin":"B003Y3BJ7S","n_matches":159,"rating":4.5442176871,"verified":139,"vote":1188.0,"n_reviews":147,"phrase":{"the stand":21,"the price":120,"quality":18}},{"asin":"B00EZSUWFG","n_matches":131,"rating":4.3360655738,"verified":114,"vote":463.0,"n_reviews":122,"phrase":{"the price":93,"the stand":25,"quality":13}},{"asin":"B00C8T5KOW","n_matches":123,"rating":4.3304347826,"verified":106,"vote":143.0,"n_reviews":115,"phrase":{"the price":95,"quality":12,"the stand":16}},{"asin":"B00081NX5U","n_matches":108,"rating":4.5094339623,"verified":100,"vote":86.0,"n_reviews":106,"phrase":{"the price":99,"quality":9}},{"asin":"B000A5S926","n_matches":107,"rating":4.405

In [261]:
# First ten rows of the ranking, i.e. the ten best-ranked products.
top10_products = product_ranking.iloc[:10]

In [262]:
f'''
            '{"','".join(top10_products['asin'].astype(str).tolist())}'
    '''

"\n            'B0098Y77U0','B015WCV70W','B003Y3BJ7S','B00EZSUWFG','B00C8T5KOW','B00081NX5U','B000A5S926','B00CLZ047Q','B00D601UC8','B00IKDFL4O'\n    "

In [263]:
# Fetch the catalogue rows for the top products; the asins are interpolated as a
# list of single-quoted string literals.
fetch_matched_products_query = (
    f'''SELECT *
        FROM baseline_products
        WHERE asin IN ('{"','".join(map(str, top10_products['asin']))}')
    '''
)
matched_products = pd.read_sql(sql=fetch_matched_products_query, con=conn)

In [264]:
matched_products.shape

(10, 13)

In [265]:
# Enrich the ranked products with their title, description and image URLs.
top10_products = pd.merge(
    top10_products,
    matched_products.loc[:, ['asin', 'title', 'description', 'imageURLHighRes']],
    on='asin',
)

In [266]:
top10_products

Unnamed: 0,asin,n_matches,rating,verified,vote,n_reviews,phrase,title,description,imageURLHighRes
0,B0098Y77U0,339,4.273616,288,322.0,307,"{'the price': 232, 'the stand': 82, 'quality': 25}",Acer G236HL Bbd 23-Inch Screen LED-Lit Monitor,"Acer G-Series monitors sport a super-slim profile with an X-shaped stand that lets them fit nicely in spaces of any size. Oustanding performance, eco-friendliness and smart use of energy complemen...","https://images-na.ssl-images-amazon.com/images/I/31Nq2Q4eMSL.jpg, https://images-na.ssl-images-amazon.com/images/I/31Oq-Fq7z2L.jpg, https://images-na.ssl-images-amazon.com/images/I/31%2B5p99yQTL.j..."
1,B015WCV70W,226,4.539906,199,675.0,213,"{'the price': 162, 'the stand': 46, 'quality': 16, '2 weeks': 2}",HP Pavilion 21.5-Inch IPS LED HDMI VGA Monitor (22cwa),"It's time for a monitor that's worthy of your family's content. When you're looking for the best specs at the right price, the 21.5"" diagonal full HD display delivers with trusted HP performance a...","https://images-na.ssl-images-amazon.com/images/I/51ede3cxIDL.jpg, https://images-na.ssl-images-amazon.com/images/I/41F%2Bgneo-HL.jpg, https://images-na.ssl-images-amazon.com/images/I/511uOc7mRfL.j..."
2,B003Y3BJ7S,159,4.544218,139,1188.0,147,"{'the stand': 21, 'the price': 120, 'quality': 18}",ViewSonic VX2250WM-LED 22-Inch (21.5-Inch Vis) Widescreen Full HD 1080p LED Monitor with Integrated Stereo Speakers,ViewSonic&#8217;s VX2250wm-LED features an LED backlight 22-Inch (21.5-Inch Vis) widescreen monitor with up to 40% energy saving compared to a regular 22-Inch monitor. mercury free VX2250wm-LED of...,"https://images-na.ssl-images-amazon.com/images/I/51MGz38raAL.jpg, https://images-na.ssl-images-amazon.com/images/I/51jjG6kBQcL.jpg, https://images-na.ssl-images-amazon.com/images/I/31uiD74sUvL.jpg..."
3,B00EZSUWFG,131,4.336066,114,463.0,122,"{'the price': 93, 'the stand': 25, 'quality': 13}",ViewSonic VX2252MH 22 Inch 2ms 75Hz 1080p Gaming Monitor with HDMI DVI and VGA Inputs,"ViewSonic's VX2252mh is a 22"" (21.5"" viewable) 60Hz Full HD, glossy-finish display that offers the ultimate visual experience for gaming and multimedia entertainment. The VX2252mh features ClearMo...","https://images-na.ssl-images-amazon.com/images/I/512yOFwUspL.jpg, https://images-na.ssl-images-amazon.com/images/I/41cnUNS3tqL.jpg, https://images-na.ssl-images-amazon.com/images/I/41laWQIDoQL.jpg..."
4,B00C8T5KOW,123,4.330435,106,143.0,115,"{'the price': 95, 'quality': 12, 'the stand': 16}","AOC e2050Swd 20-Inch Class Screen LED-Lit Computer Monitor, 1600 x 900 Resolution, 5ms, 20M:1DCR, VGA/DVI, VESA","AOC e2050Swd 19. 5"" Hd 1600x900 monitor, 5ms, DVI-D/VGA, VESA compatible, low power mode, EPEAT silver, energy start or","https://images-na.ssl-images-amazon.com/images/I/213uchzTA0L.jpg, https://images-na.ssl-images-amazon.com/images/I/41EKKgnJYrL.jpg, https://images-na.ssl-images-amazon.com/images/I/511LSTbBIwL.jpg..."
5,B00081NX5U,108,4.509434,100,86.0,106,"{'the price': 99, 'quality': 9}","Dual Electronics LU43PB 4 inch 3-Way High Performance Indoor, Outdoor &amp; Bookshelf Studio Monitor Speakers with Swivel Brackets &amp; 100 Watts Peak Power","The Dual Electronics LU43PB 4 inch 3-Way High Performance Indoor, Outdoor &amp; Bookshelf Studio Monitor Speakers with Swivel Brackets &amp; 100 Watts Peak Power will make a great addition to any ...","https://images-na.ssl-images-amazon.com/images/I/51GQZT32D9L.jpg, https://images-na.ssl-images-amazon.com/images/I/51iHCQI7OVL.jpg, https://images-na.ssl-images-amazon.com/images/I/51jEBRfGxRL.jpg..."
6,B000A5S926,107,4.40566,100,85.0,106,"{'the price': 94, 'quality': 12, '2 weeks': 1}","Dual Electronics LU43PW 4 inch 3-Way High Performance Indoor, Outdoor &amp; Bookshelf Studio Monitor Speakers with Swivel Brackets &amp; 100 Watts Peak Power (Sold in Pairs)","The Dual Electronics LU43PW 4 inch 3-Way High Performance Indoor, Outdoor & Bookshelf Studio Monitor Speakers with Swivel Brackets & 100 Watts Peak Power will make a great addition to any home. Yo...","https://images-na.ssl-images-amazon.com/images/I/51DaTB-QoUL.jpg, https://images-na.ssl-images-amazon.com/images/I/51EN9ki4oGL.jpg, https://images-na.ssl-images-amazon.com/images/I/518JmpJWBoL.jpg..."
7,B00CLZ047Q,118,4.298077,90,575.0,104,"{'quality': 12, 'the price': 71, 'the stand': 34, '2 weeks': 1}","AOC I2267FW 22-Inch Class IPS Frameless/Slim LED Monitor, Full HD,250 cd/m2 Brightness,5ms,50M:1 DCR,VGA/DVI",Display your work and entertainment with a nearly borderless appearance thanks to the ultra-narrow bezel. Enjoy brilliant color reproduction and impressive 178-degree viewing angles with IPS techn...,"https://images-na.ssl-images-amazon.com/images/I/31UdbNgrvIL.jpg, https://images-na.ssl-images-amazon.com/images/I/31BjjEDvgvL.jpg, https://images-na.ssl-images-amazon.com/images/I/41eVcOM-tDL.jpg..."
8,B00D601UC8,107,4.306931,94,584.0,101,"{'the price': 73, 'the stand': 24, 'quality': 10}",ViewSonic VA2446M-LED 24 Inch Full HD 1080p LED Monitor with DVI and VGA Inputs,"The ViewSonic VA2446m-LED is a 24"" (23.6 Vis) widescreen monitor with an LED backlight and glossy, thin-bezel design ideal for use in the home or at the office. With Full HD 1920x1080 resolution a...","https://images-na.ssl-images-amazon.com/images/I/518mJ7vMY%2BL.jpg, https://images-na.ssl-images-amazon.com/images/I/41LJZ%2BBTBcL.jpg, https://images-na.ssl-images-amazon.com/images/I/419qPc643zL..."
9,B00IKDFL4O,101,4.59375,86,395.0,96,"{'the price': 76, 'the stand': 19, 'quality': 6}","BenQ GL2460HM 24 Inch 1080p LED Gaming Monitor, 2ms, HDMI, DVI, Built-In Speakers, Eye Care Technology, Low Blue Light, ZeroFlicker, Energy Star Certified Monitor, VESA mountable",BENQ GL2460HM 24 Gaming Monitor,"https://images-na.ssl-images-amazon.com/images/I/41fUr0x8tCL.jpg, https://images-na.ssl-images-amazon.com/images/I/51bqA7ibEYL.jpg"


In [268]:
top10_products.to_json(orient='records').replace('\\/', '/')

'[{"asin":"B0098Y77U0","n_matches":339,"rating":4.2736156352,"verified":288,"vote":322.0,"n_reviews":307,"phrase":{"the price":232,"the stand":82,"quality":25},"title":"Acer G236HL Bbd 23-Inch Screen LED-Lit Monitor","description":"Acer G-Series monitors sport a super-slim profile with an X-shaped stand that lets them fit nicely in spaces of any size. Oustanding performance, eco-friendliness and smart use of energy complement the sleek design - all while staying well within budget. The Acer G236HL Bbd showcases a 23\\" Widescreen LCD with impressive 1920 x 1080 resolution. Enjoy widely available 16:9 HD digital content without image distortion on the expansive widescreen LCD. The remarkable 100 Million:1 contrast ratio ensures a crystal-clear viewing experience while rapid 5ms response time keeps moving images sharp. It also supports both VGA and DVI-D inputs offering greater compatibility. The Acer G-Series LCD brings an amazing multimedia experience to your home with a widescreen cin

# Scratch

In [130]:
review_ids_for_query['review_id'].value_counts().head(20)

776932     3
227216     3
209586     3
222049     3
246770     3
1118343    3
1248438    3
847415     3
983402     3
572342     3
346497     3
878360     3
473934     3
594937     3
173319     3
736067     3
713825     3
898932     2
117150     2
488534     2
Name: review_id, dtype: int64