# In this notebook

- We build API functions that return the newer shortlisted attributes
- We refer to notebooks `13`, `14`, `16`, and `17` as needed.


In [140]:
import pandas as pd
import json

pd.options.display.max_columns = 50
pd.options.display.max_colwidth = 200
pd.options.display.max_rows = 1000

In [141]:
from sqlalchemy import create_engine
import psycopg2 
import io

In [142]:
import os
import glob

In [143]:
import pickle

In [144]:
import numpy as np

### set up database connection

In [145]:
conn_string = 'postgresql+psycopg2://gabbydbuser:gabbyDBpass@localhost:5432/gabbyDB'

In [146]:
db = create_engine(conn_string)
conn = db.connect()

In [147]:
CATEGORY = 'headphone'

# get_attributes_list() API

In [148]:
def get_attributes_list(category, n_qphrase_attrs=10):
    shortlisted_attributes_query = \
        f'''
        SELECT *
        FROM shortlisted_attributes
        WHERE category='{category}'
        '''
    shortlisted_attributes = pd.read_sql(shortlisted_attributes_query, conn)
    sim_attrs_list = \
        shortlisted_attributes. \
            sort_values('neighbor_distances').sort_values('n_reviews', ascending=False). \
                groupby('qphrase'). \
                    head(n_qphrase_attrs). \
                        reset_index(drop=True)[['key_phrase_id', 'phrase', 'qphrase', 'n_reviewers']].sort_values('qphrase')
    sim_attrs_list_deduped =  sim_attrs_list[['key_phrase_id', 'phrase', 'n_reviewers']].drop_duplicates()
    return sim_attrs_list_deduped.sample(min(50, sim_attrs_list_deduped.shape[0]))
    

In [150]:
get_attributes_list(CATEGORY, 10)['n_reviewers'].sum()

617

# get_products_for_attributes()

In [89]:
def _gen_attribute_sentiment_query_v2(category, attribute_list, sentiment):
    return \
        f""" SELECT BR.asin, PHR.key_phrase_id, PHR.phrase, BR.sentiment, count(*) as count
            FROM (SELECT key_phrase_id, phrase
                FROM key_phrase_root 
                WHERE category='{category}' 
                    AND phrase IN ('{"', '".join(attribute_list)}') 
                ) as PHR
            LEFT JOIN key_phrase_reviews KPR
                ON KPR.key_phrase_id=PHR.key_phrase_id
            LEFT JOIN baseline_reviews BR
                ON BR.review_id=KPR.review_id
            WHERE BR.sentiment='{sentiment}'
            GROUP BY BR.asin, PHR.key_phrase_id, PHR.phrase, BR.sentiment
        """


In [90]:
attribute_list = ['comfortable design', 'good battery life', 'light weight']
attributes_counts_positive_sql_query = \
    _gen_attribute_sentiment_query_v2(CATEGORY, attribute_list, 'positive')
attributes_counts_negative_sql_query = \
    _gen_attribute_sentiment_query_v2(CATEGORY, attribute_list, 'negative')
    
    

In [91]:
positive_attributes_counts = pd.read_sql(attributes_counts_positive_sql_query, conn)
negative_attributes_counts = pd.read_sql(attributes_counts_negative_sql_query, conn)

In [92]:
attributes_counts = pd.concat([positive_attributes_counts, negative_attributes_counts])

In [93]:
attributes_counts.groupby(['asin', 'phrase'])['count'].sum().reset_index()

Unnamed: 0,asin,phrase,count
0,B00009MK3T,light weight,1
1,B000MMWT9Q,light weight,2
2,B005KJM30G,light weight,1
3,B00IUICOR6,light weight,2
4,B00M1NEUA0,light weight,2
5,B00M58CMTM,good battery life,1
6,B00M58CMTM,light weight,2
7,B00MBWIL0G,good battery life,1
8,B00MWDGW28,comfortable design,1
9,B00NCSIN4W,good battery life,2


In [94]:
product_attribute_counts = pd.pivot_table( 
                            attributes_counts.groupby(['asin', 'phrase'])['count'].sum().reset_index(),
                            values='count',
                            index='asin',
                            columns='phrase',
                            aggfunc=sum
                        ).fillna(0).reset_index()

In [95]:
product_attribute_counts
product_attribute_counts = product_attribute_counts.rename(columns={c:f"{'_'.join(c.split())}_num_reviews" for c in product_attribute_counts.columns[1:]})

In [96]:
product_attribute_counts

phrase,asin,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews
0,B00009MK3T,0.0,0.0,1.0
1,B000MMWT9Q,0.0,0.0,2.0
2,B005KJM30G,0.0,0.0,1.0
3,B00IUICOR6,0.0,0.0,2.0
4,B00M1NEUA0,0.0,0.0,2.0
5,B00M58CMTM,0.0,1.0,2.0
6,B00MBWIL0G,0.0,1.0,0.0
7,B00MWDGW28,1.0,0.0,0.0
8,B00NCSIN4W,0.0,2.0,0.0
9,B00P89AVRU,0.0,2.0,0.0


In [97]:
product_attribute_counts['total_reviews_in_context'] = product_attribute_counts[product_attribute_counts.columns[1:]].sum(axis=1)
for phrase in product_attribute_counts.columns[1:-1]:
        product_attribute_counts[f"{phrase}_pbry"] = product_attribute_counts[phrase]/product_attribute_counts['total_reviews_in_context']

In [98]:
product_attribute_counts.sort_values('total_reviews_in_context', ascending=False).head()

phrase,asin,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews,total_reviews_in_context,comfortable_design_num_reviews_pbry,good_battery_life_num_reviews_pbry,light_weight_num_reviews_pbry
12,B013HSW4N2,1.0,7.0,0.0,8.0,0.125,0.875,0.0
5,B00M58CMTM,0.0,1.0,2.0,3.0,0.0,0.333333,0.666667
14,B019C1MBWW,0.0,3.0,0.0,3.0,0.0,1.0,0.0
1,B000MMWT9Q,0.0,0.0,2.0,2.0,0.0,0.0,1.0
3,B00IUICOR6,0.0,0.0,2.0,2.0,0.0,0.0,1.0


In [99]:
pos_counts = positive_attributes_counts[['asin', 'phrase', 'count']]. \
            pivot_table(values='count', index='asin', columns='phrase', aggfunc=sum). \
                fillna(0). \
                    reset_index()
pos_counts = pos_counts.rename(columns={c:f"{'_'.join(c.split())}_pos" for c in pos_counts.columns[1:]})

neg_counts = negative_attributes_counts[['asin', 'phrase', 'count']]. \
        pivot_table(values='count', index='asin', columns='phrase', aggfunc=sum). \
            fillna(0). \
                reset_index()
neg_counts = neg_counts.rename(columns={c:f"{'_'.join(c.split())}_neg" for c in neg_counts.columns[1:]})

prod_attr_prby = product_attribute_counts.merge(pos_counts, on='asin', how='left').merge(neg_counts, on='asin', how='left').fillna(0)

In [100]:
prod_attr_prby.sort_values('total_reviews_in_context', ascending=False).head()

phrase,asin,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews,total_reviews_in_context,comfortable_design_num_reviews_pbry,good_battery_life_num_reviews_pbry,light_weight_num_reviews_pbry,comfortable_design_pos,good_battery_life_pos,light_weight_pos,comfortable_design_neg,good_battery_life_neg
12,B013HSW4N2,1.0,7.0,0.0,8.0,0.125,0.875,0.0,0.0,4.0,0.0,1.0,3.0
5,B00M58CMTM,0.0,1.0,2.0,3.0,0.0,0.333333,0.666667,0.0,1.0,2.0,0.0,0.0
14,B019C1MBWW,0.0,3.0,0.0,3.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0
1,B000MMWT9Q,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0
3,B00IUICOR6,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0


In [101]:
for phrase in [c for c in prod_attr_prby.columns if c.endswith('pos') or c.endswith('neg')]:
    prod_attr_prby[f"{phrase}_pbry"] = prod_attr_prby[phrase]/prod_attr_prby['total_reviews_in_context']

In [102]:
prod_attr_prby.sort_values('total_reviews_in_context', ascending=False).head()

phrase,asin,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews,total_reviews_in_context,comfortable_design_num_reviews_pbry,good_battery_life_num_reviews_pbry,light_weight_num_reviews_pbry,comfortable_design_pos,good_battery_life_pos,light_weight_pos,comfortable_design_neg,good_battery_life_neg,comfortable_design_pos_pbry,good_battery_life_pos_pbry,light_weight_pos_pbry,comfortable_design_neg_pbry,good_battery_life_neg_pbry
12,B013HSW4N2,1.0,7.0,0.0,8.0,0.125,0.875,0.0,0.0,4.0,0.0,1.0,3.0,0.0,0.5,0.0,0.125,0.375
5,B00M58CMTM,0.0,1.0,2.0,3.0,0.0,0.333333,0.666667,0.0,1.0,2.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0
14,B019C1MBWW,0.0,3.0,0.0,3.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,B000MMWT9Q,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,B00IUICOR6,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [103]:
prod_attr_prby['total_perc_rank'] = prod_attr_prby['total_reviews_in_context'].rank(pct=True)


In [104]:
prod_attr_prby.sort_values('total_reviews_in_context', ascending=False).head()

phrase,asin,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews,total_reviews_in_context,comfortable_design_num_reviews_pbry,good_battery_life_num_reviews_pbry,light_weight_num_reviews_pbry,comfortable_design_pos,good_battery_life_pos,light_weight_pos,comfortable_design_neg,good_battery_life_neg,comfortable_design_pos_pbry,good_battery_life_pos_pbry,light_weight_pos_pbry,comfortable_design_neg_pbry,good_battery_life_neg_pbry,total_perc_rank
12,B013HSW4N2,1.0,7.0,0.0,8.0,0.125,0.875,0.0,0.0,4.0,0.0,1.0,3.0,0.0,0.5,0.0,0.125,0.375,1.0
5,B00M58CMTM,0.0,1.0,2.0,3.0,0.0,0.333333,0.666667,0.0,1.0,2.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.9
14,B019C1MBWW,0.0,3.0,0.0,3.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9
1,B000MMWT9Q,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333
3,B00IUICOR6,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333


In [105]:
for phrase in positive_attributes_counts['phrase'].unique():
    us_phrase = '_'.join(phrase.split()) 
    prod_attr_prby[f"{us_phrase}_score_level"] = prod_attr_prby[f"{us_phrase}_pos_pbry"]/prod_attr_prby[f"{us_phrase}_num_reviews_pbry"]
    prod_attr_prby = prod_attr_prby.fillna(0.5)
    


In [106]:
prod_attr_prby.sort_values('total_reviews_in_context', ascending=False).head()

phrase,asin,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews,total_reviews_in_context,comfortable_design_num_reviews_pbry,good_battery_life_num_reviews_pbry,light_weight_num_reviews_pbry,comfortable_design_pos,good_battery_life_pos,light_weight_pos,comfortable_design_neg,good_battery_life_neg,comfortable_design_pos_pbry,good_battery_life_pos_pbry,light_weight_pos_pbry,comfortable_design_neg_pbry,good_battery_life_neg_pbry,total_perc_rank,light_weight_score_level,good_battery_life_score_level,comfortable_design_score_level
12,B013HSW4N2,1.0,7.0,0.0,8.0,0.125,0.875,0.0,0.0,4.0,0.0,1.0,3.0,0.0,0.5,0.0,0.125,0.375,1.0,0.5,0.571429,0.0
5,B00M58CMTM,0.0,1.0,2.0,3.0,0.0,0.333333,0.666667,0.0,1.0,2.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.9,1.0,1.0,0.5
14,B019C1MBWW,0.0,3.0,0.0,3.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9,0.5,1.0,0.5
1,B000MMWT9Q,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5
3,B00IUICOR6,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5


In [107]:
prod_attr_prby['total_indicator_prby'] = (prod_attr_prby[['_'.join(c.split()) + '_num_reviews' for c in positive_attributes_counts['phrase'].unique()]] > 0).mean(axis=1)

In [108]:
prod_attr_prby.sort_values('total_reviews_in_context', ascending=False).head(10)

phrase,asin,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews,total_reviews_in_context,comfortable_design_num_reviews_pbry,good_battery_life_num_reviews_pbry,light_weight_num_reviews_pbry,comfortable_design_pos,good_battery_life_pos,light_weight_pos,comfortable_design_neg,good_battery_life_neg,comfortable_design_pos_pbry,good_battery_life_pos_pbry,light_weight_pos_pbry,comfortable_design_neg_pbry,good_battery_life_neg_pbry,total_perc_rank,light_weight_score_level,good_battery_life_score_level,comfortable_design_score_level,total_indicator_prby
12,B013HSW4N2,1.0,7.0,0.0,8.0,0.125,0.875,0.0,0.0,4.0,0.0,1.0,3.0,0.0,0.5,0.0,0.125,0.375,1.0,0.5,0.571429,0.0,0.666667
5,B00M58CMTM,0.0,1.0,2.0,3.0,0.0,0.333333,0.666667,0.0,1.0,2.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.9,1.0,1.0,0.5,0.666667
14,B019C1MBWW,0.0,3.0,0.0,3.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9,0.5,1.0,0.5,0.333333
1,B000MMWT9Q,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333
3,B00IUICOR6,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333
4,B00M1NEUA0,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333
8,B00NCSIN4W,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.633333,0.5,0.0,0.5,0.333333
9,B00P89AVRU,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.633333,0.5,1.0,0.5,0.333333
10,B00WU35S7K,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333
0,B00009MK3T,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.233333,1.0,0.5,0.5,0.333333


In [109]:
prod_attr_prby['score'] = prod_attr_prby[[c for c in prod_attr_prby.columns if c.endswith('score_level')]].mean(axis=1) * prod_attr_prby['total_perc_rank'] * prod_attr_prby['total_indicator_prby']
top10_products = prod_attr_prby.sort_values('score', ascending=False).head(10)
num_prods = prod_attr_prby.shape[0]

In [110]:
top10_products

phrase,asin,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews,total_reviews_in_context,comfortable_design_num_reviews_pbry,good_battery_life_num_reviews_pbry,light_weight_num_reviews_pbry,comfortable_design_pos,good_battery_life_pos,light_weight_pos,comfortable_design_neg,good_battery_life_neg,comfortable_design_pos_pbry,good_battery_life_pos_pbry,light_weight_pos_pbry,comfortable_design_neg_pbry,good_battery_life_neg_pbry,total_perc_rank,light_weight_score_level,good_battery_life_score_level,comfortable_design_score_level,total_indicator_prby,score
5,B00M58CMTM,0.0,1.0,2.0,3.0,0.0,0.333333,0.666667,0.0,1.0,2.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.9,1.0,1.0,0.5,0.666667,0.5
12,B013HSW4N2,1.0,7.0,0.0,8.0,0.125,0.875,0.0,0.0,4.0,0.0,1.0,3.0,0.0,0.5,0.0,0.125,0.375,1.0,0.5,0.571429,0.0,0.666667,0.238095
14,B019C1MBWW,0.0,3.0,0.0,3.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9,0.5,1.0,0.5,0.333333,0.2
1,B000MMWT9Q,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
3,B00IUICOR6,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
4,B00M1NEUA0,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
9,B00P89AVRU,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.633333,0.5,1.0,0.5,0.333333,0.140741
10,B00WU35S7K,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
8,B00NCSIN4W,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.633333,0.5,0.0,0.5,0.333333,0.07037
0,B00009MK3T,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.233333,1.0,0.5,0.5,0.333333,0.051852


In [111]:
top10_products

phrase,asin,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews,total_reviews_in_context,comfortable_design_num_reviews_pbry,good_battery_life_num_reviews_pbry,light_weight_num_reviews_pbry,comfortable_design_pos,good_battery_life_pos,light_weight_pos,comfortable_design_neg,good_battery_life_neg,comfortable_design_pos_pbry,good_battery_life_pos_pbry,light_weight_pos_pbry,comfortable_design_neg_pbry,good_battery_life_neg_pbry,total_perc_rank,light_weight_score_level,good_battery_life_score_level,comfortable_design_score_level,total_indicator_prby,score
5,B00M58CMTM,0.0,1.0,2.0,3.0,0.0,0.333333,0.666667,0.0,1.0,2.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.9,1.0,1.0,0.5,0.666667,0.5
12,B013HSW4N2,1.0,7.0,0.0,8.0,0.125,0.875,0.0,0.0,4.0,0.0,1.0,3.0,0.0,0.5,0.0,0.125,0.375,1.0,0.5,0.571429,0.0,0.666667,0.238095
14,B019C1MBWW,0.0,3.0,0.0,3.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9,0.5,1.0,0.5,0.333333,0.2
1,B000MMWT9Q,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
3,B00IUICOR6,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
4,B00M1NEUA0,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
9,B00P89AVRU,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.633333,0.5,1.0,0.5,0.333333,0.140741
10,B00WU35S7K,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
8,B00NCSIN4W,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.633333,0.5,0.0,0.5,0.333333,0.07037
0,B00009MK3T,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.233333,1.0,0.5,0.5,0.333333,0.051852


In [112]:
fetch_matched_products_query = \
        f'''SELECT bp.*, br.num_reviews
            FROM (
                SELECT asin, count(*) as num_reviews
                FROM baseline_reviews
                WHERE asin IN ('{"','".join(top10_products['asin'].astype(str).tolist())}')
                GROUP BY asin
            ) br
            JOIN baseline_products bp
            ON bp.asin=br.asin
        '''
matched_products = pd.read_sql(fetch_matched_products_query, conn)
recommended_list = matched_products.merge(top10_products).sort_values('score', ascending=False)

In [113]:
recommended_list

Unnamed: 0,category,main_cat,description,title,brand,feature,also_view,also_buy,date,price,asin,imageURL,imageURLHighRes,num_reviews,comfortable_design_num_reviews,good_battery_life_num_reviews,light_weight_num_reviews,total_reviews_in_context,comfortable_design_num_reviews_pbry,good_battery_life_num_reviews_pbry,light_weight_num_reviews_pbry,comfortable_design_pos,good_battery_life_pos,light_weight_pos,comfortable_design_neg,good_battery_life_neg,comfortable_design_pos_pbry,good_battery_life_pos_pbry,light_weight_pos_pbry,comfortable_design_neg_pbry,good_battery_life_neg_pbry,total_perc_rank,light_weight_score_level,good_battery_life_score_level,comfortable_design_score_level,total_indicator_prby,score
4,"Electronics, Headphones, Over-Ear Headphones",Home Audio & Theater,"Cut the cord and set your music free with Bose SoundLink on-ear wireless headphones. The sound is powerful and clear, you can switch easily between music and calls with intuitive controls, and pla...",Bose SoundLink On-Ear Bluetooth Wireless Headphones - White,Bose,Crisp powerful sound from the best sounding wireless headphone in its class. Connectivity Technology: Wired/Wireless Advanced microphone system for clear calls in windy or noisy environments Up to...,,"B00Q784UL4, B00Q784XX4, B01MRL3DD0, B0117RGD0K, B0117RGG8E, B074QRK6CG, B00Q784UUA","September 15, 2014",$181.55,B00M58CMTM,"https://images-na.ssl-images-amazon.com/images/I/41yljKPreDL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/31-HOnVP67L._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/4...","https://images-na.ssl-images-amazon.com/images/I/41yljKPreDL.jpg, https://images-na.ssl-images-amazon.com/images/I/31-HOnVP67L.jpg, https://images-na.ssl-images-amazon.com/images/I/41%2BNozklKPL.j...",299,0.0,1.0,2.0,3.0,0.0,0.333333,0.666667,0.0,1.0,2.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.9,1.0,1.0,0.5,0.666667,0.5
8,"Electronics, Headphones, Earbud Headphones",All Electronics,Premium Bluetooth wireless music + calls with complete remote controls with a lifetime warranty against sweat,Jaybird X2 Wireless Sweat-Proof Micro-Sized Bluetooth Sport Headphones &ndash; Charge,Jaybird,Premium Bluetooth Audio For Skip-Free Music Outdoors 8 Hours of Music + Calls With Complete Remote Controls Secure Over/Under-Ear Fit Options Lifetime Sweat proof Warranty Includes Comply Premium ...,"B01MF4MYT3, B0728G6PYG, B01EHIQAR2, B071SC5VNH, B07K1GH9Q1, B01MQQPHPN, B01MECNLU5, B00NJMXETK, B01M2UVSHF, B00AIRUOI8, B00B6ZGSVI, B01EHIQB1C, B07D8HRWY2, B01M7NCT5O, B07GVG8TJD, B01EHIQASQ, B07H...","B01M2UVSHF, B01M7NCT5O","August 18, 2015",$69.99,B013HSW4N2,"https://images-na.ssl-images-amazon.com/images/I/41knxlI43DL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/31E1ip9Y30L._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/5...","https://images-na.ssl-images-amazon.com/images/I/41knxlI43DL.jpg, https://images-na.ssl-images-amazon.com/images/I/31E1ip9Y30L.jpg, https://images-na.ssl-images-amazon.com/images/I/51paAYHyRiL.jpg...",777,1.0,7.0,0.0,8.0,0.125,0.875,0.0,0.0,4.0,0.0,1.0,3.0,0.0,0.5,0.0,0.125,0.375,1.0,0.5,0.571429,0.0,0.666667,0.238095
9,"Electronics, Headphones",Cell Phones & Accessories,"<B>T3</B> , the latest Bluetooth headphones of Turbine Series, is designed and created by our professional Bluedio team. Compared with T2, it has a number of major upgrade: Zn alloy frame and body...","Bluedio T3 Extra Bass Bluetooth Headphones On Ear with Mic, 57mm Driver Folding Wireless Headset, Wired and Wireless Headphones for Cell Phone/TV/PC Gift (Black)",Bluedio,"NOTE: Original Bluedio T3 Bluetooth Headphones sold by"" Bluedio"".Zn ALLOY BODY, DURABLE AND STRONGQuality build that guarantees longevity. Built with an alloy frame, which allows these headphones ...",,"B01MQU3HM4, B072BL5F8H, B01G6PV5OM, B06XHBV8H2, B00Q2VIW9M, B00LA520Y8, B06XHHPWB5, B071SFQR9T, B01EH9YYPG, B01G1DL3J6, B0725DXRQ5, B07H6ST2VX, B07H6VR94M, B019DUFVT2",,$52.70,B019C1MBWW,"https://images-na.ssl-images-amazon.com/images/I/41PWu5dMsPL._SX38_SY50_CR,0,0,38,50_.jpg, https://images-na.ssl-images-amazon.com/images/I/51%2BIdbZqWyL._SX38_SY50_CR,0,0,38,50_.jpg, https://imag...","https://images-na.ssl-images-amazon.com/images/I/41PWu5dMsPL.jpg, https://images-na.ssl-images-amazon.com/images/I/51%2BIdbZqWyL.jpg, https://images-na.ssl-images-amazon.com/images/I/41HkG2nCtnL.j...",614,0.0,3.0,0.0,3.0,0.0,1.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.9,0.5,1.0,0.5,0.333333,0.2
1,"Electronics, Headphones, Over-Ear Headphones",Home Audio &amp; Theater,"Bose around-ear headphones can be used with portable media players, computers, or home stereo. Bose technologies help deliver remarkable audio clarity and tonal range you might not expect from hea...",Bose Around-Ear Headphones (Discontinued by Manufacturer),Bose,"Includes 1/4-inch stereo plug adapter and drawstring carry bag Ideal for use with portable players, laptops, home stereos, etc. Proprietary acoustic headphone technology delivers tonally balanced ...","B00478O0JI, B00009MK3T, B0117RFX38, B01DY0NRCA, B0054JJ0QW, B01HUJ470M, B009BV4V4K, B00OF8ME52, B002LK2QJK, B009BXX8II, B075PGL8NT, B01I2JJ7BI, B000AP05BO, B01G6XL6M0, B005586PT2, B078XTNHFC, B002...",,"January 11, 2006",$99.99,B000MMWT9Q,https://images-na.ssl-images-amazon.com/images/I/41WXe6sKGhL._SS40_.jpg,https://images-na.ssl-images-amazon.com/images/I/41WXe6sKGhL.jpg,79,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
2,"Electronics, Headphones, Over-Ear Headphones",Home Audio & Theater,"Powerful sound, lightweight design SoundTrue headphones feature an advanced acoustic design for clear, natural audio across the full spectrumfrom vocalists who hit the high note, right down to a g...","Bose SoundTrue Headphones Around-Ear Style, Black",Bose,Full and natural sound for all your music Softly padded headband and memory foam cushions Also available in an on-ear style Connectivity Technology: Wired,,"B0180T7XPC, B0117RFX38","April 6, 2014",$161.93,B00IUICOR6,"https://images-na.ssl-images-amazon.com/images/I/414G93rxWIL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/41FqPKY-q9L._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/4...","https://images-na.ssl-images-amazon.com/images/I/414G93rxWIL.jpg, https://images-na.ssl-images-amazon.com/images/I/41FqPKY-q9L.jpg, https://images-na.ssl-images-amazon.com/images/I/41IxFgoSa3L.jpg...",338,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
3,"Electronics, Headphones, Over-Ear Headphones",Home Audio & Theater,"QuietComfort 25 Acoustic Noise Cancelling headphones are the best-performing around-ear headphones from Bose. They give you crisp, powerful sound--and quiet that lets you hear your music better. B...","Bose QuietComfort 25 Acoustic Noise Cancelling Headphones for Apple devices, White(wired, 3.5mm)",Bose,"Industry leading noise reduction for travel, work and anywhere in between.Connectivity Technology: Wired Best-in-class sound with lifelike performance for the music you love Lightweight around-ear...",,"B00OGA2QZC, B00M1NEUKK, B07FN56NJH, B01MTQGY69, B00OCL620U, B00VW7U8X4, B07G95TJ3P, B005KJM30G, B005KJMTXM, B0756GB78C, B01LXJFMGF, B01E3SNO1G, B00OCL61VK",,$223.97,B00M1NEUA0,"https://images-na.ssl-images-amazon.com/images/I/41EYZB11mtL._SX38_SY50_CR,0,0,38,50_.jpg, https://images-na.ssl-images-amazon.com/images/I/410NjMez-KL._SX38_SY50_CR,0,0,38,50_.jpg, https://images...","https://images-na.ssl-images-amazon.com/images/I/41EYZB11mtL.jpg, https://images-na.ssl-images-amazon.com/images/I/410NjMez-KL.jpg, https://images-na.ssl-images-amazon.com/images/I/41rlgXfZfXL.jpg...",134,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
6,"Electronics, Headphones, Earbud Headphones",Cell Phones & Accessories,"Plantronics BackBeat FIT Wireless Bluetooth Headphones - Waterproof Earbuds for Running and Workout, Green, Frustration Free Packaging","Plantronics BackBeat FIT Wireless Bluetooth Headphones - Waterproof Earbuds for Running and Workout, Green, Frustration Free Packaging",Plantronics,WORKOUT HEADPHONES: Specifically constructed with a flexible design so sports earbuds remain stable in your ear throughout all levels of exercise; Case not included with running headphones. WATERP...,"B01JKJN972, B01I0VIMK0, B01JKJN7Q0, B07FTJVZC1, B01JKJN71A, B01JKJN8AA, B01JKJN8NW, B07FTTPVGG, B00KJLMBSO, B07532V3L2, B00KJLMBQQ, B07531C1H9, B07G4ST8YD, B0753324L3, B07G46Q8GR, B07G4NZ5M8, B07F...","B01JKJN972, B00KJLMBSO, B075T9M3SD, B07FTJVZC1, B00IU0YZIK, B01LY6FCHU, B00288L2N6, B00JF9EAHQ, B07FTTPVGG, B016C91ASS, B00EWJIVX8, B01CH0CZTE, B00FTBFU1Y, B06WW9TZ5V, B00MNAD944, 1929854439, B06W...",,$61.22,B00P89AVRU,"https://images-na.ssl-images-amazon.com/images/I/41FXLxebu%2BL._SX38_SY50_CR,0,0,38,50_.jpg, https://images-na.ssl-images-amazon.com/images/I/41DVuqiMG2L._SX38_SY50_CR,0,0,38,50_.jpg, https://imag...","https://images-na.ssl-images-amazon.com/images/I/41FXLxebu%2BL.jpg, https://images-na.ssl-images-amazon.com/images/I/41DVuqiMG2L.jpg, https://images-na.ssl-images-amazon.com/images/I/41%2BETjM8arL...",100,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.633333,0.5,1.0,0.5,0.333333,0.140741
7,"Electronics, Headphones, Over-Ear Headphones",Home Audio & Theater,"Bose around-ear headphones can be used with portable media players, computers, or home stereo. Bose technologies help deliver remarkable audio clarity and tonal range you might not expect from hea...",Bose Around-Ear Headphones (Discontinued by Manufacturer),Bose,"Includes 1/4-inch stereo plug adapter and drawstring carry bag Ideal for use with portable players, laptops, home stereos, etc. Proprietary acoustic headphone technology delivers tonally balanced ...","B00478O0JI, B01DY0NRCA, B0117RFX38, B00009MK3T, B002LK2QJK, B0054JJ0QW, B009BXX8II, B00OF8ME52, B01HUJ470M, B01I2JJ7BI, B009BV4V4K, B01G6XL6M0, B078XTNHFC, B011BIIO9I, B002M38I2U, B000AP05BO, B075...",,"January 11, 2006",$99.99,B00WU35S7K,,,78,0.0,0.0,2.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.633333,1.0,0.5,0.5,0.333333,0.140741
5,"Electronics, Headphones, Over-Ear Headphones",Home Audio & Theater,"Hesh 2 Wireless is the Bluetooth version of our iconic headphone with a new sleek profile and plush, synthetic leather ear pads. With on-board controls and a rechargeable battery, Hesh 2 Wireless ...","Skullcandy Hesh 2 Bluetooth Wireless Over-Ear Headphones with Microphone, Supreme Sound and Powerful Bass, 15-Hour Rechargeable Battery, Soft Synthetic Leather Ear Cushions, Black",Skullcandy,"Supreme sound: Hesh 2 Wireless features Supreme Sound technology, producing attacking, powerful bass, warm, natural vocals and precision highs. All-day comfort, everyday strength: Soft synthetic l...",,"B01JC4T4VK, B073B1MDJR, B00J67FC4C, B0756Z1XK7, B075749KHR, B01N9EU76H, B01BPF49RI, B00WGMRD2S, B074W8SC1L, B01N5OKGLH, B0756VVVF5, B0042ZBR38, B076X1VYF2, B07G2GRGW3, B01MFGALN9, B00LTZ8RS2, B01D...","March 13, 2015",$54.99,B00NCSIN4W,"https://images-na.ssl-images-amazon.com/images/I/61JpA7ArDDL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/51G5DlRJwqL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/4...","https://images-na.ssl-images-amazon.com/images/I/61JpA7ArDDL.jpg, https://images-na.ssl-images-amazon.com/images/I/51G5DlRJwqL.jpg, https://images-na.ssl-images-amazon.com/images/I/41z8tzENvcL.jpg...",515,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,1.0,0.633333,0.5,0.0,0.5,0.333333,0.07037
0,"Electronics, Headphones, Over-Ear Headphones",Home Audio &amp; Theater,"Get high-performance sound in a cool lightweight package with these Bose Slate Gray Triport headphones. Enjoy the full audio spectrum, highs and lows, whether you're plugged into your computer or ...",Bose TriPort Around Ear Headphones (Discontinued by Manufacturer),Bose,"Acoustic equalization - Innovative Bose technology that fine-tunes the frequency response of the headphones so you can experience rich, clear sound Adjustable headband - Up to three inches of adju...","B01DY0NRCA, B009BXX8II, B00478O0JI, B07JD9FRQS, B01HUJ470M, B075PGL8NT, B075WBR327, B00KW6WPD2, B002M38I2U, B0054JJ0QW, B000MMWT9Q, B000KLZ7VG, B0117RFX38, B006QM7RFM, B000AP05BO, B00009RDIF, B002...","B01DY0NRCA, B005KJM30G, B008VM84NY, B00NTWZ8WG","May 7, 2003",$129.93,B00009MK3T,https://images-na.ssl-images-amazon.com/images/I/41zT3cUX5%2BL._SS40_.jpg,https://images-na.ssl-images-amazon.com/images/I/41zT3cUX5%2BL.jpg,67,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.233333,1.0,0.5,0.5,0.333333,0.051852


### Trying to split data frame by attributes and construct a product specific json object for response 

In [120]:
recommended_list.columns

Index(['category', 'main_cat', 'description', 'title', 'brand', 'feature',
       'also_view', 'also_buy', 'date', 'price', 'asin', 'imageURL',
       'imageURLHighRes', 'num_reviews', 'comfortable_design_num_reviews',
       'good_battery_life_num_reviews', 'light_weight_num_reviews',
       'total_reviews_in_context', 'comfortable_design_num_reviews_pbry',
       'good_battery_life_num_reviews_pbry', 'light_weight_num_reviews_pbry',
       'comfortable_design_pos', 'good_battery_life_pos', 'light_weight_pos',
       'comfortable_design_neg', 'good_battery_life_neg',
       'comfortable_design_pos_pbry', 'good_battery_life_pos_pbry',
       'light_weight_pos_pbry', 'comfortable_design_neg_pbry',
       'good_battery_life_neg_pbry', 'total_perc_rank',
       'light_weight_score_level', 'good_battery_life_score_level',
       'comfortable_design_score_level', 'total_indicator_prby', 'score'],
      dtype='object')

In [130]:
def build_product_response_dict(recommended_list, attribute_list):
    #products_data_arr = recommended_list.iloc[:, 0:14].to_dict(orient='records')
    products_data_arr = recommended_list[['asin', 'num_reviews']].to_dict(orient='records')
    tot_df = recommended_list[ [c for c in recommended_list.columns if c.startswith('total') ] + ['score']]
    tot_data_arr = tot_df.to_dict(orient='records')
    for i in range(len(products_data_arr)):
        products_data_arr[i].update(tot_data_arr[i])
    for att in attribute_list:
        att_us = f"{'_'.join(att.split())}"
        att_df = recommended_list[ [c for c in recommended_list.columns if c.startswith(att_us) ] ]
        att_df = att_df.assign(name=[att] * len(products_data_arr))
        att_df.columns = [c.replace(f"{att_us}_", '') for c in att_df.columns]
        att_data_arr = att_df.to_dict(orient='records')
        for i in range(len(products_data_arr)):
            if "attributes" not in products_data_arr[i]:
                products_data_arr[i]["attributes"] = []
            products_data_arr[i]["attributes"].append(att_data_arr[i])
    return products_data_arr

In [131]:
products_data_arr = build_product_response_dict(recommended_list, attribute_list)
    

In [136]:
products_data_arr = recommended_list[['asin', 'num_reviews']].to_dict(orient='records')

In [137]:
products_data_arr

[{'asin': 'B00M58CMTM', 'num_reviews': 299},
 {'asin': 'B013HSW4N2', 'num_reviews': 777},
 {'asin': 'B019C1MBWW', 'num_reviews': 614},
 {'asin': 'B000MMWT9Q', 'num_reviews': 79},
 {'asin': 'B00IUICOR6', 'num_reviews': 338},
 {'asin': 'B00M1NEUA0', 'num_reviews': 134},
 {'asin': 'B00P89AVRU', 'num_reviews': 100},
 {'asin': 'B00WU35S7K', 'num_reviews': 78},
 {'asin': 'B00NCSIN4W', 'num_reviews': 515},
 {'asin': 'B00009MK3T', 'num_reviews': 67}]

In [138]:
tot_df = recommended_list[ [c for c in recommended_list.columns if c.startswith('total') ] + ['score']]
tot_data_arr = tot_df.to_dict(orient='records')
for i in range(len(products_data_arr)):
    products_data_arr[i].update(tot_data_arr[i])

In [139]:
products_data_arr

[{'asin': 'B00M58CMTM',
  'num_reviews': 299,
  'total_reviews_in_context': 3.0,
  'total_perc_rank': 0.9,
  'total_indicator_prby': 0.6666666666666666,
  'score': 0.5},
 {'asin': 'B013HSW4N2',
  'num_reviews': 777,
  'total_reviews_in_context': 8.0,
  'total_perc_rank': 1.0,
  'total_indicator_prby': 0.6666666666666666,
  'score': 0.23809523809523808},
 {'asin': 'B019C1MBWW',
  'num_reviews': 614,
  'total_reviews_in_context': 3.0,
  'total_perc_rank': 0.9,
  'total_indicator_prby': 0.3333333333333333,
  'score': 0.19999999999999998},
 {'asin': 'B000MMWT9Q',
  'num_reviews': 79,
  'total_reviews_in_context': 2.0,
  'total_perc_rank': 0.6333333333333333,
  'total_indicator_prby': 0.3333333333333333,
  'score': 0.14074074074074072},
 {'asin': 'B00IUICOR6',
  'num_reviews': 338,
  'total_reviews_in_context': 2.0,
  'total_perc_rank': 0.6333333333333333,
  'total_indicator_prby': 0.3333333333333333,
  'score': 0.14074074074074072},
 {'asin': 'B00M1NEUA0',
  'num_reviews': 134,
  'total_re

# get_reviews_for_attributes_and_asin()

- DONE: you need to add sentiment here
- TESTED:[this does not work consistently across attributes] you may need to search for nearest neighbors to the chosen attributes to expand the search
    - NA: this may change product recommendation function by a little bit
- TODO: try for attribute highlights in the contents

In [187]:
phrase_ids_query = \
    f'''SELECT key_phrase_id, phrase 
        FROM key_phrase_root 
        WHERE phrase IN ('{"','".join(attribute_list)}')
        AND category='{CATEGORY}'
    '''
query_phrases = pd.read_sql(phrase_ids_query, conn)

In [188]:
query_phrases

Unnamed: 0,key_phrase_id,phrase
0,328258,light weight
1,332508,good battery life
2,335216,comfortable design


In [191]:
query_attributes = attribute_list

review_results_query = \
    f'''SELECT key_phrase_id, review_id 
        FROM key_phrase_reviews 
        WHERE key_phrase_id IN 
        (SELECT key_phrase_id 
            FROM key_phrase_root 
            WHERE phrase IN ('{"','".join(query_attributes)}')
            AND category='{CATEGORY}'
        )
    '''

review_ids_for_query = pd.read_sql(review_results_query, conn)

In [192]:
review_ids_for_query

Unnamed: 0,key_phrase_id,review_id
0,332508,906785
1,328258,75939
2,328258,759076
3,328258,76005
4,328258,358172
5,328258,843920
6,328258,27089
7,328258,758705
8,328258,839666
9,328258,1017268


In [194]:
review_ids_for_query = review_ids_for_query.merge(query_phrases, on='key_phrase_id', how='left')

In [196]:
review_ids_for_query = review_ids_for_query.groupby('review_id')['phrase'].apply(list).reset_index()

In [198]:
review_ids_for_query['n_matches'] = review_ids_for_query['phrase'].apply(len)


In [199]:
review_ids_for_query

Unnamed: 0,review_id,phrase,n_matches
0,27089,[light weight],1
1,75939,[light weight],1
2,76005,[light weight],1
3,358172,[light weight],1
4,758705,[light weight],1
5,759076,[light weight],1
6,839666,[light weight],1
7,839669,[light weight],1
8,843743,[light weight],1
9,843785,[good battery life],1


In [200]:
asin = 'B00M58CMTM'
fetch_matched_reviews_query = \
    f'''SELECT *
        FROM baseline_reviews
        WHERE review_id IN (
                {','.join(review_ids_for_query['review_id'].astype(str).tolist())}
            ) AND
            asin='{asin}'
    '''
matched_reviews = pd.read_sql(fetch_matched_reviews_query, conn)
matched_reviews = matched_reviews.merge(review_ids_for_query, on='review_id', how='left')

In [201]:
matched_reviews

Unnamed: 0,review_id,rating,sentiment,vote,verified,reviewerID,asin,reviewText,reviewTitle,reviewTime,phrase,n_matches
0,843743,5.0,positive,0.0,True,AM5K8DY6KA4V3,B00M58CMTM,I bought these for my husband and he loves them. He especially loves the fact that he can converse on the phone while wearing them and there is no background noise or interference. These are light...,I bought these for my husband and he loves them ...,2017-01-25,[light weight],1
1,843785,5.0,positive,0.0,True,A39U4UJRQVBNR5,B00M58CMTM,"Great sound quality, comfortable fit, good battery life!\n\nThese are pricey, but worth it!!",Great Bose Sound via Bluetooth!,2016-04-27,[good battery life],1
2,843920,5.0,positive,0.0,True,A2CG191FKPTJUI,B00M58CMTM,"I could not be happier, such wonderful sound, light weight, easy connection, just magnificent ..I hear things in music that I have listened to for years that I never heard before..","I could not be happier, such wonderful sound",2015-02-05,[light weight],1


In [None]:
def get_reviews_for_attributes_asin_sentiment_v2(category, query_attributes, asin, sentiment=None):

    phrase_ids_query = \
    f'''SELECT key_phrase_id, phrase 
        FROM key_phrase_root 
        WHERE phrase IN ('{"','".join(query_attributes)}')
        AND category='{category}'
    '''
    query_phrases = pd.read_sql(phrase_ids_query, conn)

    review_results_query = \
    f'''SELECT key_phrase_id, review_id 
        FROM key_phrase_reviews 
        WHERE key_phrase_id IN 
        (SELECT key_phrase_id 
            FROM key_phrase_root 
            WHERE phrase IN ('{"','".join(query_attributes)}')
            AND category='{category}'
        )
    '''
    review_ids_for_query = pd.read_sql(review_results_query, conn)
    review_ids_for_query = review_ids_for_query.merge(query_phrases, on='key_phrase_id', how='left')
    review_ids_for_query = review_ids_for_query.groupby('review_id')['phrase'].apply(list).reset_index()
    review_ids_for_query['n_matches'] = review_ids_for_query['phrase'].apply(len)
    #top_matched_reviews = review_ids_for_query.sort_values('n_matches', ascending=False).head(10)

    
    # TODO: we may decided to build a version of this function with an optional asin (first iteration)
    
    fetch_matched_reviews_query = \
    f'''SELECT *
        FROM baseline_reviews
        WHERE review_id IN (
                {','.join(review_ids_for_query['review_id'].astype(str).tolist())}
            ) AND
            asin='{asin}'
    '''
    matched_reviews = pd.read_sql(fetch_matched_reviews_query, conn)
    matched_reviews = matched_reviews.merge(review_ids_for_query, on='review_id', how='left')

    if sentiment:
        matched_reviews = matched_reviews[matched_reviews['sentiment'] == sentiment]

    return matched_reviews

# Scratch

In [10]:
shortlisted_attributes_query = \
    f'''
    SELECT *
    FROM shortlisted_attributes
    WHERE category='{CATEGORY}'
    '''
shortlisted_attributes = pd.read_sql(shortlisted_attributes_query, conn)

In [13]:
shortlisted_attributes.shape

(386, 12)

In [33]:
sim_attrs_list = shortlisted_attributes.sort_values('neighbor_distances').sort_values('n_reviews', ascending=False).groupby('qphrase').head(10).reset_index(drop=True)[['key_phrase_id', 'phrase', 'qphrase']].sort_values('qphrase')
sim_attrs_list

Unnamed: 0,key_phrase_id,phrase,qphrase
33,331141,nice bass,awesome bass
15,328427,good bass,awesome bass
61,331635,great punchy bass,awesome bass
60,330317,mild bass,awesome bass
49,333649,decent bass,awesome bass
48,328397,big bass,awesome bass
29,328918,strong bass,awesome bass
12,328681,great bass,awesome bass
34,331766,heavy bass,awesome bass
62,333517,blissful bass,awesome bass


In [38]:
sim_attrs_list[['key_phrase_id', 'phrase']].drop_duplicates().sample(50)

Unnamed: 0,key_phrase_id,phrase
31,329045,quality product
15,328427,good bass
0,328250,good sound
12,328681,great bass
32,330168,great noise cancelling
36,332935,active noise cancellation
3,328821,good sound quality
39,329197,nice fit
16,329203,comfortable fit
7,328974,price range


In [21]:
sim_attr_lists['n_attrs'] = sim_attr_lists['phrase'].apply(lambda x: len(x))

In [16]:
#shortlisted_attributes.sort_values('n_reviews', ascending=False).sort_values('neighbor_distances').groupby('qphrase')['phrase'].apply(list).reset_index()

In [22]:
sim_attr_lists

Unnamed: 0,qphrase,phrase,n_attrs
0,awesome bass,"[great bass, good bass, strong bass, nice bass, heavy bass, big bass, decent bass, mild bass, great punchy bass, blissful bass, great bass response, great bass performance, great bass sound, reaso...",15
1,battery life,"[battery life, volume control, price range, volume level, quality product]",5
2,comfortable fit,"[good sound, good sound quality, great sound quality, excellent sound quality, comfortable fit, amazing sound quality, poor sound quality, great audio quality, good fit, good quality sound, high q...",122
3,effective noise cancelling,"[good battery life, great noise cancelling, active noise cancellation, good noise cancellation, active noise cancelling, good noise isolation, great customer support, good noise cancelling, great ...",124
4,good fit,"[great fit, comfortable fit, good fit, nice fit, poor fit, tight fit, excellent fit, bad fit, comfortable fit battery, new fit, decent fit]",11
5,good sound quality,"[good sound quality, great sound quality, excellent sound quality, amazing sound quality, poor sound quality, great audio quality, good quality sound, high quality sound, decent sound quality, goo...",100
6,is durable,"[works great, works good, sounds good, sounds excellent, gels tight]",5
7,light Weight,"[light weight, light usage, light weight easy, light music]",4
