# In this notebook

- We build API functions that return the newer shortlisted attributes
- We refer to notebooks `13`, `14`, `16`, and `17` as needed.


In [1]:
import pandas as pd
import json

# Widen pandas' display limits so the wide product/attribute frames render in full.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_colwidth', 200)
pd.set_option('display.max_rows', 1000)

In [2]:
from sqlalchemy import create_engine
import psycopg2 
import io

In [3]:
import os
import glob

In [4]:
import pickle

In [5]:
import numpy as np

### set up database connection

In [6]:
# Prefer a connection URL supplied through the GABBY_DB_URL environment variable so
# credentials do not have to live in the notebook; the literal below is only the
# local-development fallback and keeps the previous behavior when the variable is unset.
# NOTE(review): drop the fallback (and rotate the password) once the variable is provisioned.
conn_string = os.environ.get(
    'GABBY_DB_URL',
    'postgresql+psycopg2://gabbydbuser:gabbyDBpass@localhost:5432/gabbyDB',
)

In [7]:
# Build the SQLAlchemy engine from the connection URL and open the single
# connection that the pd.read_sql calls in this notebook reuse.
db = create_engine(conn_string)
conn = db.connect()

In [78]:
# Product category that the queries in the cells below are filtered on.
CATEGORY = 'mouse'

# get_attributes_list() API

In [79]:
def get_attributes_list(category, n_qphrase_attrs=10, max_attrs=50):
    """Return a random selection of shortlisted attributes for one category.

    Reads every row of ``shortlisted_attributes`` for ``category`` through the
    module-level ``conn``, keeps the top ``n_qphrase_attrs`` rows per ``qphrase``
    (most reviews first, ties broken by the smallest neighbor distance), drops
    duplicate (key_phrase_id, phrase) pairs and samples at most ``max_attrs`` rows.

    Parameters
    ----------
    category : str
        Value matched against the ``category`` column.
    n_qphrase_attrs : int, default 10
        Maximum number of attributes kept for each ``qphrase``.
    max_attrs : int, default 50
        Upper bound on the number of rows returned.

    Returns
    -------
    pandas.DataFrame
        Columns ``key_phrase_id`` and ``phrase``, in random order.
    """
    # Bind the category as a driver-level parameter (psycopg2 pyformat style) instead of
    # splicing it into the SQL text, so quotes in the value cannot alter the statement.
    shortlisted_attributes = pd.read_sql(
        'SELECT * FROM shortlisted_attributes WHERE category = %(category)s',
        conn,
        params={'category': category},
    )

    # A single multi-key sort makes the tie-break deterministic; two chained single-key
    # sorts only guarantee the ordering of the last key.
    ranked = shortlisted_attributes.sort_values(
        ['n_reviews', 'neighbor_distances'], ascending=[False, True]
    )
    top_per_qphrase = (
        ranked
        .groupby('qphrase')
        .head(n_qphrase_attrs)
        .reset_index(drop=True)[['key_phrase_id', 'phrase', 'qphrase']]
        .sort_values('qphrase')
    )

    # The same phrase can be a neighbor of several qphrases; keep it once.
    deduped = top_per_qphrase[['key_phrase_id', 'phrase']].drop_duplicates()
    return deduped.sample(min(max_attrs, deduped.shape[0]))
    

In [84]:
get_attributes_list(CATEGORY, 10)

Unnamed: 0,key_phrase_id,phrase
39,268733,great travel mouse
16,267636,wrist pain
3,268449,works great
27,271443,good battery life
1,267597,good mouse
93,304224,good construction
36,269626,great size
108,284193,good scroll wheel
84,269884,great quality product
92,269063,quality build


# get_products_for_attributes()

In [116]:
def _gen_attribute_sentiment_query_v2(category, attribute_list, sentiment):
    return \
        f""" SELECT BR.asin, PHR.key_phrase_id, PHR.phrase, BR.sentiment, count(*) as count
            FROM (SELECT key_phrase_id, phrase
                FROM key_phrase_root 
                WHERE category='{category}' 
                    AND phrase IN ('{"', '".join(attribute_list)}') 
                ) as PHR
            LEFT JOIN key_phrase_reviews KPR
                ON KPR.key_phrase_id=PHR.key_phrase_id
            LEFT JOIN baseline_reviews BR
                ON BR.review_id=KPR.review_id
            WHERE BR.sentiment='{sentiment}'
            GROUP BY BR.asin, PHR.key_phrase_id, PHR.phrase, BR.sentiment
        """


In [117]:
# Attributes the user picked; one count query is built per sentiment.
attribute_list = ['comfortable design', 'good battery life', 'light weight']
# Call the `_v2` builder defined in the cell above: the un-suffixed name is not defined
# anywhere in this notebook, so it raised NameError on a fresh kernel.
attributes_counts_positive_sql_query = \
    _gen_attribute_sentiment_query_v2(CATEGORY, attribute_list, 'positive')
attributes_counts_negative_sql_query = \
    _gen_attribute_sentiment_query_v2(CATEGORY, attribute_list, 'negative')
    
    

In [118]:
# Execute the positive and negative count queries built in the previous cell
# against the open connection; each result is loaded as a DataFrame.
positive_attributes_counts = pd.read_sql(attributes_counts_positive_sql_query, conn)
negative_attributes_counts = pd.read_sql(attributes_counts_negative_sql_query, conn)

In [119]:
# Stack the positive and negative counts into one long frame
# (the original row indices of both inputs are kept, so index labels repeat).
attributes_counts = pd.concat([positive_attributes_counts, negative_attributes_counts])

In [120]:
# One row per asin, one column per phrase, holding the number of mentions summed over
# both sentiments. pivot_table already sums per (asin, phrase), so the earlier groupby
# pre-aggregation was redundant. The string 'sum' replaces the builtin `sum`, whose use
# as an aggfunc is deprecated in recent pandas.
product_attribute_counts = (
    attributes_counts
    .pivot_table(values='count', index='asin', columns='phrase', aggfunc='sum')
    .fillna(0)  # a phrase never mentioned for a product counts as 0
    .reset_index()
)

In [121]:
product_attribute_counts

phrase,asin,comfortable design,good battery life,light weight
0,B0000AOWWM,0.0,0.0,1.0
1,B0002CPBWS,1.0,2.0,1.0
2,B0007Z1M50,1.0,0.0,0.0
3,B000BDDBLG,0.0,1.0,0.0
4,B000GABCY2,0.0,0.0,1.0
5,B000HCRVUS,0.0,0.0,1.0
6,B000IE5W7Y,0.0,0.0,1.0
7,B000IF80ZE,0.0,1.0,0.0
8,B000MVI3VK,0.0,0.0,1.0
9,B000Q7V0W4,1.0,0.0,1.0


In [122]:
# Pick the raw per-phrase count columns by name rather than by position, so re-running
# this cell does not fold 'total' or the derived '*_pbry' columns back into the sum.
phrase_columns = [
    c for c in product_attribute_counts.columns
    if c not in ('asin', 'total') and not c.endswith('_pbry')
]
# Total number of attribute mentions per product.
product_attribute_counts['total'] = product_attribute_counts[phrase_columns].sum(axis=1)
# Share of each phrase within the product's total mentions.
for phrase in phrase_columns:
    product_attribute_counts[f"{phrase}_pbry"] = product_attribute_counts[phrase] / product_attribute_counts['total']

In [126]:
product_attribute_counts.sort_values('total', ascending=False).head()

phrase,asin,comfortable design,good battery life,light weight,total,comfortable design_pbry,good battery life_pbry,light weight_pbry
92,B00BP5KOPA,0.0,7.0,7.0,14.0,0.0,0.5,0.5
47,B004YAVF8I,0.0,5.0,6.0,11.0,0.0,0.454545,0.545455
36,B003TG75EG,0.0,7.0,1.0,8.0,0.0,0.875,0.125
179,B01C9J3EYI,0.0,3.0,4.0,7.0,0.0,0.428571,0.571429
170,B016Y4NH6O,0.0,1.0,5.0,6.0,0.0,0.166667,0.833333


In [127]:
def _pivot_sentiment_counts(sentiment_counts, suffix):
    """Pivot long (asin, phrase, count) rows into one row per asin.

    Each phrase becomes a column named '<phrase>_<suffix>' holding the summed count;
    phrases with no rows for a product are filled with 0.
    """
    pivoted = (
        sentiment_counts[['asin', 'phrase', 'count']]
        .pivot_table(values='count', index='asin', columns='phrase', aggfunc='sum')
        .fillna(0)
        .reset_index()
    )
    # Column 0 is 'asin'; every remaining column is a phrase.
    return pivoted.rename(columns={c: f"{c}_{suffix}" for c in pivoted.columns[1:]})


pos_counts = _pivot_sentiment_counts(positive_attributes_counts, 'pos')
neg_counts = _pivot_sentiment_counts(negative_attributes_counts, 'neg')

# Left-join so every product keeps its row; a product missing from one sentiment
# frame gets 0 for that sentiment's columns.
prod_attr_prby = (
    product_attribute_counts
    .merge(pos_counts, on='asin', how='left')
    .merge(neg_counts, on='asin', how='left')
    .fillna(0)
)

In [130]:
prod_attr_prby.sort_values('total', ascending=False).head()

phrase,asin,comfortable design,good battery life,light weight,total,comfortable design_pbry,good battery life_pbry,light weight_pbry,comfortable design_pos,good battery life_pos,light weight_pos,comfortable design_neg,good battery life_neg,light weight_neg
92,B00BP5KOPA,0.0,7.0,7.0,14.0,0.0,0.5,0.5,0.0,7.0,6.0,0.0,0.0,1.0
47,B004YAVF8I,0.0,5.0,6.0,11.0,0.0,0.454545,0.545455,0.0,5.0,4.0,0.0,0.0,2.0
36,B003TG75EG,0.0,7.0,1.0,8.0,0.0,0.875,0.125,0.0,7.0,0.0,0.0,0.0,1.0
179,B01C9J3EYI,0.0,3.0,4.0,7.0,0.0,0.428571,0.571429,0.0,2.0,4.0,0.0,1.0,0.0
170,B016Y4NH6O,0.0,1.0,5.0,6.0,0.0,0.166667,0.833333,0.0,1.0,4.0,0.0,0.0,1.0


In [131]:
# Turn every per-sentiment count column (names ending in 'pos' or 'neg') into its
# share of the product's total mentions, stored as '<column>_pbry'.
sentiment_count_cols = [c for c in prod_attr_prby.columns if c.endswith(('pos', 'neg'))]
for phrase in sentiment_count_cols:
    prod_attr_prby[f"{phrase}_pbry"] = prod_attr_prby[phrase] / prod_attr_prby['total']

In [132]:
prod_attr_prby.sort_values('total', ascending=False).head()

phrase,asin,comfortable design,good battery life,light weight,total,comfortable design_pbry,good battery life_pbry,light weight_pbry,comfortable design_pos,good battery life_pos,light weight_pos,comfortable design_neg,good battery life_neg,light weight_neg,comfortable design_pos_pbry,good battery life_pos_pbry,light weight_pos_pbry,comfortable design_neg_pbry,good battery life_neg_pbry,light weight_neg_pbry
92,B00BP5KOPA,0.0,7.0,7.0,14.0,0.0,0.5,0.5,0.0,7.0,6.0,0.0,0.0,1.0,0.0,0.5,0.428571,0.0,0.0,0.071429
47,B004YAVF8I,0.0,5.0,6.0,11.0,0.0,0.454545,0.545455,0.0,5.0,4.0,0.0,0.0,2.0,0.0,0.454545,0.363636,0.0,0.0,0.181818
36,B003TG75EG,0.0,7.0,1.0,8.0,0.0,0.875,0.125,0.0,7.0,0.0,0.0,0.0,1.0,0.0,0.875,0.0,0.0,0.0,0.125
179,B01C9J3EYI,0.0,3.0,4.0,7.0,0.0,0.428571,0.571429,0.0,2.0,4.0,0.0,1.0,0.0,0.0,0.285714,0.571429,0.0,0.142857,0.0
170,B016Y4NH6O,0.0,1.0,5.0,6.0,0.0,0.166667,0.833333,0.0,1.0,4.0,0.0,0.0,1.0,0.0,0.166667,0.666667,0.0,0.0,0.166667


In [133]:
# Percentile rank of each product's total mention count (pct=True scales ranks to (0, 1]).
prod_attr_prby['total_perc_rank'] = prod_attr_prby['total'].rank(pct=True)


In [134]:
prod_attr_prby.sort_values('total', ascending=False).head()

phrase,asin,comfortable design,good battery life,light weight,total,comfortable design_pbry,good battery life_pbry,light weight_pbry,comfortable design_pos,good battery life_pos,light weight_pos,comfortable design_neg,good battery life_neg,light weight_neg,comfortable design_pos_pbry,good battery life_pos_pbry,light weight_pos_pbry,comfortable design_neg_pbry,good battery life_neg_pbry,light weight_neg_pbry,total_perc_rank
92,B00BP5KOPA,0.0,7.0,7.0,14.0,0.0,0.5,0.5,0.0,7.0,6.0,0.0,0.0,1.0,0.0,0.5,0.428571,0.0,0.0,0.071429,1.0
47,B004YAVF8I,0.0,5.0,6.0,11.0,0.0,0.454545,0.545455,0.0,5.0,4.0,0.0,0.0,2.0,0.0,0.454545,0.363636,0.0,0.0,0.181818,0.994737
36,B003TG75EG,0.0,7.0,1.0,8.0,0.0,0.875,0.125,0.0,7.0,0.0,0.0,0.0,1.0,0.0,0.875,0.0,0.0,0.0,0.125,0.989474
179,B01C9J3EYI,0.0,3.0,4.0,7.0,0.0,0.428571,0.571429,0.0,2.0,4.0,0.0,1.0,0.0,0.0,0.285714,0.571429,0.0,0.142857,0.0,0.984211
170,B016Y4NH6O,0.0,1.0,5.0,6.0,0.0,0.166667,0.833333,0.0,1.0,4.0,0.0,0.0,1.0,0.0,0.166667,0.666667,0.0,0.0,0.166667,0.960526


In [135]:
# Score level = fraction of a phrase's mentions that are positive.
# A product that never mentions the phrase gives 0/0 = NaN; treat that as neutral (0.5).
# The fill is applied to the new column only, so NaNs elsewhere in the frame are not masked.
for phrase in positive_attributes_counts['phrase'].unique():
    positive_fraction = prod_attr_prby[f"{phrase}_pos_pbry"] / prod_attr_prby[f"{phrase}_pbry"]
    prod_attr_prby[f"{phrase}_score_level"] = positive_fraction.fillna(0.5)
    


In [136]:
prod_attr_prby.sort_values('total', ascending=False).head()

phrase,asin,comfortable design,good battery life,light weight,total,comfortable design_pbry,good battery life_pbry,light weight_pbry,comfortable design_pos,good battery life_pos,light weight_pos,comfortable design_neg,good battery life_neg,light weight_neg,comfortable design_pos_pbry,good battery life_pos_pbry,light weight_pos_pbry,comfortable design_neg_pbry,good battery life_neg_pbry,light weight_neg_pbry,total_perc_rank,light weight_score_level,comfortable design_score_level,good battery life_score_level
92,B00BP5KOPA,0.0,7.0,7.0,14.0,0.0,0.5,0.5,0.0,7.0,6.0,0.0,0.0,1.0,0.0,0.5,0.428571,0.0,0.0,0.071429,1.0,0.857143,0.5,1.0
47,B004YAVF8I,0.0,5.0,6.0,11.0,0.0,0.454545,0.545455,0.0,5.0,4.0,0.0,0.0,2.0,0.0,0.454545,0.363636,0.0,0.0,0.181818,0.994737,0.666667,0.5,1.0
36,B003TG75EG,0.0,7.0,1.0,8.0,0.0,0.875,0.125,0.0,7.0,0.0,0.0,0.0,1.0,0.0,0.875,0.0,0.0,0.0,0.125,0.989474,0.0,0.5,1.0
179,B01C9J3EYI,0.0,3.0,4.0,7.0,0.0,0.428571,0.571429,0.0,2.0,4.0,0.0,1.0,0.0,0.0,0.285714,0.571429,0.0,0.142857,0.0,0.984211,1.0,0.5,0.666667
170,B016Y4NH6O,0.0,1.0,5.0,6.0,0.0,0.166667,0.833333,0.0,1.0,4.0,0.0,0.0,1.0,0.0,0.166667,0.666667,0.0,0.0,0.166667,0.960526,0.8,0.5,1.0


In [143]:
# Fraction of the phrases (those with at least one positive row) that each product mentions at all.
mentioned_phrases = list(positive_attributes_counts['phrase'].unique())
prod_attr_prby['total_indicator_prby'] = prod_attr_prby[mentioned_phrases].gt(0).mean(axis=1)

In [145]:
prod_attr_prby.sort_values('total', ascending=False).head(10)

phrase,asin,comfortable design,good battery life,light weight,total,comfortable design_pbry,good battery life_pbry,light weight_pbry,comfortable design_pos,good battery life_pos,light weight_pos,comfortable design_neg,good battery life_neg,light weight_neg,comfortable design_pos_pbry,good battery life_pos_pbry,light weight_pos_pbry,comfortable design_neg_pbry,good battery life_neg_pbry,light weight_neg_pbry,total_perc_rank,light weight_score_level,comfortable design_score_level,good battery life_score_level,total_indicator_prby
92,B00BP5KOPA,0.0,7.0,7.0,14.0,0.0,0.5,0.5,0.0,7.0,6.0,0.0,0.0,1.0,0.0,0.5,0.428571,0.0,0.0,0.071429,1.0,0.857143,0.5,1.0,0.666667
47,B004YAVF8I,0.0,5.0,6.0,11.0,0.0,0.454545,0.545455,0.0,5.0,4.0,0.0,0.0,2.0,0.0,0.454545,0.363636,0.0,0.0,0.181818,0.994737,0.666667,0.5,1.0,0.666667
36,B003TG75EG,0.0,7.0,1.0,8.0,0.0,0.875,0.125,0.0,7.0,0.0,0.0,0.0,1.0,0.0,0.875,0.0,0.0,0.0,0.125,0.989474,0.0,0.5,1.0,0.666667
179,B01C9J3EYI,0.0,3.0,4.0,7.0,0.0,0.428571,0.571429,0.0,2.0,4.0,0.0,1.0,0.0,0.0,0.285714,0.571429,0.0,0.142857,0.0,0.984211,1.0,0.5,0.666667,0.666667
170,B016Y4NH6O,0.0,1.0,5.0,6.0,0.0,0.166667,0.833333,0.0,1.0,4.0,0.0,0.0,1.0,0.0,0.166667,0.666667,0.0,0.0,0.166667,0.960526,0.8,0.5,1.0,0.666667
161,B010MS67ZG,2.0,3.0,1.0,6.0,0.333333,0.5,0.166667,2.0,3.0,1.0,0.0,0.0,0.0,0.333333,0.5,0.166667,0.0,0.0,0.0,0.960526,1.0,1.0,1.0,1.0
30,B002TLTGM6,0.0,2.0,4.0,6.0,0.0,0.333333,0.666667,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667
139,B00S0CTU2Y,0.0,2.0,4.0,6.0,0.0,0.333333,0.666667,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667
38,B0043T7FXE,1.0,4.0,1.0,6.0,0.166667,0.666667,0.166667,1.0,3.0,1.0,0.0,1.0,0.0,0.166667,0.5,0.166667,0.0,0.166667,0.0,0.960526,1.0,1.0,0.75,1.0
163,B011WBI4S0,2.0,3.0,1.0,6.0,0.333333,0.5,0.166667,2.0,3.0,1.0,0.0,0.0,0.0,0.333333,0.5,0.166667,0.0,0.0,0.0,0.960526,1.0,1.0,1.0,1.0


In [146]:
# Final score = mean score level x percentile rank of total mentions x phrase coverage.
score_level_cols = [c for c in prod_attr_prby.columns if c.endswith('score_level')]
mean_score_level = prod_attr_prby[score_level_cols].mean(axis=1)
prod_attr_prby['score'] = (
    mean_score_level
    * prod_attr_prby['total_perc_rank']
    * prod_attr_prby['total_indicator_prby']
)
# Ten best-scoring products, plus the number of products that were scored.
top10_products = prod_attr_prby.sort_values('score', ascending=False).head(10)
num_prods = len(prod_attr_prby)

In [147]:
top10_products

phrase,asin,comfortable design,good battery life,light weight,total,comfortable design_pbry,good battery life_pbry,light weight_pbry,comfortable design_pos,good battery life_pos,light weight_pos,comfortable design_neg,good battery life_neg,light weight_neg,comfortable design_pos_pbry,good battery life_pos_pbry,light weight_pos_pbry,comfortable design_neg_pbry,good battery life_neg_pbry,light weight_neg_pbry,total_perc_rank,light weight_score_level,comfortable design_score_level,good battery life_score_level,total_indicator_prby,score
161,B010MS67ZG,2.0,3.0,1.0,6.0,0.333333,0.5,0.166667,2.0,3.0,1.0,0.0,0.0,0.0,0.333333,0.5,0.166667,0.0,0.0,0.0,0.960526,1.0,1.0,1.0,1.0,0.960526
163,B011WBI4S0,2.0,3.0,1.0,6.0,0.333333,0.5,0.166667,2.0,3.0,1.0,0.0,0.0,0.0,0.333333,0.5,0.166667,0.0,0.0,0.0,0.960526,1.0,1.0,1.0,1.0,0.960526
38,B0043T7FXE,1.0,4.0,1.0,6.0,0.166667,0.666667,0.166667,1.0,3.0,1.0,0.0,1.0,0.0,0.166667,0.5,0.166667,0.0,0.166667,0.0,0.960526,1.0,1.0,0.75,1.0,0.880482
1,B0002CPBWS,1.0,2.0,1.0,4.0,0.25,0.5,0.25,1.0,1.0,1.0,0.0,1.0,0.0,0.25,0.25,0.25,0.0,0.25,0.0,0.884211,1.0,1.0,0.5,1.0,0.736842
64,B005HQ514C,0.0,3.0,3.0,6.0,0.0,0.5,0.5,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667,0.533626
30,B002TLTGM6,0.0,2.0,4.0,6.0,0.0,0.333333,0.666667,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667,0.533626
139,B00S0CTU2Y,0.0,2.0,4.0,6.0,0.0,0.333333,0.666667,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667,0.533626
94,B00BZC31WQ,0.0,5.0,1.0,6.0,0.0,0.833333,0.166667,0.0,5.0,1.0,0.0,0.0,0.0,0.0,0.833333,0.166667,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667,0.533626
92,B00BP5KOPA,0.0,7.0,7.0,14.0,0.0,0.5,0.5,0.0,7.0,6.0,0.0,0.0,1.0,0.0,0.5,0.428571,0.0,0.0,0.071429,1.0,0.857143,0.5,1.0,0.666667,0.52381
187,B01GI93F76,0.0,3.0,2.0,5.0,0.0,0.6,0.4,0.0,3.0,2.0,0.0,0.0,0.0,0.0,0.6,0.4,0.0,0.0,0.0,0.928947,1.0,0.5,1.0,0.666667,0.516082


In [149]:
top10_products

phrase,asin,comfortable design,good battery life,light weight,total,comfortable design_pbry,good battery life_pbry,light weight_pbry,comfortable design_pos,good battery life_pos,light weight_pos,comfortable design_neg,good battery life_neg,light weight_neg,comfortable design_pos_pbry,good battery life_pos_pbry,light weight_pos_pbry,comfortable design_neg_pbry,good battery life_neg_pbry,light weight_neg_pbry,total_perc_rank,light weight_score_level,comfortable design_score_level,good battery life_score_level,total_indicator_prby,score
161,B010MS67ZG,2.0,3.0,1.0,6.0,0.333333,0.5,0.166667,2.0,3.0,1.0,0.0,0.0,0.0,0.333333,0.5,0.166667,0.0,0.0,0.0,0.960526,1.0,1.0,1.0,1.0,0.960526
163,B011WBI4S0,2.0,3.0,1.0,6.0,0.333333,0.5,0.166667,2.0,3.0,1.0,0.0,0.0,0.0,0.333333,0.5,0.166667,0.0,0.0,0.0,0.960526,1.0,1.0,1.0,1.0,0.960526
38,B0043T7FXE,1.0,4.0,1.0,6.0,0.166667,0.666667,0.166667,1.0,3.0,1.0,0.0,1.0,0.0,0.166667,0.5,0.166667,0.0,0.166667,0.0,0.960526,1.0,1.0,0.75,1.0,0.880482
1,B0002CPBWS,1.0,2.0,1.0,4.0,0.25,0.5,0.25,1.0,1.0,1.0,0.0,1.0,0.0,0.25,0.25,0.25,0.0,0.25,0.0,0.884211,1.0,1.0,0.5,1.0,0.736842
64,B005HQ514C,0.0,3.0,3.0,6.0,0.0,0.5,0.5,0.0,3.0,3.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667,0.533626
30,B002TLTGM6,0.0,2.0,4.0,6.0,0.0,0.333333,0.666667,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667,0.533626
139,B00S0CTU2Y,0.0,2.0,4.0,6.0,0.0,0.333333,0.666667,0.0,2.0,4.0,0.0,0.0,0.0,0.0,0.333333,0.666667,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667,0.533626
94,B00BZC31WQ,0.0,5.0,1.0,6.0,0.0,0.833333,0.166667,0.0,5.0,1.0,0.0,0.0,0.0,0.0,0.833333,0.166667,0.0,0.0,0.0,0.960526,1.0,0.5,1.0,0.666667,0.533626
92,B00BP5KOPA,0.0,7.0,7.0,14.0,0.0,0.5,0.5,0.0,7.0,6.0,0.0,0.0,1.0,0.0,0.5,0.428571,0.0,0.0,0.071429,1.0,0.857143,0.5,1.0,0.666667,0.52381
187,B01GI93F76,0.0,3.0,2.0,5.0,0.0,0.6,0.4,0.0,3.0,2.0,0.0,0.0,0.0,0.0,0.6,0.4,0.0,0.0,0.0,0.928947,1.0,0.5,1.0,0.666667,0.516082


In [150]:
# Render the top-10 ASINs as escaped SQL string literals before building the query text.
top10_asins = top10_products['asin'].astype(str).tolist()
asin_in_list = "','".join(asin.replace("'", "''") for asin in top10_asins)
# Product details for the top-10 ASINs together with each product's review count.
fetch_matched_products_query = \
        f'''SELECT bp.*, br.num_reviews
            FROM (
                SELECT asin, count(*) as num_reviews
                FROM baseline_reviews
                WHERE asin IN ('{asin_in_list}')
                GROUP BY asin
            ) br
            JOIN baseline_products bp
            ON bp.asin=br.asin
        '''
matched_products = pd.read_sql(fetch_matched_products_query, conn)
score_columns = ['asin', 'score', 'total_perc_rank'] + \
    [c for c in top10_products.columns if c.endswith('_score_level')]
# Join on asin explicitly: without `on=`, merge uses every column name the two frames
# share, so an unrelated same-named column in baseline_products would silently change the join.
recommended_list = (
    matched_products
    .merge(top10_products[score_columns], on='asin')
    .sort_values('score', ascending=False)
)

In [151]:
recommended_list

Unnamed: 0,category,main_cat,description,title,brand,feature,also_view,also_buy,date,price,asin,imageURL,imageURLHighRes,num_reviews,score,total_perc_rank,light weight_score_level,comfortable design_score_level,good battery life_score_level
7,"Electronics, Computers & Accessories, Computer Accessories & Peripherals, Keyboards, Mice & Accessories, Mice",Computers,Logitech Advanced Optical Tracking technology Improved LED light positioning makes this optical mouse more responsive and accurate. Wireless connection This mouse connects to your computer using a...,Logitech M325c Wireless Optical Mouse Marc Monkey,Logitech,Logitech M325c Wireless Optical Mouse. This mouse utilizes wireless USB connectivity for a wireless range up to 32.8'. Three buttons and a tilting scroll wheel make navigation easy. What's include...,"B01N5O4JM3, B00ZYK2860, B00ZYK2BJY, B00ZYK29XC, B01FM3FAHC, B07B65GMSD, B01FZP1UY4, B011WBI4S0, B00ZYK2F6S, B01FM3GFSA, B01HHI04OE, B01FM3G4T0, B01HP1J8EK, B00N84CYFE, B00ZYK2IWE, B01GOLXJBU, B01G...",,"June 29, 2015",$12.99,B010MS67ZG,https://images-na.ssl-images-amazon.com/images/I/51ewKi2IUlL._SS40_.jpg,https://images-na.ssl-images-amazon.com/images/I/51ewKi2IUlL.jpg,3397,0.960526,0.960526,1.0,1.0,1.0
8,"Electronics, Computers & Accessories, Computer Accessories & Peripherals, Keyboards, Mice & Accessories, Mice",Computers,Features. A mouse thats no longer a mouse but a flash reminder of fun that makes you smile.. With its colorful design its as bright and playful as you are.. Its all fun for a whole year and a half...,Logitech M325c Wireless Mouse Facets 910-004445,Logitech,Connection Type: Wireless Color: Blue Movement Detection: Optical Movement Resolution: 1000 dpi,"B01N5O4JM3, B00ZYK2BJY, B01FM3FSDS, B01FM3FBS0, B00ZYK29XC, B01MZIG7T8, B00N84CYFE, B005HQ5138, B010MS67ZG, B00ZYK2860, B005DSPLDA, B00E7IPN0I, B007T1CTDE, B07DKTJQNQ, B072K2G8RC, B072175DB5, B071...",,"July 6, 2015",$2.54,B011WBI4S0,"https://images-na.ssl-images-amazon.com/images/I/5177zduUZWL._SS36_.jpg, https://images-na.ssl-images-amazon.com/images/I/41uXdkjC7LL._SS36_.jpg, https://images-na.ssl-images-amazon.com/images/I/4...","https://images-na.ssl-images-amazon.com/images/I/5177zduUZWL.jpg, https://images-na.ssl-images-amazon.com/images/I/41uXdkjC7LL.jpg, https://images-na.ssl-images-amazon.com/images/I/41wT49oPHVL.jpg...",3398,0.960526,0.960526,1.0,1.0,1.0
2,"Electronics, Computers & Accessories, Computer Accessories & Peripherals, Input Devices, Trackballs",All Electronics,"The logitech wireless trackball m570 gives you a different kind of comfort. it stays in one place and supports your hand to let you work all day with ease. <div class=""aplus""> <div class=""leftimag...","Logitech M570 Wireless Trackball Mouse &ndash; Ergonomic Design with Sculpted Right-hand Shape, Compatible with Apple Mac and Microsoft Windows Computers, USB Unifying Receiver, Dark Gray",Logitech,TRACKBALL COMFORT - Sculpted shape supports your hand and stays in one placemove the cursor without moving your arm CONVENIENT CONTROLS - Easy-to-reach Back/Forward buttons makes moving quickly be...,,"B073XDKBHX, B002MMY4WY, B004SUIM4E, B075GZVD4T, B015VERVU6, B018GFBCO8, B01ALB08CC, B06ZXSMTYY, B01MYZMODY, B003VAGXWK, B0753P1GTS, B005L38VRU, B00CYX26BC, B06X3W3TM4, B00000JRRD, B004N627KS, B01A...","November 11, 2009",$2.54,B0043T7FXE,"https://images-na.ssl-images-amazon.com/images/I/311yzThqGyL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/41IXWFe%2Be3L._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I...","https://images-na.ssl-images-amazon.com/images/I/311yzThqGyL.jpg, https://images-na.ssl-images-amazon.com/images/I/41IXWFe%2Be3L.jpg, https://images-na.ssl-images-amazon.com/images/I/41gTSJpg3gL.j...",5457,0.880482,0.960526,1.0,1.0,0.75
0,"Electronics, Computers &amp; Accessories, Computer Accessories &amp; Peripherals, Keyboards, Mice &amp; Accessories, Mice",All Electronics,"This handy optical mouse features a tiny snap-in receiver that plugs into your notebook when you're ready to work, and snaps into your mouse when you're ready to roll. And extended battery life le...",Microsoft Wireless Notebook Optical Mouse 3000 - Winter Blue,Microsoft,Microsoft BX3-00001 Wireless Notebook Optical Mouse (Winter Blue) Scroll wheel optical mouse Customizeable buttons Ambidextrous design makes working more comfortable Connects with tiny snap-in rec...,"B003PBZNF8, B0035ERKYW, B006HUMTXI, B002DPUUL4, B008OEHR00, B00MK24EUI, B006HUMTZG, B005058B5Q, B0051ZQ7MO, B003PBZHPE, B008OEHT30, B00H9862GM, B001FQQR6Y, B005KSAJXK, B00KG5REIA, B005058B4W, B00B...",,"June 1, 2004",,B0002CPBWS,"https://images-na.ssl-images-amazon.com/images/I/41FRA0DAWRL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/41XAG2EXH3L._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/4...","https://images-na.ssl-images-amazon.com/images/I/41FRA0DAWRL.jpg, https://images-na.ssl-images-amazon.com/images/I/41XAG2EXH3L.jpg, https://images-na.ssl-images-amazon.com/images/I/41587QN417L.jpg...",680,0.736842,0.884211,1.0,1.0,0.5
1,"Electronics, Computers & Accessories, Computer Accessories & Peripherals, Keyboards, Mice & Accessories, Mice",All Electronics,"It began with iPhone. Then came iPod touch. Then MacBook Pro. Intuitive, smart, dynamic. Multi-Touch technology introduced a remarkably better way to interact with your portable devices - all usin...",Apple Magic Bluetooth Mouse (MB829LL/A),Apple,Laser technology delivers 20 times the performance of standard optical tracking Miniature sensors detect even the slightest movement Top-shell design matches other Apple products Bluetooth technol...,"B016QO5YNG, B01NAAY3RA, B0145NHF5K, B002VPU98K, B002NX0M8C, B007X3VEX4, B000B6D39I, B07BR94PPD, B01NBHVL0R, B00JUHDYSO, B01MT7EN0F, B06ZZJ6C3P, B01MDV0A0V, B00DPJZ3PU, B000UY4XQI, B0007Y79E4, B01N...","B01NABDNPH, B016QO64FI, B071ZZTNBM, B016QO5YNG, B000OOYECC, B078YG84MN, B005L38VRU, B000V07N9U, B016K0YY1Y, B01J4BO0X8, B00WRDS0AU, B06XZZ8YTG, B01N59VJED, B001UH4VFW, B002VPU98K, B016E2YUHG, B076...","October 20, 2009",$45.99,B002TLTGM6,,,1352,0.533626,0.960526,1.0,0.5,1.0
3,"Electronics, Computers & Accessories, Computer Accessories & Peripherals, Keyboards, Mice & Accessories, Mice",All Electronics,"LOG910002650 UNITED STATIONERS (OP) MOUSE,WIRELSS,M325,BE",Logitech Wireless Mouse M325 with Designed-For-Web Scrolling - Blue,Logitech,"Micro-precise scrolling: New designed-for-Web scrolling makes searches, shopping and browsing easier Feel-good design: Contoured shape and textured rubber grips keep your hand comfortable even aft...","B005HQ5138, B005DSPLDA, B007T1CTDE, B005KSAJXK, B012UG1WTO, B01HP1J8EK, B01GOLXKMS, B011WBI4S0, B00ZYK2F6S, B01GOLXJBU, B00ZYK2IWE, B00ZYK29XC, B010MS67ZG, B00ADBY97G, B00M55BIXG, B00ADBY98A, B00G...","B005HQ5138, B005DSPLDA, B007T1CTDE, B005DSPLCQ, B00BGSRK1M, B00GEE8ZIU, B01C445BOO, B012UG1WTO, B01JB70J7Q, B01GOLXJBU, B00M55BIXG, B008PU665A, B00ZYK2F6S, B006JRIDHK, B005KSAJXK, B01BL0BSVC, B003...","September 1, 2011",,B005HQ514C,"https://images-na.ssl-images-amazon.com/images/I/41IcxoMw-KL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/41DhP7T8d0L._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/4...","https://images-na.ssl-images-amazon.com/images/I/41IcxoMw-KL.jpg, https://images-na.ssl-images-amazon.com/images/I/41DhP7T8d0L.jpg, https://images-na.ssl-images-amazon.com/images/I/418UoX7RQ7L.jpg...",1443,0.533626,0.960526,1.0,0.5,1.0
5,"Electronics, Computers & Accessories, Computer Accessories & Peripherals, Keyboards, Mice & Accessories, Mice",All Electronics,"The blue, customizable Windows touch tab on Sculpt Comfort Mouse revolutionizes the relationship between hardware and software, providing quick and easy access to the Start Menu, Cortana*, OneNote...",Microsoft Sculpt Comfort Bluetooth Mouse (H3S-00001),Microsoft,"Customizable Windows touch tab for easy access to the Start Menu and more. Ergonomic design for right-handed users makes extended use more comfortable. Scroll left, right, front and back. Quickly ...",,"B01AP87PCC, B00D68ZVY8, B0054YQOM0, B07KPXBLH8, B0163HP38W, B01IQZFFIW, B06X3W3YQD, B001PII7S8, B07L582WNL, B001TKTHFY, B073W4MPJF, B003U6VS3O, B07G7ZJTJG, B07C3SL52G, B07KBZ4CJQ, B0017QFDQW, B016...","May 21, 2013",$2.54,B00BZC31WQ,"https://images-na.ssl-images-amazon.com/images/I/318CndKooGL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/418TjaiqolL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/3...","https://images-na.ssl-images-amazon.com/images/I/318CndKooGL.jpg, https://images-na.ssl-images-amazon.com/images/I/418TjaiqolL.jpg, https://images-na.ssl-images-amazon.com/images/I/31IACm9QqfL.jpg...",1737,0.533626,0.960526,1.0,0.5,1.0
6,"Electronics, Computers & Accessories, Computer Accessories & Peripherals, Keyboards, Mice & Accessories, Mice",Computers,Microsoft Designer Bluetooth Mouse,Microsoft Designer Bluetooth Mouse,Microsoft,"Stylish, ultra-thin profile complements your desktop. Clutter-free and reliable experience utilizing the latest in Bluetooth 4.0 technology. Microsoft BlueTrack Technology gives you precise contro...","B00JUHDYSO, B00BZC31WQ, B015HWLB2G, B072N16KPB, B00H9862GM, B0148NPIQK, B07FDQ2MGG, B01MG4JB12, B0774K47TS, B0148NPJ3W, B072FG8LBV, B07FDQ2MGD, B01EFAGMRA, B019MR4Y3U, B0788FPG37, B00W77BKEY, B07H...","B073W4MPJF, B0163HP38W, B074GYX6VR, B07FDHK5NW, B072N16KPB, B072K5TXGT, B07CSPSMQY, B07FDQ2MGG, B00JUHDYSO, B07FDKZQTY, B071SF41Y9, B0134V3KIA, B01C445FRM, B015HWLB2G, B072FG8LBV, B0168YIWSI, B07B...","March 10, 2015",$22.99,B00S0CTU2Y,"https://images-na.ssl-images-amazon.com/images/I/31VTT1Tb9UL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/31AkQDkwmlL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/3...","https://images-na.ssl-images-amazon.com/images/I/31VTT1Tb9UL.jpg, https://images-na.ssl-images-amazon.com/images/I/31AkQDkwmlL.jpg, https://images-na.ssl-images-amazon.com/images/I/31A05sE97uL.jpg...",435,0.533626,0.960526,1.0,0.5,1.0
4,"Electronics, Computers & Accessories",Computers,The stylish Logitech MK270 Wireless Keyboard and Mouse Combo is perfect for the home office or workplace. Ditch the touchpad for this full-size keyboard and mouse. Easily connect using Logitech's ...,"Logitech MK270 Wireless Keyboard and Mouse Combo - Keyboard and Mouse Included, 2.4GHz Dropout-Free Connection, Long Battery Life (Frustration-Free Packaging)",Logitech,WORK FOR LONGER WITH LONG BATTERY LIFE Basic AA and AAA batteries are included with the keyboard and mouse KEYBOARD AND MOUSE COMBO The Logitech MK270 Wireless Keyboard and Mouse Combo includes a ...,"B003VAGXZC, B00QXT5T3U, B00L1Y11D4, B003VAHYNC, B07K66BNT8, B079HR85C3, B07M6DDN3G, B014EUQOGK, B003VANO7C, B07M5W5FVB, B0036E8V08, B07CZK15G8, B07BR1KKL8, B01FM8H22S, B0787G1YFG, B00LZVP73S, B01A...",,"April 1, 2018",$19.99,B00BP5KOPA,"https://images-na.ssl-images-amazon.com/images/I/41SSc1HQBWL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/31R%2BVe6MATL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I...","https://images-na.ssl-images-amazon.com/images/I/41SSc1HQBWL.jpg, https://images-na.ssl-images-amazon.com/images/I/31R%2BVe6MATL.jpg, https://images-na.ssl-images-amazon.com/images/I/41VHOvtXMmL.j...",4625,0.52381,1.0,0.857143,0.5,1.0
9,"Electronics, Computers & Accessories, Computer Accessories & Peripherals, Keyboards, Mice & Accessories, Mice",Computers,"Logitech M325c Wireless Optical Mouse: Eliminate the hassle of cable clutter with this mouse, which utilizes wireless USB connectivity for a wireless range up to 32.8'. Three buttons and a tilting...",Logitech - M325c Wireless Optical Mouse - Red Harlequin,Logitech,Logitech M325c Wireless Optical Mouse. This mouse utilizes wireless USB connectivity for a wireless range up to 32.8'. Three buttons and a tilting scroll wheel make navigation easy. AA battery Nan...,"B012UG1WTO, B011WBI4S0, B01GOLXKMS, B00N84CYFE, B00ZYK2F6S, B01FZP1UY4, B01FM3GFSA, B01HP1J8EK, B071WS37H2, B00ZYK2IWE, B01FM3FAHC, B00ZYK29XC, B01FM3G4T0, B079P4Z97Y, B01HHI04OE, B07B65GMSD, B01G...","B01FM3GFSA, B00ZYK2KJA, B01FM3G4T0, B01HHI04OE, B012UG1WTO, B011WBI4S0, B01FM3FAHC, B01HP1J8EK, B01GOLXKMS, B01FZP1UY4, B007T1CTDE, B010MS67ZG, B006JRIDHK, B00ADBY97G, B01C445BOO, B079KCRLLD, B005...","March 25, 2016",,B01GI93F76,"https://images-na.ssl-images-amazon.com/images/I/41ZnefJEJIL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/41wmKe3VlOL._SS40_.jpg, https://images-na.ssl-images-amazon.com/images/I/5...","https://images-na.ssl-images-amazon.com/images/I/41ZnefJEJIL.jpg, https://images-na.ssl-images-amazon.com/images/I/41wmKe3VlOL.jpg, https://images-na.ssl-images-amazon.com/images/I/51Xhwk8vWNL.jpg...",1083,0.516082,0.928947,1.0,0.5,1.0


# get_reviews_for_attributes_and_asin()

In [None]:
# TODO: add sentiment to this query.
# TODO: consider searching for nearest neighbors of the chosen attributes to expand the search.
#   - TODO: this may slightly change the product recommendation function.

# Scratch

In [10]:
# Scratch: load every shortlisted_attributes row for CATEGORY into a DataFrame
# (the same SELECT that get_attributes_list issues).
shortlisted_attributes_query = \
    f'''
    SELECT *
    FROM shortlisted_attributes
    WHERE category='{CATEGORY}'
    '''
shortlisted_attributes = pd.read_sql(shortlisted_attributes_query, conn)

In [13]:
shortlisted_attributes.shape

(386, 12)

In [33]:
# Scratch: top 10 attributes per qphrase, listed qphrase by qphrase.
sim_attrs_list = (
    shortlisted_attributes
    .sort_values('neighbor_distances')
    .sort_values('n_reviews', ascending=False)
    .groupby('qphrase')
    .head(10)
    .reset_index(drop=True)
    .loc[:, ['key_phrase_id', 'phrase', 'qphrase']]
    .sort_values('qphrase')
)
sim_attrs_list

Unnamed: 0,key_phrase_id,phrase,qphrase
33,331141,nice bass,awesome bass
15,328427,good bass,awesome bass
61,331635,great punchy bass,awesome bass
60,330317,mild bass,awesome bass
49,333649,decent bass,awesome bass
48,328397,big bass,awesome bass
29,328918,strong bass,awesome bass
12,328681,great bass,awesome bass
34,331766,heavy bass,awesome bass
62,333517,blissful bass,awesome bass


In [38]:
# Cap the sample size at the number of distinct attributes: DataFrame.sample raises
# ValueError when asked for more rows than exist (without replacement).
sim_attrs_deduped = sim_attrs_list[['key_phrase_id', 'phrase']].drop_duplicates()
sim_attrs_deduped.sample(min(50, len(sim_attrs_deduped)))

Unnamed: 0,key_phrase_id,phrase
31,329045,quality product
15,328427,good bass
0,328250,good sound
12,328681,great bass
32,330168,great noise cancelling
36,332935,active noise cancellation
3,328821,good sound quality
39,329197,nice fit
16,329203,comfortable fit
7,328974,price range


In [21]:
# Build the per-qphrase phrase lists here: no earlier cell in this notebook defines
# `sim_attr_lists`, so on a fresh kernel the n_attrs assignment raised NameError.
# NOTE(review): reconstructed from the commented-out groupby expression in the following
# cell (closest neighbors first, ties by most reviews) — confirm the intended ordering.
sim_attr_lists = (
    shortlisted_attributes
    .sort_values(['neighbor_distances', 'n_reviews'], ascending=[True, False])
    .groupby('qphrase')['phrase']
    .apply(list)
    .reset_index()
)
# Number of candidate phrases collected for each qphrase.
sim_attr_lists['n_attrs'] = sim_attr_lists['phrase'].apply(len)

In [16]:
#shortlisted_attributes.sort_values('n_reviews', ascending=False).sort_values('neighbor_distances').groupby('qphrase')['phrase'].apply(list).reset_index()

In [22]:
sim_attr_lists

Unnamed: 0,qphrase,phrase,n_attrs
0,awesome bass,"[great bass, good bass, strong bass, nice bass, heavy bass, big bass, decent bass, mild bass, great punchy bass, blissful bass, great bass response, great bass performance, great bass sound, reaso...",15
1,battery life,"[battery life, volume control, price range, volume level, quality product]",5
2,comfortable fit,"[good sound, good sound quality, great sound quality, excellent sound quality, comfortable fit, amazing sound quality, poor sound quality, great audio quality, good fit, good quality sound, high q...",122
3,effective noise cancelling,"[good battery life, great noise cancelling, active noise cancellation, good noise cancellation, active noise cancelling, good noise isolation, great customer support, good noise cancelling, great ...",124
4,good fit,"[great fit, comfortable fit, good fit, nice fit, poor fit, tight fit, excellent fit, bad fit, comfortable fit battery, new fit, decent fit]",11
5,good sound quality,"[good sound quality, great sound quality, excellent sound quality, amazing sound quality, poor sound quality, great audio quality, good quality sound, high quality sound, decent sound quality, goo...",100
6,is durable,"[works great, works good, sounds good, sounds excellent, gels tight]",5
7,light Weight,"[light weight, light usage, light weight easy, light music]",4
