## Set up

In [1]:
from pymongo import MongoClient
from pprint import pprint

import pandas as pd
import re
import nltk

import pickle
import json

In [2]:
from bson.objectid import ObjectId

In [3]:
# stemming
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /Users/Jocelyn/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [5]:
# Load the pickled product list, review list and similarity records from disk
def _load_pickle(path):
    """Deserialize and return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)

product_list = _load_pickle('pickles/product_list.pickle')
review_list = _load_pickle('pickles/review_list.pickle')
sim_asin = _load_pickle('pickles/sim_asin.pickle')

In [None]:
# recommend:
# - top 5 products for each category
# - return dictionary with the product info

In [16]:
# Flatten the raw similarity records into one three-column frame.
# Each record is read as: record[1] -> the two product ids of the pair,
# record[0][0][0] -> the similarity value for that pair.
sim = pd.DataFrame({
    'prod1': [record[1][0] for record in sim_asin],
    'prod2': [record[1][1] for record in sim_asin],
    'similarity': [record[0][0][0] for record in sim_asin],
})

In [18]:
# Show the last five rows of the similarity frame as a quick sanity check
sim.tail(5)

Unnamed: 0,prod1,prod2,similarity
2509915,B0000Y3NO6,B00AF3RYHE,0.0
2509916,B0000Y3NO6,B008KAA8P4,0.0
2509917,B0000Y3NO6,B005JEK7DC,0.0
2509918,B0000Y3NO6,B003VCKZZO,0.0
2509919,B0000Y3NO6,B000AV31NC,0.0


In [143]:
# for given product, get complete list of similarities
prod = 'B0001EL5JA'  # product id used as the worked example in this cell

def get_other_prod(x, asin=prod):
    """Return the partner product id from one similarity row.

    Parameters
    ----------
    x : row or mapping with 'prod1' and 'prod2' entries.
    asin : product id to exclude. The default is the value `prod` held when
        this function was defined; reassigning `prod` later does not change
        it, so pass `asin` explicitly for any other product.

    Returns
    -------
    x['prod1'] when it differs from `asin`, otherwise x['prod2'].
    """
    first = x['prod1']
    return first if first != asin else x['prod2']
    
def get_sims(asin):
    """Return every similarity row that involves `asin`, keyed by its partner.

    Reads the module-level `sim` frame (columns 'prod1', 'prod2',
    'similarity') and leaves it unmodified.

    Parameters
    ----------
    asin : product id to look up on either side of a pair.

    Returns
    -------
    pandas.DataFrame with columns 'similarity', 'selected_asin' (always
    `asin`) and 'other_asin' (the other product of the pair), keeping the
    row index of `sim`. The frame is empty when `asin` appears in no pair.
    """
    involves_asin = (sim['prod1'] == asin) | (sim['prod2'] == asin)
    matches = sim.loc[involves_asin]

    # Choose the partner relative to the *requested* asin. Keep prod1 where it
    # is not the requested product, otherwise fall back to prod2. Vectorised,
    # so it also works when no row matches.
    partner = matches['prod1'].where(matches['prod1'] != asin, matches['prod2'])

    return (
        matches
        .assign(selected_asin=asin, other_asin=partner)
        .drop(columns=['prod1', 'prod2'])
    )

# Similarity rows for the example product `prod`
sim_out = get_sims(prod)

In [144]:
# Attach product metadata twice: once for the selected product, then once
# for each partner product. The '_1' / '_2' suffixes are passed on the second
# join only, where the product columns from the first join collide with the
# ones being added ('_1' = selected product, '_2' = partner product).
sim1 = sim_out.merge(product_list, left_on='selected_asin', right_on='asin')
sim2 = sim1.merge(
    product_list,
    left_on='other_asin',
    right_on='asin',
    suffixes=('_1', '_2'),
)

In [145]:
# Preview the first five rows of the merged frame
sim2.head(5)

Unnamed: 0,similarity,selected_asin,other_asin,id_1,asin_1,product_1,overall_rating_1,review_count_1,description_1,category2_1,category3_1,id_2,asin_2,product_2,overall_rating_2,review_count_2,description_2,category2_2,category3_2
0,0.983379,B0001EL5JA,B00172M9Q8,5dc881769f9b98109203bcc5,B0001EL5JA,PCA SKIN Protecting Hydrator Broad Spectrum S...,4.3,41,This non-oily daily hydrator and sunscreen pr...,Creams & Moisturizers,Face Moisturizers,5dc881779f9b98109203c4b1,B00172M9Q8,Mario Badescu Hydrating Moisturizer with Bioca...,4.2,116,Intense moisture retention and antioxidant pro...,Creams & Moisturizers,Face Moisturizers
1,0.974769,B0001EL5JA,B0015ZAO6E,5dc881769f9b98109203bcc5,B0001EL5JA,PCA SKIN Protecting Hydrator Broad Spectrum S...,4.3,41,This non-oily daily hydrator and sunscreen pr...,Creams & Moisturizers,Face Moisturizers,5dc881769f9b98109203c454,B0015ZAO6E,"Mario Badescu Buttermilk Moisturizer, 2 oz.",3.9,98,Hydrate and rejuvenate your complexion. Lactic...,Creams & Moisturizers,Face Moisturizers
2,0.972906,B0001EL5JA,B000IO6NFE,5dc881769f9b98109203bcc5,B0001EL5JA,PCA SKIN Protecting Hydrator Broad Spectrum S...,4.3,41,This non-oily daily hydrator and sunscreen pr...,Creams & Moisturizers,Face Moisturizers,5dc881769f9b98109203c060,B000IO6NFE,La Roche-Posay Toleriane Fluide Soothing Prote...,4.0,207,Amazon is an authorized retailer of La Roche-...,Creams & Moisturizers,Not listed
3,0.970851,B0001EL5JA,B01EETSH5M,5dc881769f9b98109203bcc5,B0001EL5JA,PCA SKIN Protecting Hydrator Broad Spectrum S...,4.3,41,This non-oily daily hydrator and sunscreen pr...,Creams & Moisturizers,Face Moisturizers,5dc881799f9b98109203eb9c,B01EETSH5M,IMAGE Skincare Clear Cell Mattifying Moisturiz...,3.7,9,CLEAR CELL mattifying moisturizer for oily ski...,Creams & Moisturizers,Not listed
4,0.969799,B0001EL5JA,B0051OY7MC,5dc881769f9b98109203bcc5,B0001EL5JA,PCA SKIN Protecting Hydrator Broad Spectrum S...,4.3,41,This non-oily daily hydrator and sunscreen pr...,Creams & Moisturizers,Face Moisturizers,5dc881779f9b98109203ccec,B0051OY7MC,"L'Occitane Angelica Protective Lotion SPF 15, ...",4.5,3,Angelica Protective Lotion SPF 15 helps moistu...,Creams & Moisturizers,Not listed


In [85]:
# Row/column count of the merged frame.
# NOTE(review): this cell's execution count (85) is lower than that of the
# cell that builds sim2 (144), so the saved output may describe an earlier
# version of sim2 — re-run to refresh.
sim2.shape

(2492028, 19)

### generate recommendation

In [45]:
# generate top 5 matches in each category

def get_rec(asin, n=5):
    """Return the `n` most similar products in each category for `asin`.

    Reads the module-level `sim2` frame, which must provide 'similarity',
    'category2_2' and a column identifying the selected product.

    Parameters
    ----------
    asin : product id to recommend for.
    n : maximum number of rows kept per 'category2_2' value (default 5).

    Returns
    -------
    pandas.DataFrame holding the rows of `sim2` for `asin`, ordered by
    category (descending) and then similarity (descending), truncated to the
    first `n` rows of each category. Empty when `asin` has no rows in `sim2`.
    """
    # sim2 as built in this notebook identifies the selected product in
    # 'selected_asin'; filtering on 'prod1' raises KeyError against that
    # frame. Fall back to 'prod1' only for frames that still use the older
    # prod1/prod2 layout.
    key_col = 'selected_asin' if 'selected_asin' in sim2.columns else 'prod1'

    candidates = sim2[sim2[key_col] == asin]
    ranked = candidates.sort_values(['category2_2', 'similarity'], ascending=False)
    return ranked.groupby('category2_2').head(n)
    

In [82]:
# Try the recommender on one product; the commented lines are alternative
# products that can be swapped in.
# test = get_rec('B0001EL5Q8') #anti-aging
# test = get_rec('B0001EL5JA') #hydrating
test = get_rec('B01HEESSHG')
test.shape

(2, 19)

In [83]:
# Show up to the first five recommendation rows
test[0:5]

Unnamed: 0,prod1,prod2,similarity,id_1,asin_1,product_1,overall_rating_1,review_count_1,description_1,category2_1,category3_1,id_2,asin_2,product_2,overall_rating_2,review_count_2,description_2,category2_2,category3_2
620054,B01HEESSHG,B01HBS7XP8,0.570531,5dc881799f9b98109203ec9d,B01HEESSHG,L'Occitane 20-Piece Summer Treat,4.1,6,Composed with some of the most cherished scent...,Sets & Kits,Not listed,5dc881799f9b98109203eca3,B01HBS7XP8,COSMEDIX Benefit Balance Antioxidant Infused T...,5.0,3,"Soothe, calm and nourish the skin with green t...",Toners & Astringents,Not listed
248905,B01HEESSHG,B01HGSJPMW,0.837413,5dc881799f9b98109203ec9d,B01HEESSHG,L'Occitane 20-Piece Summer Treat,4.1,6,Composed with some of the most cherished scent...,Sets & Kits,Not listed,5dc881799f9b98109203eca4,B01HGSJPMW,ELEMIS Superfood Facial Oil - Nourishing Face ...,4.7,33,This natural blend of highly concentrated plan...,Creams & Moisturizers,Face Oil


In [71]:
# Full description text of the selected product, taken from the first row.
# NOTE(review): this cell's execution count (71) predates the cell that sets
# `test` (82), and the saved output matches the description of B0001EL5JA
# rather than B01HEESSHG — re-run to refresh.
test.iloc[0]['description_1']

' This non-oily daily hydrator and sunscreen provides light moisture with a matte finish. Active Ingredients: Directions: For the best protection, apply sunscreen every day, year-round, in every type of weather. Apply sunscreen 15 minutes prior to exposure to allow the active ingredients to penetrate the skin. Formulated with Beautiful Skin in Mind Our products are scientifically developed and carefully formulated to improve the health and appearance of your unique skin. We understand the importance of formulating with the best combinations of innovative ingredients to provide you with safe, highly effective products that deliver on their promises. All of our products are free of potential irritants like synthetic dyes and fragrances. PCA SKIN does not perform or condone animal testing.  PCA SKIN is a trusted innovator in the development of highly effective skincare products. Our vision is to improve peoples lives by providing results-oriented skin care solutions for the health of your

In [73]:
# Description text of the partner product in positional row 25.
# NOTE(review): this needs `test` to have at least 26 rows. The saved shape
# for get_rec('B01HEESSHG') is (2, 19), so on a fresh top-to-bottom run this
# would raise IndexError — confirm which product this cell was meant for.
test.iloc[25]['description_2']

"Intense moisture retention and antioxidant protection to meet the needs of dry and mature skin. Vitamin A and collagen formula nourishes and softens rough, dry skin keeping it plump and supple. Hyaluronic acid holds the skin's natural moisture preventing moisture loss all day long."

In [None]:
# TODO: save out dictionaries for the Flask app:
# - dictionary of product info: asin, product name, description, overall rating, category (topic values too?)
# - dictionary of similarities (sim2)
