In [251]:
import pandas as pd
import pickle

from sklearn.preprocessing import MinMaxScaler

# Truncate long text cells at 60 characters when DataFrames are displayed.
pd.options.display.max_colwidth = 60

Common Functions

In [252]:
def read_json(path):
    """Load a gzip-compressed, records-oriented JSON file into a DataFrame.

    Parameters
    ----------
    path : str or path-like
        Location of the ``.json.gz`` file to read.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    reader_options = {"orient": "records", "compression": "gzip"}
    return pd.read_json(path, **reader_options)

In [253]:
# Interim ("final") tables, one gzip-compressed JSON file per entity.
df_products = read_json('../data/interim/final/products.json.gz')
df_reviews = read_json('../data/interim/final/reviews.json.gz')
# Fixed copy-paste slip: df_users previously re-read reviews.json.gz.
# NOTE(review): assumes users.json.gz sits next to the other tables — confirm.
df_users = read_json('../data/interim/final/users.json.gz')
df_categories = read_json('../data/interim/final/categories.json.gz')
df_products_categories = read_json('../data/interim/final/products_categories.json.gz')

In [254]:
# Processed tables; read with the same helper (records orient, gzip) as the
# interim tables so every load goes through one code path.
df_processed_products = read_json('../data/processed/products.json.gz')
df_processed_reviews = read_json('../data/processed/reviews.json.gz')

In [255]:
# Copy the cleaned review text onto the raw reviews. The assignment aligns on
# the row index.
# NOTE(review): presumes both review frames share the same index/row order — confirm.
df_reviews['cleaned_review'] = df_processed_reviews['cleaned_review']

# One row per (product, review) pair; `validate` makes the merge fail loudly
# if product_id is not unique in df_products.
df_products_reviews = pd.merge(
    df_products,
    df_reviews,
    how='inner',
    on='product_id',
    validate='one_to_many',
).loc[:, ['product_id', 'cleaned_review']]

Content-based Filtering

In [256]:
# Lookup from product_id to the df_products index value; content_based_filtering
# uses that value to pick a row of the similarity matrix.
indices = pd.Series(df_products.index, index = df_products['product_id'])

# Load inside a context manager so the file handle is closed deterministically.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted artifacts.
with open('../models/content_based_filtering/item_similarity_matrix.pkl', 'rb') as pkl_file:
    CB_item_similarity_matrix = pickle.load(pkl_file)

In [257]:
def content_based_filtering(product_id, n = 10):
    """Return the ``n`` products most similar to ``product_id``.

    Parameters
    ----------
    product_id : str
        Product to find neighbours for; must be a key of ``indices``.
    n : int, default 10
        Number of similar products to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``n`` rows of ``df_products``, ordered from most to least
        similar, never including the query product itself.

    Raises
    ------
    KeyError
        If ``product_id`` is not present in ``indices``.
    """
    idx = indices[product_id]

    scores = pd.Series(CB_item_similarity_matrix[idx])

    # Remove the query item by position instead of assuming it sorts first:
    # a tie with an equally-scored item could otherwise keep the query item
    # in the result and drop a real neighbour.
    ranked = scores.drop(index=idx).sort_values(ascending = False)

    # Take exactly n neighbours (the previous iloc[1:n] slice yielded n - 1).
    top_n_idx = list(ranked.index[:n])

    return df_products.iloc[top_n_idx]

Item-based Collaborative Filtering

In [258]:
# Load each model inside a context manager so the file handles are closed.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted artifacts.
# NOTE(review): both names are loaded from the same svd.pkl, so the
# "similarity" model is a second copy of the rating model — confirm whether a
# separate similarity artifact was intended.
with open('../models/item_based_collaborative_filtering/svd.pkl', 'rb') as pkl_file:
    item_based_rating_prediction_model = pickle.load(pkl_file)
with open('../models/item_based_collaborative_filtering/svd.pkl', 'rb') as pkl_file:
    item_similarity_model = pickle.load(pkl_file)

In [259]:
def predict_rating(user_id, item_id):
    """Ask the rating-prediction model for ``user_id``'s rating of ``item_id``.

    Returns whatever ``item_based_rating_prediction_model.predict`` returns;
    callers in this notebook read its ``.est`` attribute.
    """
    prediction = item_based_rating_prediction_model.predict(user_id, item_id)
    return prediction

def get_item_similarity(item_1, item_2):
    """Query ``item_similarity_model`` for the pair ``(item_1, item_2)``.

    The two ids are forwarded positionally to the model's ``predict`` method
    and its result is returned unchanged; callers in this notebook read the
    result's ``.est`` attribute.
    """
    pair_prediction = item_similarity_model.predict(item_1, item_2)
    return pair_prediction

def get_unrated_items(user_id):
    """Return a copy of the ``df_products`` rows that ``user_id`` has not reviewed.

    A product counts as reviewed when ``df_reviews`` holds a row with this
    ``user_id`` and the product's ``product_id``.
    """
    reviewed_ids = df_reviews.loc[df_reviews['user_id'] == user_id, 'product_id']
    already_reviewed = df_products['product_id'].isin(reviewed_ids)
    # Copy so callers can add columns without touching df_products.
    return df_products.loc[~already_reviewed].copy()

def item_based_collaborative_filtering(user_id, item_id):
    """Score every product the user has not reviewed and rank by predicted rating.

    Adds two columns to the unrated-products frame:

    * ``sim`` — ``.est`` of ``get_item_similarity(item_id, product_id)``
    * ``pre`` — ``.est`` of ``predict_rating(user_id, product_id)``

    Returns the frame sorted by ``pre`` in descending order (``sim`` is
    reported but does not influence the ordering).
    """
    candidates = get_unrated_items(user_id)
    candidate_ids = candidates['product_id']

    candidates['sim'] = candidate_ids.map(
        lambda candidate: get_item_similarity(item_id, candidate).est
    )
    candidates['pre'] = candidate_ids.map(
        lambda candidate: predict_rating(user_id, candidate).est
    )

    return candidates.sort_values(by=['pre'], ascending=False)

Sentiment Analysis

In [260]:
# Load inside a context manager so the file handle is closed deterministically.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted artifacts.
with open('../models/sentiment_analysis/hyperparameter_tuning/logistic_regression_with_tfidf_vectorizer.pkl', 'rb') as pkl_file:
    sentiment_analysis_model = pickle.load(pkl_file)

In [261]:
def predict_mean_sentiment(product_id:str):
    """Average the predicted sentiment over all cleaned reviews of a product.

    Returns 0 when the product has no rows in ``df_products_reviews`` or when
    ``predict_sentiment`` yields ``None``; otherwise the mean of the
    predictions.
    """
    belongs_to_product = df_products_reviews['product_id'] == product_id
    # Cast to a unicode array before handing the texts to the model.
    review_texts = df_products_reviews.loc[belongs_to_product, 'cleaned_review'].values.astype('U')
    if len(review_texts) == 0:
        return 0

    predictions = predict_sentiment(review_texts)
    return 0 if predictions is None else predictions.mean()

def predict_sentiment(features:list):
    """Run ``sentiment_analysis_model.predict`` on ``features``.

    Returns ``None`` when ``features`` is empty, otherwise the model's
    predictions for the given inputs.
    """
    if len(features) != 0:
        return sentiment_analysis_model.predict(features)
    return None

Proposed Recommendation Algorithm

In [262]:
def recommend(user_id, item_id, n = 10, rating_weight = 1, sentiment_weight = 2):
    """Rank a user's unrated products by predicted rating blended with review sentiment.

    Parameters
    ----------
    user_id : str
        User to recommend for; forwarded to ``item_based_collaborative_filtering``.
    item_id : str
        Reference product; forwarded to ``item_based_collaborative_filtering``.
    n : int, default 10
        Number of recommendations to return.
    rating_weight : float, default 1
        Weight of the predicted rating (``pre``) in the ranking score.
    sentiment_weight : float, default 2
        Weight of the rescaled mean sentiment (``sen``) in the ranking score.

    Returns
    -------
    pandas.DataFrame
        The top ``n`` candidates sorted by ``ranking_score`` (descending),
        with ``sen`` and ``ranking_score`` added to the columns produced by
        ``item_based_collaborative_filtering``.
    """
    df_unrated_items = item_based_collaborative_filtering(user_id, item_id)
    df_unrated_items['sen'] = df_unrated_items['product_id'].apply(predict_mean_sentiment)

    # MinMaxScaler rejects an empty input, so only rescale when there is at
    # least one candidate.
    if not df_unrated_items.empty:
        # Stretch sentiment onto [1, 5] — presumably to match the scale of 'pre'.
        scaler = MinMaxScaler(feature_range=(1, 5))
        df_unrated_items['sen'] = scaler.fit_transform(df_unrated_items[['sen']])

    df_unrated_items['ranking_score'] = (
        rating_weight * df_unrated_items['pre']
        + sentiment_weight * df_unrated_items['sen']
    )

    # Honour the caller's n (this was previously hard-coded to 10).
    return df_unrated_items.sort_values(by=['ranking_score'], ascending = False).head(n)

Starting Point of the System

In [263]:
# Example inputs for the demo cells below: the user to recommend for and the
# product used as the reference item.
user_id, product_id = 'A0203183BAH3TR08FZGB', 'B00004TLW2'

In [264]:
# Proposed algorithm: predicted rating blended with review sentiment.
item_list = recommend(user_id=user_id, item_id=product_id)

In [265]:
# Show the five best-ranked recommendations with their descriptive columns only.
item_list.head(5)[['product_id', 'name', 'description']]

Unnamed: 0,product_id,name,description
964,B00QD6H9TK,"OfsPower OPS-1010B Bluetooth Earbuds, XS Earphones with ...",Excellent sounds quality Built with high-performance spe...
24,B0002A6YVC,Apple iBook Laptop 12&quot; M9623LL/A (1.2 GHz PowerPC G...,iBook is the lightest all-in-one consumer notebook avail...
9,B00005UKBD,Sony DSCF707 Cyber-shot 5MP Digital Still Camera w/ 5x O...,Accurately realistic photos are captured using this digi...
638,B00H8J0JZW,Asus VN248H-P 24-Inch Full-HD LED Monitor,Full HD displays for amazing high-definition visual clar...
1102,B00W2I3Z3W,Canon EOS Rebel T6i 24.2MP Digital SLR Camera Bundle wit...,Packing a high resolution 24.2-megapixel CMOS sensor and...


In [266]:
# Baseline for comparison: collaborative filtering alone, ranked by predicted rating.
item_based_list = item_based_collaborative_filtering(user_id=user_id, item_id=product_id)

In [267]:
# First five rows of the collaborative-filtering ranking.
item_based_list.iloc[:5]

Unnamed: 0,product_id,name,description,price,image_url,sim,pre
33,B0006HU56Q,"Apple MacBook Pro MA611LL/A 17"" Notebook PC (2.33 GHz In...","17"" Macbook Pro 2.33GHz Intel Core 2 Duo (MA611LL/A) 2.3...",197.89,[https://images-na.ssl-images-amazon.com/images/I/41bHaA...,4.671126,4.751658
1124,B00X7QTTKG,Fujifilm X-T10 Body Black Mirrorless Digital Camera (Old...,Create your Lifegraphy with Fujifilm's new X-T10 camera.,1.6,[https://images-na.ssl-images-amazon.com/images/I/41ZdHL...,4.613868,4.67653
311,B006U49XM6,Nikon D4 16.2 MP CMOS FX Digital SLR with Full 1080p HD ...,Nikon D4 16.2 MP CMOS FX Digital SLR with Full 1080p HD ...,108.21,[https://images-na.ssl-images-amazon.com/images/I/31hZ6s...,4.586022,4.668424
234,B0049WJWJ0,Canon EOS 60D 18 MP CMOS Digital SLR Camera with EF-S 18...,"With the new EOS 60D DSLR, Canon gives the photo enthusi...",699.99,[https://images-na.ssl-images-amazon.com/images/I/41HJMv...,4.621295,4.666409
1558,B01CO2JPYS,Canon EOS Rebel T6 Digital SLR Camera Kit with EF-S 18-5...,Canon T6 18MP CMOS DIGIC 4+ Image Processor Wi-Fi & NFC ...,33.16,[https://images-na.ssl-images-amazon.com/images/I/41gk-i...,4.597404,4.663023


In [268]:
# Baseline for comparison: products most similar to the reference product.
content_based_list = content_based_filtering(product_id=product_id)

In [269]:
# First five rows of the content-based ranking.
content_based_list.iloc[:5]

Unnamed: 0,product_id,name,description,price,image_url
37,B0007GIXQK,Fujifilm Finepix A350 5.2MP Digital Camera with 3x Optic...,Fujifilm Finepix A350 5.2MP Digital Camera with 3x Optic...,35.0,[https://images-na.ssl-images-amazon.com/images/I/51A6X7...
38,B0007GIXQU,Fujifilm Finepix F10 6.3MP Digital Camera with 3x Optica...,Fujifilm Finepix F10 6.3MP Digital Camera with 3x Optica...,62.0,[https://images-na.ssl-images-amazon.com/images/I/41qjdI...
70,B000M4HLEC,Sony Cybershot DSC-T20 8MP Digital Camera with 3x Optica...,Sony Cybershot DSC-T20 8MP Digital Camera with 3x Optica...,49.95,[https://images-na.ssl-images-amazon.com/images/I/41uupA...
76,B000NBOA0M,Olympus Stylus FE-250 8.0MP Digital Camera with 3x Optic...,"Olympus FE250 8.0MP Digital Camera 3x optical zoom 2.5"" ...",79.95,[https://images-na.ssl-images-amazon.com/images/I/51Bics...
71,B000M4KXEC,Sony Cybershot DSCW55 7.2MP Digital Camera with 3x Optic...,Sony Cybershot DSCW55 7.2MP Digital Camera with 3x Optic...,39.99,[https://images-na.ssl-images-amazon.com/images/I/51eu61...


In [270]:
# `items` is never defined in this notebook (leftover kernel state), so the
# cell failed on a fresh run; the recorded output matches `item_list`.
item_list[['product_id', 'name', 'description']].head(10)

Unnamed: 0,product_id,name,description
964,B00QD6H9TK,"OfsPower OPS-1010B Bluetooth Earbuds, XS Earphones with ...",Excellent sounds quality Built with high-performance spe...
24,B0002A6YVC,Apple iBook Laptop 12&quot; M9623LL/A (1.2 GHz PowerPC G...,iBook is the lightest all-in-one consumer notebook avail...
9,B00005UKBD,Sony DSCF707 Cyber-shot 5MP Digital Still Camera w/ 5x O...,Accurately realistic photos are captured using this digi...
638,B00H8J0JZW,Asus VN248H-P 24-Inch Full-HD LED Monitor,Full HD displays for amazing high-definition visual clar...
1102,B00W2I3Z3W,Canon EOS Rebel T6i 24.2MP Digital SLR Camera Bundle wit...,Packing a high resolution 24.2-megapixel CMOS sensor and...
36,B0007CZ2UO,Olympus Camedia C5500 5.1MP Digital Camera with 5x Optic...,This Olympus Sport Zoom digital camera is as durable as ...
443,B00BBGMD16,"Chic Tangle Free Arts Earbud Headphones with Microphone,...",Say bye-bye to blase earbuds and hello to a new world of...
1233,B013KKFMOK,HP Pavilion 23-q120 23-Inch All-in-One Desktop (Intel Co...,HP Pavilion 23-q120 All-in-One
232,B0047DVW30,Apple MacBook Air MC504LL/A 13.3-Inch Laptop (OLD VERSION),"Introducing the new MacBook Air, the most mobile Mac in ..."
194,B0035JD0C8,Fujifilm FinePix JZ300 12 MP Digital Camera with 10x Wid...,"Fujifilm FinePix JZ300 - 12MP, Fujinon 10x Wide Optical ..."
