In [68]:
import pandas as pd
import pickle

from sklearn.preprocessing import MinMaxScaler

In [69]:
# Load the three gzip-compressed, records-oriented JSON datasets in one pass:
# the product catalogue, the raw reviews, and the pre-processed reviews
# (the latter supplies the `cleaned_review` column used further down).
df_products, df_reviews, df_processed_reviews = (
    pd.read_json(dataset_path, orient="records", compression="gzip")
    for dataset_path in (
        '../../../data/interim/final/products.json.gz',
        '../../../data/interim/final/reviews.json.gz',
        '../../../data/processed/reviews.json.gz',
    )
)

In [70]:
# Attach the cleaned review text to the raw reviews.
# NOTE(review): pandas aligns the two frames on their index here, so this
# assumes both files list the reviews in the same order — TODO confirm.
df_reviews = df_reviews.assign(
    processed_review_text=df_processed_reviews['cleaned_review']
)
df_reviews.head()

Unnamed: 0,user_id,product_id,ratings,review_text,summary,created_at,processed_review_text
0,A0203183BAH3TR08FZGB,B0043T7FHK,5,I got this to run as a dual monitor. This is ...,This is my second time purchasing this monitor...,2015-06-30,get run dual monitor second time purchase moni...
1,A0261431Y0V4MHWY4B7W,B00AFH2E8E,4,"Not as good as I had hoped, music is very low,...",Bluetooth headset,2014-08-03,not good hop music low phone volume pretty goo...
2,A034116598G557EYZ9BC,B0013FRNKG,5,Appreciate if product\nNeed to buy one more if...,great value,2012-11-28,appreciate product need buy one promotion need...
3,A0404374X0HL5T332XSN,B00MNOPS1C,3,You get what you pay for,Three Stars,2016-02-02,get pay three star
4,A0431622H67YR5IPJRN,B0058UUR6E,5,Arrived in 2 days. working great. Recommend to...,working great. Recommend to others,2015-03-11,arrive day work great recommend others work gr...


In [71]:
# Load the tuned sentiment classifier (presumably a TF-IDF + logistic-regression
# pipeline, judging by the file name — TODO confirm).
# The `with` block guarantees the file handle is closed once the model has been
# deserialised, instead of leaking it for the lifetime of the kernel.
# SECURITY: unpickling can execute arbitrary code — only load model files that
# come from a trusted source.
with open('../../../models/sentiment_analysis/hyperparameter_tuning/logistic_regression_with_tfidf_vectorizer.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [72]:
def predict_mean_sentiment(product_id:str):
    """Return the average predicted sentiment over all reviews of a product.

    Looks up every row of the module-level ``df_reviews`` whose ``product_id``
    matches, feeds the ``processed_review_text`` values to
    ``predict_sentiment`` and averages the predictions.

    Args:
        product_id: Identifier of the product whose reviews are scored.

    Returns:
        The mean of the predictions, or ``0`` when the product has no reviews
        or ``predict_sentiment`` yields ``None``.
    """
    belongs_to_product = df_reviews['product_id'] == product_id
    texts = df_reviews[belongs_to_product]['processed_review_text'].tolist()

    # No reviews for this product: fall back to a score of 0.
    if not texts:
        return 0

    predictions = predict_sentiment(texts)
    return 0 if predictions is None else predictions.mean()

def predict_sentiment(features:list):
    """Run the module-level ``model`` on a batch of review texts.

    Args:
        features: Sequence of inputs handed to ``model.predict`` unchanged.

    Returns:
        Whatever ``model.predict`` returns for the batch, or ``None`` when
        ``features`` is empty (the model is not called in that case).
    """
    has_input = len(features) != 0
    return model.predict(features) if has_input else None

def recommend(df_products, n = 10):
    """Rank products by the mean predicted sentiment of their reviews.

    The input frame is left untouched: scoring happens on a copy, so calling
    this function repeatedly (or re-running the cell) does not accumulate
    ``sen`` / ``scaled_sen`` columns on the caller's DataFrame.

    Args:
        df_products: DataFrame with at least a ``product_id`` column.
        n: Maximum number of rows to return (default 10).

    Returns:
        A new DataFrame holding the ``n`` best-scoring products, sorted by
        ``scaled_sen`` in descending order, with two extra columns:
        ``sen`` (mean predicted sentiment per product) and ``scaled_sen``
        (``sen`` min-max scaled into the 1..5 rating range). An empty input
        yields an empty frame that still carries both columns.
    """
    # Copy first so the caller's catalogue is not mutated as a side effect.
    ranked = df_products.copy()

    # Calculate the mean sentiment for each product
    ranked['sen'] = ranked['product_id'].apply(predict_mean_sentiment)

    # MinMaxScaler cannot be fitted on zero rows; nothing to rank anyway.
    if ranked.empty:
        ranked['scaled_sen'] = ranked['sen']
        return ranked

    # Scale the sentiment score to fit the rating [formula: (x - xmin) / (xmax - xmin)] for each product
    scaler = MinMaxScaler(feature_range=(1, 5))
    ranked['scaled_sen'] = scaler.fit_transform(ranked[['sen']])

    # Best-scoring products first; head() already copes with n >= len(ranked).
    ranked = ranked.sort_values(by=['scaled_sen'], ascending = False)
    return ranked.head(n)

In [73]:
recommend(df_products, 10)

Unnamed: 0,product_id,name,description,price,image_url,sen,scaled_sen
0,B00001W0DG,Sony MDR-V500DJ Monitor Series Headphones with...,Revel in high-quality audio with the MDR-V500D...,6.61,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
1646,B01EPZIJRQ,Acer ChromeBook 14 CP5-471-35T4 Black (NX.GE8A...,The Acer Chromebook 14 has everything you'd ex...,86.5,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
1045,B00TR0576G,Samsung 23.6-Inch Screen LED-Lit Monitor (S24E...,"Samsung 23.6"" LED Monitor - Glossy White with ...",499.0,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
1047,B00TRO5Q36,HP Compaq Elite 8200 Small Form Factor PC SP67...,Returned from lease in very good used conditio...,108.88,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
1500,B01AWGXN00,Samsung NP900X5L-K02US Notebook 9 15&quot; Lap...,Samsungs Notebook 9 15 brings together a sleek...,999.0,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
1057,B00U67WPDM,HP Pavilion 23cw 23-in IPS LED Backlit Monitor...,"Play games, watch movies and more with this HP...",8.16,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
1496,B01AW10G14,Olympus PEN-F (Body-Only) (Black),New 20 Megapixel Live MOS sensor with 50Megapi...,66.76,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
1061,B00UJC03A6,ChiTronic Newest Magic Smart Ring Universal Fo...,"<b>Note<b><br> Color:black,white. #7:inner per...",1.96,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
1067,B00UPGWC5K,Acer Chromebook 15 C910-3916 NX.EF3AA.010 16-I...,The Acer Chromebook C771-C4TM is perfect for y...,373.99,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
1493,B01AT394A4,"Kingdel Windows 10 Industrial PC, Fanless Mini...","<b> Kindly be Advised: </b> <br> <br> 1, The I...",375.0,[https://images-na.ssl-images-amazon.com/image...,1.0,5.0
