In [1]:
#imports
import pandas as pd
import numpy as np
import re
import time
import sys
pd.set_option('display.max_colwidth', None)
import pickle
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity
from scipy import sparse
from matplotlib import pyplot as plt
import utils as ut

I'm going to attempt to modify the recommender dictionaries I already made and remove 50% of the recommended products (from 100 to 50). I think this is going to save large amount of memory and translate to faster load times on my app - I also suspect it will be faster than re-creating them all over again.

In [2]:
#load in new recommender dictionaries (not dataframes)
with open('./compressed/new_vg_rec.pkl', 'rb') as f:
    vg_rec = pickle.load(f)

with open('./pickles/new_movies_rec.pkl', 'rb') as f:
    movies_rec = pickle.load(f)

with open('./compressed/new_books_rec.pkl', 'rb') as f:
    books_rec = pickle.load(f)

In [12]:
len(list(vg_rec['PlayStation 4 500GB Console [Old Model]'])[:50])

50

In [14]:
def shorten_recs(recommender):
    new_rec = {}
    for key, value in recommender.items(): #open dictionary
        new_rec[key] = list(recommender[key])[:50] #new dictionary where values list is a portion of previous size
    return new_rec

In [15]:
new_vg_rec = shorten_recs(vg_rec)

In [17]:
len(new_vg_rec)

15938

In [18]:
len(list(new_vg_rec['PlayStation 4 500GB Console [Old Model]']))

50

Wow... that ran INSTANTLY. Time to run it over the other two dictionaries.

In [19]:
new_movies_rec = shorten_recs(movies_rec)

In [20]:
new_books_rec = shorten_recs(books_rec)

Time to pickle these and see how much smaller they are than the originals.

In [21]:
with open('./compressed/vg_rec_small.pkl', 'wb') as f:
    pickle.dump(new_vg_rec, f)
    
with open ('./compressed/movies_rec_small.pkl', 'wb') as f:
    pickle.dump(new_movies_rec, f)
    
with open('./compressed/books_rec_small.pkl', 'wb') as f:
    pickle.dump(new_books_rec, f)

Okay, let's test these out and make sure the lookup function still works

In [23]:
vg_look = pd.read_pickle('./pickles/vg_look_small.pkl')
movies_look = pd.read_pickle('./pickles/movies_look_small.pkl')
books_look = pd.read_pickle('./pickles/books_look_small.pkl')

In [35]:
#NEW - load in NLP dictionaries from review text to display top 5 words for product

with open('./compressed/vg_text_dict.pkl', 'rb') as f:
    vg_text = pickle.load(f)

with open('./compressed/movies_text_dict.pkl', 'rb') as f:
    movies_text = pickle.load(f)

with open('./compressed/books_text_dict.pkl', 'rb') as f:
    books_text = pickle.load(f)

In [41]:
def make_recs_new(query, wout=''):  #need to set lookup and recommender global variables prior to calling   
    try:
        query=query.lower() #lowercase entry, lowercase titles (only during search, below)
        titles = list(lookup[lookup['product_title'].map(lambda x: x.lower()).str.contains(query)]['product_title'])
        q = titles[0] #this is the item to search for
        
        message = f'''
        **Most Popular Item Containing Your Search Term(s):** {q}  
        There are {round(lookup[lookup['product_title']==q]['tot_prod_reviews'].mean())} total reviews for this item and it has an average star rating of {round(lookup[lookup['product_title']==q]['avg_prod_stars'].mean(), 2)}
        '''      

        if wout == '':           
            top10_prods = []
            num_prod_revs = []
            avg_prod_stars = []
            top_5_words = []
            for key in list(recommender[q])[1:11]:
                top10_prods.append(key)
                num_prod_revs.append(round(lookup[lookup['product_title']==key]['tot_prod_reviews'].mean()))
                avg_prod_stars.append(round(lookup[lookup['product_title']==key]['avg_prod_stars'].mean(), 2))
                top_5_words.append(text[key])
            final_output_df = pd.DataFrame(data = {
                'Recommended Items':top10_prods,
                'Total Reviews for Product':num_prod_revs,
                'Avg Product Star Rating(1-5)':avg_prod_stars,
                'Most Common Words in Review Text':top_5_words
            }, index=range(1,11))
            return message, final_output_df            
            
        else:
            
            wout = wout.lower() #lowercase
            filtered_query = [] #make empty list
            for key in list(recommender[q]):
                if wout not in key.lower(): #check if avoided keyword is in results
                    filtered_query.append(key)
            top10_prods = []
            num_prod_revs = []
            avg_prod_stars = []
            top_5_words = []
            for item in filtered_query[1:11]:
                top10_prods.append(item)
                num_prod_revs.append(round(lookup[lookup['product_title']==item]['tot_prod_reviews'].mean()))
                avg_prod_stars.append(round(lookup[lookup['product_title']==item]['avg_prod_stars'].mean(), 2))
                top_5_words.append(text[item])
            final_output_df = pd.DataFrame(data = {
                'Recommended Items':top10_prods,
                'Total Reviews for Product':num_prod_revs,
                'Avg Product Star Rating(1-5)':avg_prod_stars,
                'Most Common Words in Review Text':top_5_words
                }, index=range(1,11))
            return message, final_output_df
        
    except:
        return f'Sorry, "{query}" does not appear to be in the product database'

In [36]:
lookup, recommender, text = vg_look, new_vg_rec, vg_text

In [42]:
make_recs_new('harry potter', 'lego')

('\n        **Most Popular Item Containing Your Search Term(s):** LEGO Harry Potter: Years 1-4  \n        There are 1012 total reviews for this item and it has an average star rating of 4.17\n        ',
                                                  Recommended Items  \
 1                        Harry Potter and the Order of the Phoenix   
 2                                     Alien 3 - Nintendo Super NES   
 3   HORI 3DS Protector and Pouch Set (Super Mario 3D Land version)   
 4                                                       Duck Tales   
 5                   Marvel Super Hero Squad: The Infinity Gauntlet   
 6                      Harry Potter and the Deathly Hallows Part 1   
 7                         Disney's A Christmas Carol - Nintendo DS   
 8                                                 Cake Mania 3 NDS   
 9                                            Pac Pix - Nintendo DS   
 10                                  Romancing SaGa - PlayStation 2   
 
     Total Rev

Great! It also looks like the new NLP feature is working too! Time to update the app.