In [2]:
#imports
import pandas as pd
import numpy as np
import re
import time
import sys
pd.set_option('display.max_colwidth', None)
import pickle
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity
from scipy import sparse
from matplotlib import pyplot as plt
import utils as ut

In [3]:
# Load the three recommender dictionaries (plain dicts, not DataFrames).
# NOTE: unpickling can execute arbitrary code - only load pickles created by this project.
def _read_pickle(path):
    """Unpickle and return the object stored at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


vg_rec = _read_pickle('./pickles/new_vg_rec.pkl')
movies_rec = _read_pickle('./pickles/new_movies_rec.pkl')
books_rec = _read_pickle('./pickles/new_books_rec.pkl')

In [4]:
# Read the three pickled lookup DataFrames in one pass (video games, movies, books).
vg_look, movies_look, books_look = map(
    pd.read_pickle,
    (
        './pickles/vg_look_small.pkl',
        './pickles/movies_look_small.pkl',
        './pickles/books_look_small.pkl',
    ),
)

In [25]:
# Test setup: point the globals read by make_recs_new at the books data.
lookup, recommender = books_look, books_rec

def make_recs_new(query, wout='', lookup_df=None, rec_dict=None, n_recs=10):
    """Build a table of recommendations for the first product title matching ``query``.

    Parameters
    ----------
    query : str
        Case-insensitive text to look for inside ``product_title``. It is matched
        literally (not as a regular expression), so titles containing characters
        such as ``+`` or ``(`` can be searched for.
    wout : str, optional
        Case-insensitive keyword to avoid: recommendations whose title contains
        it are dropped. The default (empty) disables filtering.
    lookup_df : pandas.DataFrame, optional
        Table with ``product_title``, ``tot_prod_reviews`` and ``avg_prod_stars``
        columns. Defaults to the notebook-level ``lookup`` variable.
    rec_dict : dict, optional
        Maps a product title to an ordered mapping whose keys are recommended
        titles. Defaults to the notebook-level ``recommender`` variable.
    n_recs : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.DataFrame or str
        A DataFrame indexed from 1 with the recommended titles, their mean review
        count (rounded to an integer) and mean star rating (rounded to 2 decimals).
        It has fewer than ``n_recs`` rows when fewer recommendations are available.
        If no title matches ``query`` (or the matched title has no recommender
        entry), the "Sorry, ..." message string is returned instead.
    """
    # Fall back to the notebook-level globals so existing calls keep working.
    if lookup_df is None:
        lookup_df = lookup
    if rec_dict is None:
        rec_dict = recommender

    if isinstance(query, str):
        query = query.lower()  # the message below echoes the lowercased query
    not_found = f'Sorry, "{query}" does not appear to be in the product database'
    if not isinstance(query, str):
        return not_found

    # Literal, case-insensitive substring search; missing titles never match.
    titles = lookup_df['product_title']
    hits = titles[titles.str.lower().str.contains(query, regex=False, na=False)]
    if hits.empty:
        return not_found
    target = hits.iloc[0]  # first matching title is the item to recommend for
    if target not in rec_dict:
        return not_found

    # Skip the first key before filtering. Presumably it is the queried item
    # itself (TODO confirm against how the recommender dicts were built).
    # Skipping first means an avoided keyword that also occurs in the queried
    # title can no longer cause a genuine recommendation to be dropped.
    candidates = list(rec_dict[target].keys())[1:]
    if wout:
        avoid = wout.lower()
        candidates = [title for title in candidates if avoid not in title.lower()]
    top_titles = candidates[:n_recs]

    review_counts = []
    star_ratings = []
    for title in top_titles:
        rows = lookup_df.loc[lookup_df['product_title'] == title]
        mean_reviews = rows['tot_prod_reviews'].mean()
        mean_stars = rows['avg_prod_stars'].mean()
        # A recommended title absent from the lookup table yields NaN means;
        # report those as missing instead of failing the whole request.
        review_counts.append(round(mean_reviews) if pd.notna(mean_reviews) else None)
        star_ratings.append(round(mean_stars, 2) if pd.notna(mean_stars) else None)

    # Size the index from the actual result so short lists still build a table.
    return pd.DataFrame(
        data={
            'Recommended Items': top_titles,
            'Total Reviews for Product': review_counts,
            'Avg Product Star Rating(1-5)': star_ratings,
        },
        index=range(1, len(top_titles) + 1),
    )

In [26]:
# Demo: recommendations for the first title in the active lookup table that contains "batman".
make_recs_new(query='batman')

Unnamed: 0,Recommended Items,Total Reviews for Product,Avg Product Star Rating(1-5)
1,Watchmen,203,4.68
2,Batman: Year One,39,4.72
3,Batman: The Killing Joke,80,4.51
4,Batman: Arkham Asylum,33,3.82
5,Batman: Year One (Batman (DC Comics Hardcover)),22,4.59
6,V for Vendetta,92,4.54
7,Batman: The Long Halloween,56,4.38
8,"Daredevil Visionaries - Frank Miller, Vol. 1",18,4.28
9,Batman: Dark Victory,14,4.71
10,Marvels,31,4.48


In [41]:
#https://docs.python.org/3/library/bz2.html
#https://medium.com/better-programming/load-fast-load-big-with-compressed-pickles-5f311584507e#:~:text=Compressed%20Pickles%20If%20you%20have%20been%20working%20in,bytes.%20It%20cuts%20loading%20time%20to%20a%20fraction.
import bz2

In [42]:
import _pickle as cPickle

In [43]:
# Write the movies recommender dictionary as a bz2-compressed pickle.
# The standard `pickle` module already uses the C implementation when available.
with bz2.open('./compressed/movies_rec_c.pbz2', 'wb') as compressed_out:
    pickle.dump(movies_rec, compressed_out)

In [44]:
# Round-trip check: reload the compressed pickle and compare it with the in-memory original.
# A context manager is used so the file handle is closed as soon as loading finishes,
# and `test_movies` only ever refers to the loaded data (never to the file object).
# NOTE: unpickling can run arbitrary code, so only load files written by this notebook.
with bz2.BZ2File('./compressed/movies_rec_c.pbz2', 'rb') as f:
    test_movies = pickle.load(f)

print(test_movies == movies_rec)

True


Okay - that worked! Time to save compressed versions of the other two recommender dictionaries in the same format (for good measure).

In [45]:
# Write the remaining recommender dictionaries as bz2-compressed pickles.
for payload, target_path in (
    (vg_rec, './compressed/vg_rec_c.pbz2'),
    (books_rec, './compressed/books_rec_c.pbz2'),
):
    with bz2.open(target_path, 'wb') as compressed_out:
        pickle.dump(payload, compressed_out)

In [48]:
# Store copies of the pickled lookup DataFrames next to the compressed recommenders.
for frame, destination in (
    (books_look, './compressed/books_look_c.pkl'),
    (movies_look, './compressed/movies_look_c.pkl'),
    (vg_look, './compressed/vg_look_c.pkl'),
):
    frame.to_pickle(destination)