In [15]:
# %load product_view.py
# Wrapper class exposing product features stored in PostgreSQL.
import pandas as pd
import sys
from sqlalchemy import create_engine
import numpy as np

# Features:
#   price (fullprice in the products table)
#   isinstock (products table)
#   avg review rating (products joined to reviews on asin)
#   total_review_count
#   review_helpful_rate
#   total_sold_copies_current_month
# Dynamic features:
#   total_sold_copies_during
#   total_sold_copies_channel
class ProductView:
    """Build per-product feature tables from a PostgreSQL database.

    Each feature name in ``feature_map`` points at the method whose query
    produces that column.  ``get_product_view`` combines the queries that are
    needed for the requested features into a single ``WITH ... SELECT``
    statement and returns the result as a pandas DataFrame.
    """

    # Name of the CTE emitted by each view-building method, keyed by method
    # name.  Used to assemble the FROM/WHERE clauses of the combined query so
    # that only views which were actually defined are referenced.
    _VIEW_NAMES = {
        'get_product_orders': 'product_orders',
        'get_product_reviews': 'product_reviews',
    }

    def __init__(self, server = 'localhost', port = 5432, database = 'SQLBook'):
        """Create the SQLAlchemy engine and register the supported features.

        Connects as user ``postgres`` with an empty password to
        ``server:port/database``.
        """
        dburl = 'postgresql://postgres:@' + server + ':' + str(port) + '/' + database
        self.pg_conn = create_engine(dburl)
        # feature name -> method that produces the column
        self.feature_map = {
            'fullprice' : self.get_product_orders,
            'isinstock' : self.get_product_orders,
            'total_review_count' : self.get_product_reviews,
            'avg_review_rating'  : self.get_product_reviews,
            #'review_helpful_rate': self.get_product_reviews,
            'total_order_count'  : self.get_product_orders,
            'total_copy_count'   : self.get_product_orders,
        }

    @staticmethod
    def _apply_limit(cmd, limit):
        """Return ``cmd`` with a ``LIMIT`` clause appended.

        ``limit`` of ``None`` leaves the query unchanged.  Any other value is
        coerced with ``int()`` (raising ``ValueError``/``TypeError`` if that
        is impossible) so arbitrary text can never be spliced into the SQL.
        The clause is put on its own line, so it is correctly separated even
        when ``cmd`` does not end in whitespace.
        """
        if limit is None:
            return cmd
        return "{}\nLIMIT {}".format(cmd.rstrip(), int(limit))

    def _execute(self, cmd, **kwargs):
        """Run ``cmd`` and return the rows as a DataFrame.

        Recognised keyword arguments:
            limit: maximum number of rows (appended as a ``LIMIT`` clause).
            debug: when truthy, print the final SQL before running it.
        Other keyword arguments are ignored.
        """
        cmd = self._apply_limit(cmd, kwargs.get('limit'))
        if kwargs.get('debug', False):
            print(cmd)
        return pd.read_sql_query(cmd, self.pg_conn)

    def get_product_orders(self, **kwargs):
        """Per-product price, stock flag and order/unit totals.

        With ``view=True`` the query is not executed; instead it is returned
        as a ``product_orders as (...)`` CTE fragment.  Otherwise the query is
        run (honouring ``limit``/``debug``) and a DataFrame is returned.
        """
        cmd = '''
SELECT p.productid, p.fullprice, p.isinstock, count(ol) as total_order_count, SUM(ol.numunits) as total_copy_count
FROM products p, orderlines ol
WHERE p.productid = ol.productid
GROUP BY p.productid
'''
        # .get(): 'view' is optional, a direct call must not raise KeyError
        if kwargs.get('view', False):
            return "product_orders as (\n{}\n)".format(cmd)
        return self._execute(cmd, **kwargs)

    def get_product_reviews(self, **kwargs):
        """Per-product average review rating and review count.

        With ``view=True`` the query is not executed; instead it is returned
        as a ``product_reviews as (...)`` CTE fragment.  Otherwise the query
        is run (honouring ``limit``/``debug``) and a DataFrame is returned.
        """
        cmd = '''
SELECT p.productid, avg(r.overall) as avg_review_rating, COUNT(1) as total_review_count
FROM products p, reviews r
WHERE p.asin = r.asin
GROUP BY p.productid
'''
        # .get(): 'view' is optional, a direct call must not raise KeyError
        if kwargs.get('view', False):
            return "product_reviews as (\n{}\n)".format(cmd)
        return self._execute(cmd, **kwargs)

    def _build_view_query(self, features):
        """Assemble the combined SQL for already-validated ``features``.

        Only the views needed for ``features`` are defined and joined, in the
        order in which they are first required, so the generated SQL is
        deterministic and never references a CTE that was not emitted.
        """
        # De-duplicate builders while keeping first-use order (a set would
        # make the CTE order vary between runs).
        builders = []
        for feature in features:
            builder = self.feature_map[feature]
            if builder not in builders:
                builders.append(builder)

        ctes = [builder(view=True) for builder in builders]
        names = [self._VIEW_NAMES[builder.__name__] for builder in builders]
        anchor = names[0]

        cmd = 'WITH ' + ",\n".join(ctes)
        cmd += "\nSELECT {}.productid, {}\nFROM {}\n".format(
            anchor, ', '.join(features), ', '.join(names))
        if len(names) > 1:
            # inner-join every additional view to the first one on productid
            conditions = ["{}.productid = {}.productid".format(anchor, other)
                          for other in names[1:]]
            cmd += "WHERE " + "\n  AND ".join(conditions) + "\n"
        return cmd

    def get_product_view(self, features=None, **kwargs):
        """Return a DataFrame with ``productid`` plus the requested features.

        Args:
            features: iterable of feature names from ``feature_map``; ``None``
                or an empty sequence selects every registered feature.
            **kwargs: forwarded to ``_execute`` (``limit``, ``debug``).

        Returns:
            A pandas DataFrame, or ``None`` (after printing a message) when
            any requested feature is unknown.
        """
        if features is None or len(features) == 0:
            features = list(self.feature_map)
        else:
            features = list(features)
            # validate features against feature_map
            invalid_features = set(features) - set(self.feature_map)
            if invalid_features:
                print('get_product_view not support features:', invalid_features)
                return None

        return self._execute(self._build_view_query(features), **kwargs)


        


In [17]:
# Connect with the constructor defaults (localhost:5432, database 'SQLBook')
# and preview the first ten rows; with no feature list given, every feature
# registered in feature_map is selected.
pv = ProductView()
pv.get_product_view(limit=10)

Unnamed: 0,productid,isinstock,avg_review_rating,total_review_count,total_order_count,total_copy_count,fullprice
0,10501,Y,4.0,1,2,2,$340.00
1,10295,Y,4.0,1,26,26,$195.00
2,11523,Y,4.5,4,2,2,$195.00
3,11534,Y,5.0,1,7,7,$340.00
4,11890,Y,2.0,1,1,1,$540.00
5,12831,N,3.976562,256,125,133,$34.00
6,12310,Y,5.0,1,15,15,$195.00
7,11739,Y,3.0,1,3,3,$340.00
8,12752,Y,5.0,2,4,4,$337.00
9,12785,Y,4.666667,9,1,1,$355.00
