In [94]:
# %load product_view.py
# wrapper class of product features from postgresql
import pandas as pd
import sys
from sqlalchemy import create_engine
import numpy as np

# features
#price (fullprice in products table)
#isinstock( products table)
#avg review rating (products & reviews on asin)
#total_review_count
#review_helpful_rate
#total_copy_current_month
# dynamic features
#total_copy_during
#total_copy_channel
#ProductView(productid, name, nodeid, asin, total_review_count, avg_review_rating, total_order_count)
class ProductView:
    def __init__(self, server = 'localhost', port = 5432, database = 'SQLBook'):
        dburl = 'postgresql://postgres:@' + server + ':' + str(port) + '/' + database
        self.pg_conn = create_engine(dburl)
        self.feature_map = {
            'nodeid' : ('product_self', self.get_product),
            'fullprice' : ('product_self', self.get_product),
            'isinstock' : ('product_self', self.get_product),
            'total_review_count' : ('product_reviews', self.get_product_reviews),
            'avg_review_rating'  : ('product_reviews', self.get_product_reviews),
            #'review_helpful_rate': self.get_product_reviews),
            'total_order_count'  : ('product_orders', self.get_product_orders),
            'total_copy_count'   : ('product_orders', self.get_product_orders),
        }

    def _execute(self, viewname, cmd, **kwargs):
        dbcmd = kwargs.get('prefix', '')
        if 'view' in kwargs:
            dbcmd += "{} as ({})".format(viewname, cmd)
            return dbcmd
        dbcmd += cmd
        if 'limit' in kwargs:
            dbcmd += "LIMIT {}".format(kwargs['limit'])
        if kwargs.get('debug', False):
            print(dbcmd)
        df = pd.read_sql_query(dbcmd, self.pg_conn)
        return df

    def get_product(self, **kwargs):
        cmd = '''
SELECT p.productid, p.category as nodeid, p.fullprice, p.isinstock
FROM products p
'''
        return self._execute('product_self', cmd, **kwargs)

    def get_product_orders(self, **kwargs):
        cmd = '''
SELECT p.productid, count(ol) as total_order_count, SUM(ol.numunits) as total_copy_count
FROM products p, orderlines ol
WHERE p.productid = ol.productid
GROUP BY p.productid
'''
        return self._execute('product_orders', cmd, **kwargs)

    def get_product_reviews(self, **kwargs):
        cmd = '''
SELECT p.productid, avg(r.overall) as avg_review_rating, COUNT(1) as total_review_count
FROM products p, reviews r
WHERE p.asin = r.asin
GROUP BY p.productid
'''
        return self._execute('product_reviews', cmd, **kwargs)

    def get_product_view(self, features=[], **kwargs):
        if len(features) == 0:
            features = self.feature_map.keys()
        else:
            # validate features against feature_map
            invalid_features = set(features) - set(self.feature_map.keys())
            if invalid_features:
                print('get_product_view not support features:', invalid_features)
                return None

        func_ptrs = set(self.feature_map[f] for f in features)
        subviews = [f[0] for f in func_ptrs]
        view_contents = [f[1](view=True) for f in func_ptrs]
        join_keys = ["{}.productid = product_self.productid".format(sv) for sv in (set(subviews) - set('product_self'))]
        feature_list = [self.feature_map[f][0]+'.'+f for f in features]
        kwargs['prefix'] = '' if 'view' in kwargs else 'WITH '
        kwargs['prefix'] += ",\n".join(view_contents)
        kwargs['prefix'] += ",\n"
        cmd = '''
SELECT {}
FROM {}
WHERE {}
'''.format(', '.join(feature_list), ', '.join(subviews), ' AND '.join(join_keys))
        
        if 'where' in kwargs:
            cmd += "AND {}\n".format(kwargs['where'])

        return self._execute('product_view', cmd, **kwargs)


        


In [95]:
pv = ProductView()
aa = pv.get_product_view(limit=10, debug=True)

WITH product_orders as (
SELECT p.productid, count(ol) as total_order_count, SUM(ol.numunits) as total_copy_count
FROM products p, orderlines ol
WHERE p.productid = ol.productid
GROUP BY p.productid
),
product_self as (
SELECT p.productid, p.category as nodeid, p.fullprice, p.isinstock
FROM products p
),
product_reviews as (
SELECT p.productid, avg(r.overall) as avg_review_rating, COUNT(1) as total_review_count
FROM products p, reviews r
WHERE p.asin = r.asin
GROUP BY p.productid
),

SELECT product_reviews.total_review_count, product_self.nodeid, product_self.isinstock, product_orders.total_order_count, product_orders.total_copy_count, product_self.fullprice, product_reviews.avg_review_rating
FROM product_orders, product_self, product_reviews
WHERE product_orders.productid = product_self.productid AND product_reviews.productid = product_self.productid AND product_self.productid = product_self.productid
LIMIT 10


ProgrammingError: (psycopg2.ProgrammingError) syntax error at or near "SELECT"
LINE 18: SELECT product_reviews.total_review_count, product_self.node...
         ^
 [SQL: 'WITH product_orders as (\nSELECT p.productid, count(ol) as total_order_count, SUM(ol.numunits) as total_copy_count\nFROM products p, orderlines ol\nWHERE p.productid = ol.productid\nGROUP BY p.productid\n),\nproduct_self as (\nSELECT p.productid, p.category as nodeid, p.fullprice, p.isinstock\nFROM products p\n),\nproduct_reviews as (\nSELECT p.productid, avg(r.overall) as avg_review_rating, COUNT(1) as total_review_count\nFROM products p, reviews r\nWHERE p.asin = r.asin\nGROUP BY p.productid\n),\n\nSELECT product_reviews.total_review_count, product_self.nodeid, product_self.isinstock, product_orders.total_order_count, product_orders.total_copy_count, product_self.fullprice, product_reviews.avg_review_rating\nFROM product_orders, product_self, product_reviews\nWHERE product_orders.productid = product_self.productid AND product_reviews.productid = product_self.productid AND product_self.productid = product_self.productid\nLIMIT 10']

In [96]:
print(aa)

product_orders as (
SELECT p.productid, count(ol) as total_order_count, SUM(ol.numunits) as total_copy_count
FROM products p, orderlines ol
WHERE p.productid = ol.productid
GROUP BY p.productid
),
product_reviews as (
SELECT p.productid, avg(r.overall) as avg_review_rating, COUNT(1) as total_review_count
FROM products p, reviews r
WHERE p.asin = r.asin
GROUP BY p.productid
),
product_self as (
SELECT p.productid, p.category as nodeid, p.fullprice, p.isinstock
FROM products p
),
product_view as (
SELECT product_reviews.total_review_count, product_self.nodeid, product_self.isinstock, product_orders.total_order_count, product_orders.total_copy_count, product_self.fullprice, product_reviews.avg_review_rating
FROM product_orders, product_reviews, product_self
WHERE product_orders.productid = product_self.productid AND product_reviews.productid = product_self.productid AND product_self.productid = product_self.productid
)
