In [1]:
# to facilitate live changes when running the web app and jupyter simultaneously:
import sys
import importlib

# Reload through sys.modules instead of the notebook-level name `utils`:
# the module can already be loaded (e.g. imported indirectly by another
# module) while `utils` is not bound in this namespace, in which case
# `importlib.reload(utils)` would raise NameError.
if 'web.utils.utils' in sys.modules:
    print("reloading web.utils.utils")
    # importlib.reload returns the reloaded module object; rebind `utils` to it.
    utils = importlib.reload(sys.modules['web.utils.utils'])
else:
    from web.utils import utils

/Users/i857913/.ipython/products.db
/Users/i857913/Documents/mids/w209/w209/env/lib/python3.7/site-packages/IPython/extensions/products.db
/Users/i857913/Documents/mids/w209/w209/env/lib/python3.7/site-packages/products.db
products.db
DB_FILE products.db full path /Users/i857913/Documents/mids/w209/w209/products.db


In [19]:
# Load the data with the project helper and pass it straight through the
# project's cleaning step (both live in web.utils.utils, not shown here).
df = utils.clean_data(utils.load_data())

In [20]:
import altair as alt
# Turn off Altair's maximum-row check on the active data transformer so that
# charts built from larger DataFrames do not raise MaxRowsError.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [28]:
# Data behind every chart below: rows with Est_Monthly_Sales < 1500 and Reviews < 60.
source = df.loc[df['Est_Monthly_Sales'].lt(1500) & df['Reviews'].lt(60)]


In [29]:
# category selection

# Single-value selection keyed on the Category field; with nothing selected
# it matches all rows (empty='all'), and a double-click clears it.
cat_selection = alt.selection_single(
    fields=['Category'],
    empty='all',
    clear=alt.EventStream(type='dblclick'),
)

# Colour by Category for rows matching the selection, 'whitesmoke' otherwise.
cat_color = alt.condition(
    cat_selection,
    'Category:N',
    alt.ColorValue('whitesmoke'),
    legend=None,
)

# A column of circles, one row per Category, that carries the selection and
# therefore acts as a clickable legend.
cat_legend = (
    alt.Chart(source)
    .mark_circle(size=80)
    .encode(
        y=alt.Y('Category:N', axis=alt.Axis(orient='right')),
        color=cat_color,
    )
    .add_selection(cat_selection)
)


In [30]:
# scatterplot global configuration - with single selection

# Geometry shared by every scatterplot.
width, height = 400, 375
circle_size = 60

# Fields listed in the hover tooltip of each point.
tooltip = [
    'Product_Name',
    'ASIN',
    'Est_Monthly_Sales',
    'Category',
    'Reviews',
    'LQS',
    'Net',
    'Price',
]

# Selection keyed on ASIN; a double-click clears it. Points that do not match
# the selection are drawn with a transparent colour.
single_select = alt.selection_single(
    fields=['ASIN'],
    empty='all',
    clear=alt.EventStream(type='dblclick'),
)
color = alt.condition(
    single_select,
    'Category:N',
    alt.ColorValue('transparent'),
    legend=None,
)


In [31]:
def make_base_chart(x, title):
  """Build one scatterplot of `x` against Est_Monthly_Sales.

  The chart is drawn from the module-level `source` data and uses the shared
  `circle_size`, `color`, `tooltip`, `height` and `width` settings. Rows are
  filtered by `cat_selection`, and `single_select` is attached to the chart.

  Args:
    x: x-axis encoding, passed unchanged to `encode(x=...)`.
    title: chart title, passed unchanged to `alt.Chart(..., title=...)`.

  Returns:
    The configured Altair chart.
  """
  # y axis is pinned to a fixed 0-1600 domain.
  sales_axis = alt.Y('Est_Monthly_Sales', scale=alt.Scale(domain=[0, 1600]))

  chart = alt.Chart(source, title=title).mark_circle(size=circle_size)
  chart = chart.encode(x=x, y=sales_axis, color=color, tooltip=tooltip)
  chart = chart.transform_filter(cat_selection)
  chart = chart.add_selection(single_select)
  return chart.properties(height=height, width=width)


In [32]:
# One scatterplot per metric; each title is a [headline, sub-headline] pair.
reviews_vs_sales = make_base_chart(
    x='Reviews',
    title=["Reviews vs. Demand", "Number of Reviews indicates Competition"],
)
lqs_vs_sales = make_base_chart(
    x='LQS',
    title=["Listing Quality Score vs. Demand", "Low LQS indicates Bad Marketing"],
)
net_vs_sales = make_base_chart(
    x='Net',
    title=["Estimated Net vs Demand", "Indicates Return on Investment"],
)
rating_vs_sales = make_base_chart(
    x='Rating',
    title=["Quality Rating vs Demand", "Low Quality indicates Opportunity"],
)

In [34]:
# Top row: Reviews and LQS scatterplots plus the category legend, side by side (|).
# Bottom row: Net and Rating scatterplots, stacked under the top row (&).
(reviews_vs_sales | lqs_vs_sales | cat_legend) & (net_vs_sales | rating_vs_sales)
# Smaller alternative layout, kept disabled: lqs_vs_sales | cat_legend

In [36]:
# guide lines
import pandas as pd

# Horizontal red rule at y = 200, shared by all four plots.
sales_y = (
    alt.Chart(pd.DataFrame({'y': [200]}))
    .mark_rule(color='red')
    .encode(y='y')
)

# Vertical red rules, one per x metric.
reviews_x = (
    alt.Chart(pd.DataFrame({'x': [50]}))
    .mark_rule(color='red')
    .encode(x='x')
)
lqs_x = (
    alt.Chart(pd.DataFrame({'x': [5.5]}))
    .mark_rule(color='red')
    .encode(x='x')
)
net_x = (
    alt.Chart(pd.DataFrame({'x': [15]}))
    .mark_rule(color='red')
    .encode(x='x')
)
rating_x = (
    alt.Chart(pd.DataFrame({'x': [3.7]}))
    .mark_rule(color='red')
    .encode(x='x')
)

# Layer the shared horizontal rule and the metric's vertical rule on each scatterplot.
reviews_plot = reviews_vs_sales + sales_y + reviews_x
lqs_plot = lqs_vs_sales + sales_y + lqs_x
net_plot = net_vs_sales + sales_y + net_x
rating_plot = rating_vs_sales + sales_y + rating_x


In [37]:
# Same grid layout as before, now using the plots that carry the red guide rules.
(reviews_plot | lqs_plot | cat_legend) & (net_plot | rating_plot)


In [38]:
from altair import datum

# sliders galore
def make_slider_set(dimension, min, max, step):
  """Create the pair of range sliders ('start' and 'end') for one dimension.

  Each slider is an `alt.binding_range` over [min, max] with the given step,
  bound to a single selection on the `dimension` field. The 'start' selection
  is initialised to `min` and the 'end' selection to `max`.

  Args:
    dimension: field name; also used to build the slider labels and the
      selection names.
    min: lower bound of both sliders.
    max: upper bound of both sliders.
    step: slider increment.

  Returns:
    dict with keys 'start' and 'end' mapping to the two selections.
  """
  initial_by_edge = {'start': min, 'end': max}
  slider_set = {}
  for edge, initial in initial_by_edge.items():
    binding = alt.binding_range(
      min=min,
      max=max,
      step=step,
      name=dimension + ' ' + edge + ':',
    )
    slider_set[edge] = alt.selection_single(
      name=dimension + '_select_range_' + edge,
      fields=[dimension],
      bind=binding,
      init={dimension: initial},
    )
  return slider_set

# Slider pairs per dimension, built from (dimension, min, max, step) tuples.
sliders = {
    dim: make_slider_set(dim, lower, upper, step)
    for dim, lower, upper, step in [
        ('Est_Monthly_Sales', 0, 1600, 10),
        ('LQS', 0, 8, .5),
        ('Reviews', 0, 60, 1),
        ('Net', 0, 45, 1),
        ('Rating', 0, 5, .5),
    ]
}

def add_sliders(c):
  """Attach every slider pair in `sliders` to chart `c` and filter by them.

  For each of the five dimensions, the 'start' and 'end' selections are added
  to the chart, followed by a filter keeping rows whose value lies between the
  two slider values (inclusive on both ends).

  Args:
    c: chart to extend; each step works on the chart returned by the previous one.

  Returns:
    The chart with all selections and range filters applied.
  """
  chart = c
  for dimension in ('Est_Monthly_Sales', 'LQS', 'Reviews', 'Net', 'Rating'):
    lower = sliders[dimension]['start']
    upper = sliders[dimension]['end']
    value = datum[dimension]
    within_range = (value >= lower[dimension]) & (value <= upper[dimension])
    chart = chart.add_selection(lower, upper).transform_filter(within_range)
  return chart

# Rebuild the four scatterplots from scratch, this time with the slider
# selections and their range filters attached.
reviews_vs_sales = add_sliders(
    make_base_chart('Reviews', ["Reviews vs. Demand", "Number of Reviews indicates Competition"])
)
lqs_vs_sales = add_sliders(
    make_base_chart('LQS', ["Listing Quality Score vs. Demand", "Low LQS indicates Bad Marketing"])
)
net_vs_sales = add_sliders(
    make_base_chart('Net', ["Estimated Net vs Demand", "Indicates Return on Investment"])
)
rating_vs_sales = add_sliders(
    make_base_chart('Rating', ["Quality Rating vs Demand", "Low Quality indicates Opportunity"])
)

In [39]:
# Re-layer the red guide rules onto the slider-enabled scatterplots.
#
# The rule charts (sales_y, reviews_x, lqs_x, net_x, rating_x) are already
# defined in the "guide lines" cell and do not depend on the scatterplots, so
# they are reused here instead of being re-created from identical copy-pasted
# definitions. Only the compositions need re-running, because the
# *_vs_sales charts were rebuilt with sliders.
reviews_plot = reviews_vs_sales + sales_y + reviews_x
lqs_plot = lqs_vs_sales + sales_y + lqs_x
net_plot = net_vs_sales + sales_y + net_x
rating_plot = rating_vs_sales + sales_y + rating_x


In [40]:
# Final dashboard: the guided plots, now slider-enabled, in the same grid
# layout with the category legend at the right of the top row.
(reviews_plot | lqs_plot | cat_legend) & (net_plot | rating_plot)
