In [None]:
from web.utils import utils
# Load the data with the project's utils module and pass it straight
# through its cleaning step; `df` is the cleaned result used below.
df = utils.clean_data(utils.load_data())

In [None]:
import pandas as pd
import sqlite3
import altair as alt

In [None]:
## Drop outliers: keep only rows with 'Est_Monthly_Sales' < 1500 and 'Reviews' < 60.
## NOTE(review): an earlier version of this comment said 'Reviews' < 200, but the
## filter has always been applied at 60 -- confirm which threshold is intended.
MAX_MONTHLY_SALES = 1500  # exclusive upper bound on 'Est_Monthly_Sales'
MAX_REVIEWS = 60          # exclusive upper bound on 'Reviews'

is_typical_sales = df['Est_Monthly_Sales'] < MAX_MONTHLY_SALES
is_typical_reviews = df['Reviews'] < MAX_REVIEWS
source = df.loc[is_typical_sales & is_typical_reviews]


In [None]:
# Category picker: a single column of category dots that acts as a clickable
# legend. Clicking a dot selects that Category; double-click clears the
# selection, and with nothing selected every category counts as selected.
cat_selection = alt.selection_single(
    fields=['Category'],
    empty='all',
    clear=alt.EventStream(type='dblclick'),
)

# Selected categories keep their category colour; the rest fade to whitesmoke.
cat_color = alt.condition(
    cat_selection,
    'Category:N',
    alt.ColorValue('whitesmoke'),
    legend=None,
)

cat_legend = (
    alt.Chart(source)
    .mark_circle(size=80)
    .encode(
        color=cat_color,
        y=alt.Y('Category:N', axis=alt.Axis(orient='right')),
    )
    .add_selection(cat_selection)
)

# Scatterplot global configuration, shared by every demand scatterplot below.
width = 300
height = 300
circle_size = 60

# Single-point selection keyed on the ASIN field: clicking a point selects it,
# double-click clears, and with nothing selected all points count as selected.
single_select = alt.selection_single(
    empty='all',
    fields=['ASIN'],
    clear=alt.EventStream(type='dblclick'),
)

# Selected points keep their category colour; everything else fades out.
color = alt.condition(single_select, 'Category:N', alt.ColorValue('whitesmoke'), legend=None)

# Columns shown on hover. 'Rating' is included so that every metric plotted on
# an x axis below (Reviews, LQS, Net, Rating) is also readable in the tooltip.
tooltip = [
    'Product_Name',
    'ASIN',
    'Est_Monthly_Sales',
    'Category',
    'Reviews',
    'LQS',
    'Net',
    'Rating',
    'Price',
]


# individual plots

def _demand_scatter(x_field, title_lines, y_max=1600):
    """Build one scatterplot of a product metric against estimated monthly sales.

    Shared data, styling and interaction (``source``, ``circle_size``,
    ``color``, ``tooltip``, ``single_select``, ``cat_selection``, ``width``,
    ``height``) are read from the notebook-level names defined earlier in this
    cell, so all plots built here stay consistent with each other.

    Parameters
    ----------
    x_field : str
        Column of ``source`` plotted on the x axis.
    title_lines : list of str
        Chart title, passed through to ``alt.Chart(title=...)``.
    y_max : int, optional
        Upper bound of the fixed ``Est_Monthly_Sales`` axis domain
        (``[0, y_max]``); the same bound is used for every plot so the
        vertical scales are directly comparable.

    Returns
    -------
    alt.Chart
        Circle-mark chart filtered by the category selection and carrying the
        single-point selection.
    """
    return (
        alt.Chart(source, title=title_lines)
        .mark_circle(size=circle_size)
        .encode(
            x=x_field,
            y=alt.Y('Est_Monthly_Sales', scale=alt.Scale(domain=[0, y_max])),
            color=color,
            tooltip=tooltip,
        )
        .add_selection(single_select)
        # Only show products from the category picked in the legend chart.
        .transform_filter(cat_selection)
        .properties(height=height, width=width)
    )


## reviews_vs_sales - High Demand and Low Competition
reviews_vs_sales = _demand_scatter(
    'Reviews',
    ["Reviews vs. Demand", "Number of Reviews indicates Competition"],
)

## lqs_vs_sales - High Demand and Bad Marketing
lqs_vs_sales = _demand_scatter(
    'LQS',
    ["Listing Quality Score vs. Demand", "Low LQS indicates Bad Marketing"],
)

## net_vs_sales - Demand against estimated net (return on investment)
net_vs_sales = _demand_scatter(
    'Net',
    ["Estimated Net vs Demand", "Indicates Return on Investment"],
)

## rating_vs_sales - High Demand and Low Quality
rating_vs_sales = _demand_scatter(
    'Rating',
    ["Quality Rating vs Demand", "Low Quality indicates Opportunity"],
)

# guide lines

def _red_rule(channel, value):
    """Return a red rule mark positioned at `value` on the given channel ('x' or 'y')."""
    rule_data = pd.DataFrame({channel: [value]})
    return alt.Chart(rule_data).mark_rule(color='red').encode(**{channel: channel})


# One horizontal threshold shared by every plot, plus one vertical threshold
# per x-axis metric.
sales_y = _red_rule('y', 200)
reviews_x = _red_rule('x', 50)
lqs_x = _red_rule('x', 5.5)
net_x = _red_rule('x', 15)
rating_x = _red_rule('x', 3.7)

# Layer each scatterplot with the shared sales rule and its own metric rule.
reviews_plot = reviews_vs_sales + sales_y + reviews_x
lqs_plot = lqs_vs_sales + sales_y + lqs_x
net_plot = net_vs_sales + sales_y + net_x
rating_plot = rating_vs_sales + sales_y + rating_x



In [None]:
# Dashboard layout: two rows of plots, with the clickable category legend at
# the end of the top row. The last expression is what the cell displays.
top_row = reviews_plot | lqs_plot | cat_legend
bottom_row = net_plot | rating_plot
top_row & bottom_row