# Feature: Use built-in Cosmos Python SDK

In [1]:
import azure.cosmos
from azure.cosmos.partition_key import PartitionKey

# Names of the database and container used by the rest of the notebook. The
# %%upload and %%sql cells refer to these same names literally, so the
# database created here must be 'RetailDemo'.
DATABASE_NAME = 'RetailDemo'
CONTAINER_NAME = 'WebsiteData'

# NOTE(review): `cosmos_client` is not defined in this notebook; presumably it
# is pre-initialized by the notebook environment - confirm.
# Create the database, or reuse it if it already exists.
database = cosmos_client.create_database_if_not_exists(DATABASE_NAME)
print('Database {} created'.format(DATABASE_NAME))

# Create the container (or reuse it), partitioned on the /CartID path.
container = database.create_container_if_not_exists(id=CONTAINER_NAME, partition_key=PartitionKey(path='/CartID'))
print('Container {} created'.format(CONTAINER_NAME))

Database RetailDemo created
Container WebsiteData created


# Feature: Upload JSON file into a container

In [2]:
%%upload --databaseName RetailDemo --containerName WebsiteData --url https://cosmosnotebooksdata.blob.core.windows.net/notebookdata/websiteData.json

# Feature: Query data into Pandas DataFrame

In [2]:
%%sql --database RetailDemo --container WebsiteData --output df_cosmos
SELECT
    c.Action,
    c.Price AS ItemRevenue,
    c.Country,
    c.Item
FROM c

In [3]:
# Preview the first ten rows returned by the query
df_cosmos.head(n=10)

Unnamed: 0,Action,ItemRevenue,Country,Item
0,Viewed,19.99,Haiti,Flannel Shirt
1,Added,19.99,Haiti,Flannel Shirt
2,Viewed,99.99,Puerto Rico,Puffy Jacket
3,Added,99.99,Puerto Rico,Puffy Jacket
4,Viewed,85.0,Romania,Jeans
5,Viewed,350.0,India,Cosmos T-shirt
6,Viewed,8.0,South Africa,Earring
7,Viewed,79.99,Tunisia,Walking Shoes
8,Added,79.99,Tunisia,Walking Shoes
9,Added,30.0,Aruba,Green Sweater


### Visualize DataFrame

In [4]:
import sys
# Install bokeh, pinned to version 1.4.0, by invoking pip through the Python
# interpreter that runs this kernel (sys.executable). --user installs into the
# per-user site-packages directory.
!{sys.executable} -m pip install bokeh==1.4.0 --user



In [6]:
from bokeh.io import show, output_notebook

from bokeh.plotting import figure
from bokeh.palettes import Spectral3
from bokeh.transform import factor_cmap
from bokeh.models import ColumnDataSource, FactorRange


# Get the 10 items with the most 'Purchased' events, as a list
purchases = df_cosmos[df_cosmos['Action'] == 'Purchased']
top_10_items = purchases.groupby('Item').size().sort_values(ascending=False)[:10].index.values.tolist()

# Filter our data to only these 10 items
df_top10 = df_cosmos[df_cosmos['Item'].isin(top_10_items)]

# Count events per (Item, Action). count() gives the number of non-null values
# in each remaining column; the Country count is exposed as ResultCount.
# Rows are then sorted descending by Item and, within an item, by ResultCount.
df_top10_sorted = (
    df_top10.groupby(['Item', 'Action'])
    .count()
    .rename(columns={'Country': 'ResultCount'})
    .reset_index()
    .sort_values(['Item', 'ResultCount'], ascending=False)
    .set_index(['Item', 'Action'])
)

# Get sorted X-axis values - this way, we can display the funnel of view -> add -> purchase
x_axis_values = df_top10_sorted.index.values.tolist()

# A pandas GroupBy passed as a Bokeh `source` is summarized into columns named
# <column>_<statistic> (e.g. ResultCount_max), keyed by the joined group name
# 'Item_Action'. Each (Item, Action) pair is a single row here, so the "max"
# statistic is simply that row's value.
group = df_top10_sorted.groupby(['Item', 'Action'])

# Color each bar by its Action: start=1, end=2 selects the second level of the
# (Item, Action) factor tuples.
index_cmap = factor_cmap(
    'Item_Action',
    palette=Spectral3,
    factors=sorted(df_top10.Action.unique()),
    start=1,
    end=2,
)

# Create the plot
p = figure(
    plot_width=1200,
    plot_height=500,
    title="Conversion rate of items from View -> Add to cart -> Purchase",
    x_range=FactorRange(*x_axis_values),
    toolbar_location=None,
    tooltips=[("Number of events", "@ResultCount_max"), ("Item, Action", "@Item_Action")],
)

# Bar height is the event count (ResultCount), the same column the tooltip and
# the "Count" y-axis label describe. The previous 'ItemRevenue_max' column only
# matched it when no row was missing a Price.
p.vbar(x='Item_Action', top='ResultCount_max', width=1, source=group,
       line_color="white", fill_color=index_cmap)

# Configure how the plot looks
p.y_range.start = 0
p.x_range.range_padding = 0.05
p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 1.2
p.outline_line_color = "black"
p.xaxis.axis_label = "Item"
p.yaxis.axis_label = "Count"

# Display figure inline in Jupyter Notebook.
output_notebook()

# Display figure.
show(p)
