# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [40]:
import pandas as pd
import chart_studio as py
from chart_studio import plotly
import plotly.graph_objs as go
import cufflinks as cf
from ipywidgets import interact
import numpy as np

cf.go_offline()


In [2]:
data = pd.read_excel('../data/Online Retail.xlsx')

In [3]:
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [4]:
quantityRevenue = data[(data['Country'] != 'United Kingdom') & 
                       (data['InvoiceDate'] > '2011-04-01') &
                      (data['InvoiceDate'] < '2011-04-30')]

quantityRevenue = quantityRevenue[['Quantity','Revenue','Country']].groupby(['Country']).sum()
quantityRevenue

Unnamed: 0_level_0,Quantity,Revenue
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Australia,224,421.6
Austria,308,584.78
Belgium,1170,1788.48
Brazil,356,1143.6
Channel Islands,96,243.0
EIRE,4129,7270.5
Finland,810,1368.92
France,2265,3899.31
Germany,5702,10994.79
Greece,260,509.74


In [23]:
quantityRevenue.iplot(kind='bar', xTitle='Countries',
                  yTitle='Count', title='Quantity & Revenue')


## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [18]:
toFrance = data[(data['Country'] == 'France') &
               (data['InvoiceDate'] > '2011-01-01') &
               (data['InvoiceDate'] < '2011-05-31')]

In [21]:
toFrance = toFrance[['InvoiceDate','Quantity','Revenue']].groupby('InvoiceDate').sum()
toFrance

Unnamed: 0_level_0,Quantity,Revenue
InvoiceDate,Unnamed: 1_level_1,Unnamed: 2_level_1
2011-01-05 12:42:00,359,502.07
2011-01-05 14:48:00,369,763.11
2011-01-06 14:26:00,261,283.77
2011-01-06 16:12:00,177,425.25
2011-01-07 12:07:00,488,676.16
...,...,...
2011-05-25 12:44:00,180,390.96
2011-05-25 12:54:00,374,514.74
2011-05-26 14:53:00,86,196.50
2011-05-26 17:16:00,424,362.00


In [25]:
toFrance.iplot(kind='line', xTitle='Date',
                  yTitle='Count', title='Quantity & Revenue - France')


## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [38]:
pb = data[(data['Description'] == 'PARTY BUNTING')]
pb = pb[['Quantity','UnitPrice','Country']].groupby('Country', as_index=False).mean()
pb


Unnamed: 0,Country,Quantity,UnitPrice
0,Australia,33.125,4.7125
1,Austria,8.0,4.95
2,Belgium,4.0,4.95
3,Channel Islands,13.333333,4.95
4,Cyprus,2.333333,4.75
5,Denmark,12.0,4.95
6,EIRE,21.210526,4.739474
7,Finland,6.0,4.95
8,France,5.727273,4.922727
9,Germany,6.8,4.89


In [39]:
pb.iplot(x='Quantity', y='UnitPrice', categories='Country')



The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [45]:
data

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.20,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.60,13408,United Kingdom
...,...,...,...,...,...,...,...,...,...
396029,580691,2011-12-05 15:48:00,90214W,"LETTER ""W"" BLING KEY RING",12,0.29,3.48,13790,United Kingdom
396030,580691,2011-12-05 15:48:00,90214Z,"LETTER ""Z"" BLING KEY RING",12,0.29,3.48,13790,United Kingdom
396031,580865,2011-12-06 11:58:00,90089,PINK CRYSTAL SKULL PHONE CHARM,12,0.19,2.28,17914,United Kingdom
396032,580865,2011-12-06 11:58:00,90089,PINK CRYSTAL SKULL PHONE CHARM,12,0.19,2.28,17914,United Kingdom


In [120]:
histo = data[(data['Country'] == 'EIRE') | 
            (data['Country'] == 'Germany') |
            (data['Country'] == 'France') |
            (data['Country'] == 'Netherlands')]


In [121]:
histo = histo[['Quantity','InvoiceNo','Country']] 

In [122]:
histo['EIRE'] = histo[histo['Country'] == 'EIRE']['Quantity']
histo['Germany'] = histo[histo['Country'] == 'Germany']['Quantity']
histo['France'] = histo[histo['Country'] == 'France']['Quantity']
histo['Netherlands'] = histo[histo['Country'] == 'Netherlands']['Quantity']


In [128]:
histo[['EIRE','Germany','France','Netherlands']].iplot(kind="histogram", bins=10, theme="white", title="Distributions of quantity per invoice")

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [130]:
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

country_list = ['EIRE', 'Germany', 'France', 'Netherlands']



In [131]:
data_bar = data[data['Description'].isin(product_list)]
data_bar = data[data['Country'].isin(country_list)]
data_bar


Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country,Year,Month,Day
179,539320,2010-12-16 19:16:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.80,14911,EIRE,2010,12,16
198,539722,2010-12-21 13:45:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,24,2.95,70.80,14911,EIRE,2010,12,21
304,541570,2011-01-19 12:34:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,256,2.55,652.80,14646,Netherlands,2011,1,19
322,541979,2011-01-24 14:54:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.70,14911,EIRE,2011,1,24
367,542777,2011-02-01 08:31:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.95,17.70,14911,EIRE,2011,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...
396001,581001,2011-12-07 08:07:00,23562,SET OF 6 RIBBONS PERFECTLY PRETTY,6,2.89,17.34,12583,France,2011,12,7
396005,581175,2011-12-07 15:16:00,23562,SET OF 6 RIBBONS PERFECTLY PRETTY,60,2.49,149.40,14646,Netherlands,2011,12,7
396007,581266,2011-12-08 11:25:00,23562,SET OF 6 RIBBONS PERFECTLY PRETTY,12,2.89,34.68,12621,Germany,2011,12,8
396021,581175,2011-12-07 15:16:00,23561,SET OF 6 RIBBONS PARTY,60,2.49,149.40,14646,Netherlands,2011,12,7


In [139]:
data_bar = data_bar.groupby(['Description','Country']).sum()
data_bar = data_bar.reset_index()
data_bar

Unnamed: 0,Description,Country,InvoiceNo,Quantity,UnitPrice,Revenue,CustomerID,Year,Month,Day
0,4 PURPLE FLOCK DINNER CANDLES,EIRE,579553,6,2.55,15.30,14156,2011,11,30
1,50'S CHRISTMAS GIFT BAG LARGE,EIRE,568095,48,1.25,60.00,14911,2011,9,23
2,50'S CHRISTMAS GIFT BAG LARGE,France,580126,12,1.25,15.00,12562,2011,12,1
3,50'S CHRISTMAS GIFT BAG LARGE,Germany,2851436,60,6.25,75.00,62912,10055,48,101
4,DOLLY GIRL BEAKER,EIRE,1139123,24,2.50,30.00,29822,4022,19,39
...,...,...,...,...,...,...,...,...,...,...
5896,ZINC T-LIGHT HOLDER STARS SMALL,Netherlands,2256556,444,2.99,321.00,56713,8044,31,81
5897,ZINC WILLIE WINKIE CANDLE STICK,EIRE,2231437,60,3.40,51.00,58889,8044,26,46
5898,ZINC WILLIE WINKIE CANDLE STICK,Germany,2233686,60,3.40,51.00,50312,8044,24,74
5899,ZINC WILLIE WINKIE CANDLE STICK,Netherlands,1710715,384,2.16,276.48,43938,6033,29,54


## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [129]:
data['Year'] = pd.DatetimeIndex(data['InvoiceDate']).year
data['Month'] = pd.DatetimeIndex(data['InvoiceDate']).month
data['Day'] = pd.DatetimeIndex(data['InvoiceDate']).day
uk = data[data['Country']=='United Kingdom']


## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
agg_func = {'InvoiceNo':'nunique',
            'Quantity':'sum',
            'UnitPrice':'mean',
            'Revenue':'sum',
            'CustomerID':'nunique'}

products = uk.groupby('Description').agg(agg_func)


## 8. Creat an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.