# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [26]:
import pandas as pd
import plotly.plotly as py
import cufflinks as cf
import datetime
from ipywidgets import interact

# Switch cufflinks to offline mode before any `.iplot()` call below.
# NOTE(review): presumably this makes charts render locally in the notebook
# without a Plotly cloud account — confirm against the cufflinks docs.
cf.go_offline()

In [4]:
# Read the Online Retail workbook (path is relative to the notebook) into a DataFrame.
data = pd.read_excel(io='../data/Online Retail.xlsx')

In [24]:
# Print a structural summary of the frame: column names, non-null counts,
# dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396034 entries, 0 to 396033
Data columns (total 9 columns):
InvoiceNo      396034 non-null int64
InvoiceDate    396034 non-null datetime64[ns]
StockCode      396034 non-null object
Description    396034 non-null object
Quantity       396034 non-null int64
UnitPrice      396034 non-null float64
Revenue        396034 non-null float64
CustomerID     396034 non-null int64
Country        396034 non-null object
dtypes: datetime64[ns](1), float64(2), int64(3), object(3)
memory usage: 27.2+ MB


In [33]:
# Preview the first five rows to sanity-check the columns listed by `info()`.
data.head(n=5)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [77]:
# Build the April 2011 subset for every country except the United Kingdom.
# The date window is half-open: [2011-04-01, 2011-05-01).
outside_uk = data['Country'] != 'United Kingdom'
on_or_after_april_1 = data['InvoiceDate'] >= '2011-04-01'
before_may_1 = data['InvoiceDate'] < '2011-05-01'

data1 = data[outside_uk & on_or_after_april_1 & before_may_1]

In [79]:
# Total quantity and revenue per country for the April 2011 subset.
# The two columns are selected *before* aggregating: summing the whole frame
# would also try to sum the datetime and text columns, which is wasted work
# and raises a TypeError on recent pandas versions.
group = data1.groupby('Country')[['Quantity', 'Revenue']].sum()

In [82]:
# Interactive grouped bar chart: one pair of bars (Quantity, Revenue) per country.
group.iplot(
    kind='bar',
    title='Quantity and Revenue by country (exc UK) for month of April 2011',
    xTitle='Countries (excluding UK)',
    yTitle='Quantity/Revenue',
)

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [36]:
# Sales to France from January 1st through May 31st 2011, both days included.
# The window is half-open, [2011-01-01, 2011-06-01), so that every timestamp
# on May 31st is kept; comparing against `2011-05-31 00:00` with `<` would
# silently drop that whole day, and a strict `>` would drop midnight on Jan 1st.
data2 = data[
    (data['Country'] == 'France')
    & (data['InvoiceDate'] >= '2011-01-01')
    & (data['InvoiceDate'] < '2011-06-01')
]

In [85]:
# Daily totals of quantity and revenue for the France subset.
# `normalize()` truncates each invoice timestamp to midnight so that all
# invoices from the same calendar day fall into one group. The numeric columns
# are selected before `.sum()` so the text/datetime columns are never summed
# (wasted work, and a TypeError on recent pandas versions).
france_daily = (
    data2
    .groupby(data2['InvoiceDate'].dt.normalize())[['Quantity', 'Revenue']]
    .sum()
)

# The x-axis carries the invoice date, so label it as such.
france_daily.iplot(
    kind='line',
    xTitle='Invoice date',
    yTitle='Quantity/Revenue',
    title='Quantity/Revenue sold to France between January 1st to May 31st 2011 (Daily)',
)

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [86]:
# Average quantity and average unit price of PARTY BUNTING per country.
# Only the two plotted columns are averaged: calling `.mean()` on the whole
# frame would also try to average text and datetime columns, which recent
# pandas versions reject with a TypeError.
party_bunting_avg = (
    data[data['Description'] == 'PARTY BUNTING']
    .groupby('Country')[['Quantity', 'UnitPrice']]
    .mean()
    .reset_index()  # bring Country back as a column so it can drive `categories`
)

party_bunting_avg.iplot(
    kind='scatter',
    x='Quantity',
    y='UnitPrice',
    categories='Country',
    xTitle='Average Quantity',
    yTitle='Average Unit Price',
    title='Avg Quantity and Avg Unit price by country',
)

## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [87]:
# One row per invoice, one column per country, holding the total quantity on
# that invoice. `aggfunc='sum'` is what "quantity per invoice" asks for;
# 'mean' would instead give the average quantity of a single invoice line.
# Cells for countries other than the invoice's own are NaN.
histo_countries = (
    data[data['Country'].isin(['EIRE', 'Germany', 'France', 'Netherlands'])]
    .pivot_table(
        values='Quantity',
        index='InvoiceNo',
        columns='Country',
        aggfunc='sum',
    )
)

In [88]:
# One histogram subplot per country column, normalised to percentages.
histo_countries.iplot(
    kind='hist',
    subplots=True,
    histnorm='percent',
    title='Distribution of quantity per invoice for (EIRE, Germany, France and Netherlands)',
    xTitle='Distribution by country',
    yTitle='Percent',
)

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [69]:
# Product descriptions to compare in exercise 5.
product_list = [
    'JUMBO BAG RED RETROSPOT',
    'CREAM HANGING HEART T-LIGHT HOLDER',
    'REGENCY CAKESTAND 3 TIER',
]

# Countries whose revenue is shown as side-by-side bars.
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]

In [91]:
# Revenue per product (rows) and country (columns) for the selected lists.
# Revenue is totalled with `aggfunc='sum'`; 'mean' would report the average
# revenue of a single invoice line rather than the revenue earned per country.
data5 = (
    data[data['Description'].isin(product_list) & data['Country'].isin(country_list)]
    .pivot_table(
        values='Revenue',
        index='Description',
        columns='Country',
        aggfunc='sum',
    )
)

In [94]:
# Bar chart with one group of country bars per product description.
data5.iplot(
    kind='bar',
    title='Revenue by country listed for each product listed',
    xTitle='Products',
    yTitle='Revenue',
)

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [95]:
# Split the invoice timestamp into calendar parts so they can be used as filters.
invoice_parts = data['InvoiceDate'].dt
data['Year'] = invoice_parts.year
data['Month'] = invoice_parts.month
data['Day'] = invoice_parts.day

# United Kingdom rows only (taken after the date-part columns were added).
uk = data.loc[data['Country'] == 'United Kingdom']

## 7. Create an interactive scatter plot of number of invoices (x-axis) vs. number of customers (y-axis), where each plot point represents an individual product. Add two sliders that control the x and y axis ranges.

In [None]:
# Per-product aggregation recipe: distinct invoices and customers,
# total quantity and revenue, and mean unit price.
agg_func = dict(
    InvoiceNo='nunique',
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',
)

# One row per product description for the UK subset.
products = uk.groupby('Description').agg(agg_func)

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show only the products that contain the entered text in their description.