# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [8]:
import pandas as pd
import plotly.plotly as py
import cufflinks as cf
from ipywidgets import interact

cf.go_offline()

In [2]:
# Import the dataset
data = pd.read_excel('../data/Online Retail.xlsx')

In [3]:
# Check the result
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [4]:
# Create masks to filter the dataframe
mask_month = data.InvoiceDate.apply(lambda x : x.month) == 4
mask_year = data.InvoiceDate.apply(lambda x : x.year) == 2011

# Filter the dataframe and create the interactive chart
data[mask_month & mask_year].groupby(by='Country').sum().loc['Australia' : 'USA', 
                                                             ['Quantity', 'Revenue']].iplot(kind='bar')

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [5]:
# Create masks to filter the dataframe
mask_begin = data.InvoiceDate >= '2011-01-01'
mask_end = data.InvoiceDate <= '2011-05-31'
mask_country = data.Country == 'France'

# Filter the dataframe and create the interactive chart
data.loc[mask_begin & mask_end & mask_country, ['Quantity', 'Revenue']].iplot()

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [6]:
# Filter the dataframe and create the interactive chart
data[data.Description == 'PARTY BUNTING'].groupby(by=['Country']).mean().reset_index().iplot(x='Quantity', y='UnitPrice', kind='scatter', categories='Country')


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [15]:
data.loc[data.Country == 'EIRE', 'InvoiceNo'].iplot(kind='hist', bins=50, title='Distribution of quantity per invoice - EIRE')
data.loc[data.Country == 'Germany', 'InvoiceNo'].iplot(kind='hist',bins=50, title='Distribution of quantity per invoice - Germany')
data.loc[data.Country == 'France', 'InvoiceNo'].iplot(kind='hist',bins=50, title='Distribution of quantity per invoice - France')
data.loc[data.Country == 'Netherlands', 'InvoiceNo'].iplot(kind='hist',bins=50, title='Distribution of quantity per invoice - Netherlands')

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [16]:
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

country_list = ['EIRE', 'Germany', 'France', 'Netherlands']

In [60]:
# Create masks to filter the dataframe
mask_product = data.Description.isin(product_list)
mask_country = data.Country.isin(country_list)

data.loc[mask_product & mask_country, :].groupby(by=['Country', 'Description']).sum().Revenue.reset_index().iplot(kind='bar', x=['Country', 'Description'], y='Revenue')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [61]:
data['Year'] = pd.DatetimeIndex(data['InvoiceDate']).year
data['Month'] = pd.DatetimeIndex(data['InvoiceDate']).month
data['Day'] = pd.DatetimeIndex(data['InvoiceDate']).day
uk = data[data['Country']=='United Kingdom']

In [79]:
@interact(Year=list(data['Year'].unique()),
          Month=list(data['Month'].unique()),
          Day=list(list(data['Day'].unique())
         )
          
def line_chart(Year, Month, Day):
          
    global uk
          
    mask_year = uk.Year == Year
    mask_month = uk.Month == Month
    mask_day = uk.Day == Day
          
    data = uk.loc[mask_year & mask_month & mask_day, :]
          
    data.iplot(kind='line', x='InvoiceDate', y='Quantity')

SyntaxError: invalid syntax (<ipython-input-79-04475bbcf398>, line 6)

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
agg_func = {'InvoiceNo':'nunique',
            'Quantity':'sum',
            'UnitPrice':'mean',
            'Revenue':'sum',
            'CustomerID':'nunique'}

products = uk.groupby('Description').agg(agg_func)

## 8. Creat an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.