# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
import pandas as pd
import plotly.plotly as py
import cufflinks as cf
from ipywidgets import interact

cf.go_offline()

In [2]:
data = pd.read_excel('../data/Online Retail.xlsx')

## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [31]:
data.head()

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.3,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.2,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.6,13408,United Kingdom


In [4]:
data.Country.unique()

array(['United Kingdom', 'Spain', 'Cyprus', 'EIRE', 'Portugal',
       'Netherlands', 'Australia', 'Singapore', 'Switzerland', 'Finland',
       'Channel Islands', 'France', 'Malta', 'Italy', 'Israel', 'Germany',
       'Austria', 'Norway', 'Denmark', 'Sweden', 'Belgium', 'Canada',
       'Poland', 'Japan', 'Iceland', 'Greece', 'Lebanon', 'South Africa',
       'Czech Republic', 'USA', 'Brazil', 'United Arab Emirates',
       'Lithuania', 'Saudi Arabia', 'Bahrain'], dtype=object)

In [14]:
data.columns

Index(['InvoiceNo', 'InvoiceDate', 'StockCode', 'Description', 'Quantity',
       'UnitPrice', 'Revenue', 'CustomerID', 'Country'],
      dtype='object')

In [35]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396034 entries, 0 to 396033
Data columns (total 9 columns):
 #   Column       Non-Null Count   Dtype         
---  ------       --------------   -----         
 0   InvoiceNo    396034 non-null  int64         
 1   InvoiceDate  396034 non-null  datetime64[ns]
 2   StockCode    396034 non-null  object        
 3   Description  396034 non-null  object        
 4   Quantity     396034 non-null  int64         
 5   UnitPrice    396034 non-null  float64       
 6   Revenue      396034 non-null  float64       
 7   CustomerID   396034 non-null  int64         
 8   Country      396034 non-null  object        
dtypes: datetime64[ns](1), float64(2), int64(3), object(3)
memory usage: 27.2+ MB


In [12]:
import datetime

In [13]:
@interact(Country = list(data.Country.unique()[1:]))
def bchart_country(Country):
    a = datetime.datetime(2011, 4, 1)
    b = datetime.datetime(2011, 4, 30)
    mask = ((data['InvoiceDate'] >= a) & (data['InvoiceDate'] <= b))
    df = data.loc[(data['Country'] == Country) &
                  mask,:].groupby(by = 'Country', as_index = False)[['Quantity', 'Revenue']].sum()
    df.iplot(kind = 'bar', x = 'Country', y = ['Quantity', 'Revenue'])

interactive(children=(Dropdown(description='Country', options=('Spain', 'Cyprus', 'EIRE', 'Portugal', 'Netherl…

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [69]:
d1 = datetime.datetime(2011, 1, 1)
d2 = datetime.datetime(2011, 5, 31)
mask = ((data['Country'] == 'France')&(data['InvoiceDate'] >= d1)&(data['InvoiceDate'] <= d2))
df2 = data.loc[mask,:].sort_values(by = 'InvoiceDate')\
.groupby(by = 'InvoiceDate', as_index = False)[['Quantity', 'Revenue']].sum()
df2.iplot(kind = 'scatter', x = 'InvoiceDate')

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [9]:
data.sample(20)

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
131691,540149,2011-01-05 11:00:00,21154,PARTY BUNTING,10,1.25,12.5,12921,United Kingdom
24855,571497,2011-10-17 15:03:00,22386,PARTY BUNTING,20,2.08,41.6,14841,United Kingdom
51579,563555,2011-08-17 13:21:00,22662,PARTY BUNTING,2,1.65,3.3,16755,United Kingdom
101925,557468,2011-06-20 13:14:00,22716,PARTY BUNTING,12,0.42,5.04,16713,United Kingdom
156315,556016,2011-06-08 11:54:00,22699,PARTY BUNTING,24,2.55,61.2,18092,United Kingdom
335232,569672,2011-10-05 13:58:00,23298,PARTY BUNTING,2,4.95,9.9,17841,United Kingdom
54891,563071,2011-08-11 15:43:00,22915,PARTY BUNTING,12,0.42,5.04,17633,United Kingdom
230330,578944,2011-11-27 13:41:00,22190,PARTY BUNTING,1,2.1,2.1,16005,United Kingdom
51040,576062,2011-11-13 15:30:00,85049A,PARTY BUNTING,3,1.25,3.75,12867,United Kingdom
94964,553754,2011-05-19 10:25:00,22296,PARTY BUNTING,12,1.65,19.8,16175,United Kingdom


In [19]:
mask = data['Description'] == 'PARTY BUNTING'
df = data.loc[mask,:].groupby(by = 'Country', as_index = False)[['Quantity', 'UnitPrice']].mean()
df.iplot(kind = 'scatter', 
         x = 'Quantity', 
         y = 'UnitPrice', 
         categories = 'Country',
         xTitle = 'Average Quantity',
         yTitle = 'Average Unit Price')


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [22]:
@interact(country = ['EIRE','Germany', 'France','Netherlands'])

def qty_hist(country):
    mask = data['Country'] == country
    df = data.loc[mask,:].groupby(by = 'InvoiceNo')['Quantity'].sum()
    df.iplot('hist')

interactive(children=(Dropdown(description='country', options=('EIRE', 'Germany', 'France', 'Netherlands'), va…

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [4]:
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

country_list = ['EIRE', 'Germany', 'France', 'Netherlands']

In [47]:
mask = ((data['Country'] == country_list[0])|
        (data['Country'] == country_list[1])|
        (data['Country'] == country_list[2])|
        (data['Country'] == country_list[3])&
        (data['Description'] == 'JUMBO BAG RED RETROSPOT'))
    
df = data.loc[mask,:].groupby(by = ["Country",'Description'], as_index = False)['Revenue'].sum()
df

Unnamed: 0,Country,Description,Revenue
0,EIRE,4 PURPLE FLOCK DINNER CANDLES,15.30
1,EIRE,50'S CHRISTMAS GIFT BAG LARGE,60.00
2,EIRE,DOLLY GIRL BEAKER,30.00
3,EIRE,NINE DRAWER OFFICE TIDY,29.90
4,EIRE,OVAL WALL MIRROR DIAMANTE,69.65
...,...,...,...
5116,Germany,ZINC METAL HEART DECORATION,15.00
5117,Germany,ZINC T-LIGHT HOLDER STAR LARGE,22.80
5118,Germany,ZINC T-LIGHT HOLDER STARS SMALL,19.92
5119,Germany,ZINC WILLIE WINKIE CANDLE STICK,51.00


In [6]:
@interact(product = product_list)
def bar_revenue(product):
    mask = (((data['Country'] == country_list[0])|
           (data['Country'] == country_list[1])|
           (data['Country'] == country_list[2])|
            (data['Country'] == country_list[3]))&
           (data['Description'] == product))
    
    df = data.loc[mask,:].groupby(by = ["Country",'Description'], as_index = False)['Revenue'].sum()
    df.iplot('bar', 
             x = 'Country', 
             y = 'Revenue',
             xTitle = 'Product',
             yTitle = 'Total Revenue', 
             color = 'purple')
            

interactive(children=(Dropdown(description='product', options=('JUMBO BAG RED RETROSPOT', 'CREAM HANGING HEART…

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [7]:
data['Year'] = pd.DatetimeIndex(data['InvoiceDate']).year
data['Month'] = pd.DatetimeIndex(data['InvoiceDate']).month
data['Day'] = pd.DatetimeIndex(data['InvoiceDate']).day
uk = data[data['Country']=='United Kingdom']

In [11]:
@interact(month = data['Month'].unique(), 
          year = data['Year'].unique())
def line_qty(month,year):
    mask = (uk['Month']==month)&(uk['Year']==year)
    df = uk.loc[mask,:].groupby(by = 'Day', as_index = False)[['Quantity']].sum()
    df.iplot('line',
            x = 'Day',
            y = 'Quantity',
            color = 'Teal',
            xTitle = 'Day',
            yTitle = 'Total Daily Sales in quantity')

interactive(children=(Dropdown(description='month', options=(12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), value=12)…

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [None]:
agg_func = {'InvoiceNo':'nunique',
            'Quantity':'sum',
            'UnitPrice':'mean',
            'Revenue':'sum',
            'CustomerID':'nunique'}

products = uk.groupby('Description').agg(agg_func)

## 8. Creat an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.