# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [1]:
import pandas as pd
import numpy as np

In [4]:
import chart_studio.plotly as py
import cufflinks as cf
from ipywidgets import interact, widgets
# import plotly.graph_objs as go;
import warnings;

cf.go_offline()
warnings.filterwarnings('ignore')
# py.init_notebook_mode(connected=True)


In [5]:
data = pd.read_excel(r'C:\Users\Mtime\OneDrive\Bureau\Online Retail.xlsx')

#### 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [12]:

dataEXTRA = data.query('Country != "United Kingdom"')

In [13]:
mask = dataEXTRA.loc[:,'InvoiceDate'].map(lambda x: x.strftime('%Y-%m')) == '2011-04'

In [14]:
plot = dataEXTRA.loc[mask].groupby(by='Country').sum().reset_index()
plot.iplot(kind='bar', x='Country', y='Quantity', title='Quantity by country as of April-2011')

#### 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [16]:
df1 = dataEXTRA.set_index(keys='InvoiceDate').loc['2011-01':'2011-03-31']
df1 = df1.reset_index()


df1.loc[:,'InvoiceDate'] = df1.loc[:,'InvoiceDate'].apply(lambda row: row.strftime("%m/%d"))


df1 = df1.groupby(by='InvoiceDate').sum()


title = 'Quantity and Reveune sold to France (Jan:March)'
df1.iplot(kind='line', y='Quantity', title=title)

#### 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [17]:
mean_by_country = data.query('Description == "PARTY BUNTING"').groupby('Country').mean().reset_index().loc[:,['Country', 'Quantity', 'UnitPrice', 'Revenue']]
mean_by_country.columns = ['Country', 'Avg_Quantity', 'Avg_UnitPrice', 'Avg_Revenue']

In [18]:
mean_by_country.iplot(kind='scatter',x='Avg_Quantity', y= 'Avg_UnitPrice', categories='Country')

#### 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [20]:
@interact(Country=['EIRE', 'Germany','France','Netherlands'])
def histograma(Country):
    
    df = dataWOUK.loc[(dataEXTRA['Country']==Country)]

    df.Quantity.iplot(kind='histogram', bins=50, xTitle='quantity', yTitle='frequency', title=f'Quantity distribution of {Country}')

interactive(children=(Dropdown(description='Country', options=('EIRE', 'Germany', 'France', 'Netherlands'), va…

#### 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [21]:
product_list = ['JUMBO BAG RED RETROSPOT', 
                'CREAM HANGING HEART T-LIGHT HOLDER',
                'REGENCY CAKESTAND 3 TIER']

country_list = ['EIRE', 'Germany', 'France', 'Netherlands']

In [22]:
@interact(Products=product_list)
def barplotinho(Products):
    
    df = dataWOUK.groupby(by=['Description','Country'], as_index=False).sum()
    
    df = df.loc[(df['Description']==Products) & (df['Country'].map(lambda x: x in country_list))]

    df.iplot(kind='bar', x='Country', y='Revenue', xTitle='Revenue', yTitle='Country', title=f'Country revenue of {Products}')

interactive(children=(Dropdown(description='Products', options=('JUMBO BAG RED RETROSPOT', 'CREAM HANGING HEAR…

#### 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [23]:
# we dont need this but ok:
data['Year'] = pd.DatetimeIndex(data['InvoiceDate']).year
data['Month'] = pd.DatetimeIndex(data['InvoiceDate']).month
data['Day'] = pd.DatetimeIndex(data['InvoiceDate']).day

# this wee need:
uk = data[data['Country']=='United Kingdom']

In [24]:
@interact(FromMonth=list(data.Month.unique()), 
          FromYear=list(data.Year.unique()),
          ToMonth=list(data.Month.unique()), 
          ToYear=list(data.Year.unique()))
def lineplotinho(FromMonth, FromYear, ToMonth, ToYear):
    
    if (FromMonth > ToMonth) or (FromYear > ToYear):
        print('\nSelect valid interval plix!')
        return None
    
    
    # We first filter by date
    df = uk.set_index('InvoiceDate').loc[f'{FromYear}-{FromMonth}':f'{ToYear}-{ToMonth}'].reset_index()
    
    # now we format the dates as MM/DD/YYYY
    df.loc[:,'InvoiceDate'] = df.loc[:,'InvoiceDate'].apply(lambda x: x.strftime("%D"))

    # groupby date now
    df = df.groupby(by='InvoiceDate').sum()

    # add title
    title = 'Quantity and Reveune sold in UK from:' + f'{FromYear}-{FromMonth}-01 to {ToYear}-{ToMonth}-31'

    # plot
    df.iplot(kind='line', y='Quantity', title=title)

interactive(children=(Dropdown(description='FromMonth', options=(12, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11), value…

#### 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [25]:
agg_func = {'InvoiceNo':'nunique',
            'Quantity':'sum',
            'UnitPrice':'mean',
            'Revenue':'sum',
            'CustomerID':'nunique'}

products = uk.groupby('Description').agg(agg_func)

# storing max values for slider
maxinv = round(products.InvoiceNo.max())
maxcust = round(products.CustomerID.max())

In [26]:
#creating sliders
my_slider1 = widgets.IntSlider(
    value=maxcust/2,
    min=1, max=maxcust, step=2, description='maxcust:', 
    disabled=False, continuous_update=False, orientation='horizontal', readout=True, readout_format='d')

my_slider2 = widgets.IntSlider(
    value=maxinv/2,
    min=1, max=maxinv, step=2, description='maxinv:', 
    disabled=False, continuous_update=False, orientation='horizontal', readout=True, readout_format='d')

In [27]:
# joining everything

@interact(maxinv = my_slider1, maxcust = my_slider2)
def plot_graph(maxinv, maxcust):
    
    # filter the data
    df3 = products.loc[(products.InvoiceNo <= maxinv)&(products.CustomerID <= maxcust)]
    df3 = df3.reset_index()
    
    # we have a lot of products, so we will show only 20 of them
    df3 = df3.head(20)

    df3.iplot(kind='spread',x='InvoiceNo',y='CustomerID', categories='Description',
             title='Number of Invoices and Customers of each product',
            xTitle='Number of Invoices',
            yTitle='Number of Customers',)

interactive(children=(IntSlider(value=410, continuous_update=False, description='maxcust:', max=821, min=1, st…

#### 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the product that contain the text entered in their description.

In [28]:
# Interactive menu
@interact(product='')

# Update function
def chart(product):
    # Transform the data
    df4 = data.groupby('Description',as_index=False).sum()

    # treat data Description
    df4.loc[:,'Description'] = df4.loc[:,'Description'].apply(lambda x: x.strip())

    #filter data of product
    mask = df4.Description.apply(lambda x: x.startswith(product))
    
    #add layout
    df4.loc[mask].set_index('Description').loc[:,['Revenue']].iplot(kind='bar', y='Revenue')

interactive(children=(Text(value='', description='product'), Output()), _dom_classes=('widget-interact',))