# Interactive Visualization Lab

Complete the following set of exercises to solidify your knowledge of interactive visualization using Plotly, Cufflinks, and IPyWidgets.

In [3]:
import os
from datetime import datetime, date

import chart_studio.plotly as py
import cufflinks as cf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from ipywidgets import interact

# Put cufflinks in offline mode before any .iplot() call below
# (presumably so charts render inside the notebook without a Plotly account — confirm against cufflinks docs).
cf.go_offline()

In [4]:
# Location of the Online Retail workbook. Set the ONLINE_RETAIL_XLSX environment
# variable to point at your own copy; the fallback is the path the notebook was
# originally written against, so existing setups keep working unchanged.
data_path = os.environ.get(
    'ONLINE_RETAIL_XLSX',
    '/Users/alessandra/Documents/Ironhack/Curso/Data/Online Retail.xlsx',
)
data = pd.read_excel(data_path)
data

Unnamed: 0,InvoiceNo,InvoiceDate,StockCode,Description,Quantity,UnitPrice,Revenue,CustomerID,Country
0,536365,2010-12-01 08:26:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,17850,United Kingdom
1,536373,2010-12-01 09:02:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,17850,United Kingdom
2,536375,2010-12-01 09:32:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,6,2.55,15.30,17850,United Kingdom
3,536390,2010-12-01 10:19:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,64,2.55,163.20,17511,United Kingdom
4,536394,2010-12-01 10:39:00,85123A,CREAM HANGING HEART T-LIGHT HOLDER,32,2.55,81.60,13408,United Kingdom
...,...,...,...,...,...,...,...,...,...
396029,580691,2011-12-05 15:48:00,90214W,"LETTER ""W"" BLING KEY RING",12,0.29,3.48,13790,United Kingdom
396030,580691,2011-12-05 15:48:00,90214Z,"LETTER ""Z"" BLING KEY RING",12,0.29,3.48,13790,United Kingdom
396031,580865,2011-12-06 11:58:00,90089,PINK CRYSTAL SKULL PHONE CHARM,12,0.19,2.28,17914,United Kingdom
396032,580865,2011-12-06 11:58:00,90089,PINK CRYSTAL SKULL PHONE CHARM,12,0.19,2.28,17914,United Kingdom


## 1. Create an interactive bar chart showing total quantity and revenue by country (excluding United Kingdom) for the month of April 2011.

In [5]:
# Rows sold outside the United Kingdom during April 2011.
not_uk = data['Country'] != 'United Kingdom'
in_april_2011 = (data.InvoiceDate.dt.year == 2011) & (data.InvoiceDate.dt.month == 4)

# Aggregate to one row per country: the exercise asks for *totals*, and without
# the groupby every invoice line would be drawn as its own bar.
april_by_country = (
    data.loc[not_uk & in_april_2011, ['Country', 'Quantity', 'Revenue']]
    .groupby('Country', as_index=False)
    .sum()
)
april_by_country.iplot(kind='bar', x='Country', title='Total Quantity and Revenue by Country')

## 2. Create an interactive line chart showing quantity and revenue sold to France between January 1st and May 31st 2011.

In [29]:
# Sales to France between 1 January and 31 May 2011 (both ends inclusive).
to_france = data['Country'] == 'France'
from_start = data.InvoiceDate.dt.date >= date(2011, 1, 1)
until_end = data.InvoiceDate.dt.date <= date(2011, 5, 31)

# Sum per invoice timestamp, then collapse the timestamps to calendar days
# and sum again so the chart has one point per day.
df = (
    data.loc[to_france & from_start & until_end, ['InvoiceDate', 'Quantity', 'Revenue']]
    .groupby('InvoiceDate')
    .sum()
    .reset_index()
    .assign(date=lambda per_timestamp: per_timestamp.InvoiceDate.dt.date)
    .drop(columns='InvoiceDate')
    .groupby('date')
    .sum()
    .reset_index()
)
df.iplot(kind='line', x='date', title='Quantity and Revenue sold to France', color=['red','blue'])

## 3. Create an interactive scatter plot showing the relationship between average quantity (x-axis) and average unit price (y-axis) for the product PARTY BUNTING with the plot points color-coded by country (categories).

In [6]:
# Average quantity and unit price of PARTY BUNTING, one point per country.
is_party_bunting = data.Description == 'PARTY BUNTING'

# Select the numeric columns *before* averaging: calling .mean() on the whole
# frame also tries to average text/datetime columns, which raises a TypeError
# on recent pandas versions.
df2 = (
    data.loc[is_party_bunting, ['Country', 'Quantity', 'UnitPrice']]
    .groupby('Country', as_index=False)
    .mean()
)

# mode='markers' draws points rather than a connecting line. No fixed color is
# passed so that the `categories` argument can give each country its own color.
df2.iplot(kind='scatter', mode='markers', x='Quantity', y='UnitPrice',
          categories='Country', title='Average of Quantity X Unit Price')


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead


The pandas.np module is deprecated and will be removed from pandas in a future version. Import numpy directly instead



## 4. Create a set of interactive histograms showing the distributions of quantity per invoice for the following countries: EIRE, Germany, France, and Netherlands.

In [13]:
# Countries whose per-invoice quantity distributions are compared.
country = ['EIRE', 'Germany', 'France', 'Netherlands']
mask = data['Country'].isin(country)

# One row per invoice, one column per country, values = total quantity on that
# invoice. Each invoice belongs to a single country, so the other columns are
# NaN for that row and do not contribute to that country's histogram.
distribution = data.loc[mask, :].pivot_table(
    index='InvoiceNo',
    columns='Country',
    values='Quantity',
    aggfunc='sum',
)

# subplots=True gives each country its own histogram panel.
distribution.iplot(kind='hist', subplots=True, bins=20,
                   title='Distributions of Quantity per Invoice')

## 5. Create an interactive side-by-side bar chart showing the revenue by country listed below (bars) for each of the products listed below.

In [15]:
# Products compared in the side-by-side revenue chart.
product_list = [
    'JUMBO BAG RED RETROSPOT',
    'CREAM HANGING HEART T-LIGHT HOLDER',
    'REGENCY CAKESTAND 3 TIER',
]

# Countries that each get their own bar per product.
country_list = [
    'EIRE',
    'Germany',
    'France',
    'Netherlands',
]

In [18]:
# Keep only the selected countries and products.
mask1 = data['Country'].isin(country_list)
mask2 = data['Description'].isin(product_list)

# Pivot to a wide frame (index = product, one column per country). cufflinks
# draws one bar trace per column, which is what produces side-by-side bars;
# a long frame plotted with x='Description' yields a single trace in which the
# countries cannot be told apart.
df3 = data.loc[mask1 & mask2, :].pivot_table(
    index='Description',
    columns='Country',
    values='Revenue',
    aggfunc='sum',
)
df3.iplot(kind='bar', title='Revenue by Country')

## 6. Create an interactive line chart showing quantity sold by day for the United Kingdom. Add drop-down boxes for Year and Month that allow you to filter the date range that appears in the chart.

In [20]:
# Split the invoice timestamp into calendar parts so the widgets can filter on them.
data['Year'] = data['InvoiceDate'].dt.year
data['Month'] = data['InvoiceDate'].dt.month
data['Day'] = data['InvoiceDate'].dt.day

# United Kingdom rows only; used by the interactive charts that follow.
uk = data.loc[data['Country'] == 'United Kingdom']

In [23]:
# Sorted so the drop-downs list years and months in calendar order rather than
# in order of first appearance in the data.
years = sorted(uk['Year'].unique())
months = sorted(uk['Month'].unique())


@interact(year=years, month=months)
def plot(year, month):
    """Plot the total quantity sold per day in the United Kingdom.

    Parameters
    ----------
    year : int
        Calendar year chosen in the drop-down.
    month : int
        Calendar month (1-12) chosen in the drop-down.
    """
    selected = uk[(uk['Year'] == year) & (uk['Month'] == month)]

    # Not every year/month combination offered by the drop-downs has sales;
    # report that instead of handing an empty frame to the plotting call.
    if selected.empty:
        print(f'No United Kingdom sales recorded for {int(year)}-{int(month):02d}.')
        return

    # Select Quantity before summing so text and datetime columns are never
    # aggregated (slow at best, a TypeError on recent pandas at worst).
    df4 = selected.groupby('Day')['Quantity'].sum().reset_index()

    df4.iplot(kind='line', x='Day', y='Quantity', title='Quantity Sold by Day for United Kingdom', color=['blue'])

interactive(children=(Dropdown(description='year', options=(2010, 2011), value=2010), Dropdown(description='mo…

## 7. Create an interactive scatter plot that plots number of invoices (x-axis) vs. number of customers (y-axis) and the plot points represent individual products. Add two sliders that control the x and y axis ranges.

In [24]:
# Per-product summary statistics: distinct invoices and customers, total
# quantity and revenue, and mean unit price.
agg_func = dict(
    InvoiceNo='nunique',
    Quantity='sum',
    UnitPrice='mean',
    Revenue='sum',
    CustomerID='nunique',
)

# One row per product description, United Kingdom sales only.
products = uk.groupby('Description').agg(agg_func)

In [26]:
@interact(invoices=(products['InvoiceNo'].min(), products['InvoiceNo'].max()),
          customers=(products['CustomerID'].min(), products['CustomerID'].max()))
def plot(invoices, customers):
    """Scatter number of invoices against number of customers, one point per product.

    Parameters
    ----------
    invoices : int
        Slider value; only products with at least this many distinct invoices are shown.
    customers : int
        Slider value; only products with at least this many distinct customers are shown.
    """
    # Inclusive comparison: the sliders start at each column's minimum, so the
    # left-most position should show every product rather than drop those at the minimum.
    keep = (products['CustomerID'] >= customers) & (products['InvoiceNo'] >= invoices)

    # reset_index turns the Description index into a column usable as hover text.
    df5 = products[keep].reset_index()

    if df5.empty:
        print('No products match the selected thresholds.')
        return

    # mode='markers' draws individual points instead of a line joining the products.
    df5.iplot(kind='scatter', mode='markers', x='InvoiceNo', y='CustomerID', text='Description',
              color=['blue'], title='Number of Invoices X Number of Customers')

interactive(children=(IntSlider(value=946, description='invoices', max=1891, min=1), IntSlider(value=411, desc…

## 8. Create an interactive bar chart that shows revenue by product description. Add a text field widget that filters the results to show the products that contain the text entered in their description.

In [28]:
# Total revenue per product description. Only Revenue is selected: the grouping
# key must not also be in the aggregated columns, otherwise reset_index() would
# try to insert a second 'Description' column.
df6 = data.groupby('Description')['Revenue'].sum().reset_index()


@interact(Description='')
def plot(Description):
    """Bar chart of revenue for products whose description contains the typed text.

    Parameters
    ----------
    Description : str
        Text entered in the widget. It is matched as a literal, case-insensitive
        substring; an empty string matches every product.
    """
    # regex=False keeps characters such as '(' or '+' from being interpreted as a
    # pattern (which would raise on incomplete input); na=False treats missing
    # or non-text descriptions as non-matching.
    matches = df6.loc[df6['Description'].str.contains(Description, case=False, regex=False, na=False)]

    if matches.empty:
        print(f'No product description contains {Description!r}.')
        return

    matches.iplot(kind='bar', x='Description', y='Revenue', color='blue', title='Revenue by Product Description')

interactive(children=(Text(value='', description='Description'), Output()), _dom_classes=('widget-interact',))