### Plotly chart wrappers (python)
    
#### Motivation
This is a set of functions that wrap the powerful customization options of Plotly charts into single-function calls with a few parameters. They should produce good-looking charts that cover at least 50% of a data analyst's typical charting needs. The rationale behind writing the wrappers is twofold:  
  
1) To streamline analyst work and make the generation of lucid charts accessible even to analysts who are not familiar with the intricacies of Plotly's Python library  
2) To establish a consistent visual style that can easily be customized to fit any corporate design, by pre-defining the colors and font styles used in all the charts
  
#### Contents

0) Style setup  
1) Bar charts (stacked, grouped, percentage)  
2) Line charts  
3) Scatter plots  
4) Box plots 
  
#### Reference  
  
1) [Plotly reference](https://plot.ly/python/)  
2) [IBM Sample Datasets](https://www.ibm.com/communities/analytics/watson-analytics-blog/guide-to-sample-datasets/)

In [1]:
import matplotlib.cm
import numpy as np
import pandas as pd

import plotly.offline as py
from plotly.graph_objs import *
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

init_notebook_mode(connected=True)
%matplotlib inline

In [2]:
# Location of the sample CSV that feeds every chart in this notebook.
DATA_URL = "https://community.watsonanalytics.com/wp-content/uploads/2015/03/WA_Fn-UseC_-Marketing-Campaign-Eff-UseC_-FastF.csv"

# Download and parse the CSV into the raw data frame.
dat = pd.read_csv(DATA_URL)

In [3]:
# Preview the first rows to confirm the CSV parsed into the expected columns
dat.head()

Unnamed: 0,MarketID,MarketSize,LocationID,AgeOfStore,Promotion,week,SalesInThousands
0,1,Medium,1,4,3,1,33.73
1,1,Medium,1,4,3,2,35.67
2,1,Medium,1,4,3,3,29.03
3,1,Medium,1,4,3,4,39.25
4,1,Medium,2,5,2,1,27.81


In [13]:
# Summary statistics of the numeric columns (the text column MarketSize is left out of the result)
dat.describe()

Unnamed: 0,MarketID,LocationID,AgeOfStore,Promotion,week,SalesInThousands
count,548.0,548.0,548.0,548.0,548.0,548.0
mean,5.715328,479.656934,8.50365,2.029197,2.5,53.466204
std,2.877001,287.973679,6.638345,0.810729,1.119055,16.755216
min,1.0,1.0,1.0,1.0,1.0,17.34
25%,3.0,216.0,4.0,1.0,1.75,42.545
50%,6.0,504.0,7.0,2.0,2.5,50.2
75%,8.0,708.0,12.0,3.0,3.25,60.4775
max,10.0,920.0,28.0,3.0,4.0,99.65


In [21]:
# Total sales for every (MarketID, Promotion) pair, flattened back into
# ordinary columns so the result can be filtered like a plain table.
sales_per_market_and_promo = (
    dat.groupby(["MarketID", "Promotion"])["SalesInThousands"]
    .sum()
    .reset_index()
)

In [22]:
# Preview the aggregated frame: one row per (MarketID, Promotion) pair present in the data
sales_per_market_and_promo.head()

Unnamed: 0,MarketID,Promotion,SalesInThousands
0,1,1,814.38
1,1,2,603.04
2,1,3,407.87
3,2,1,262.4
4,2,3,1219.87


### 0. Style setup: Fonts and colors

### 1. Bar charts

In [109]:
# Generate a discrete greys colorscale for "the rest"

# Fetch the 'Greys' colormap through the colormap registry when this matplotlib
# release provides one. Older releases only expose matplotlib.cm.get_cmap,
# which newer releases no longer ship, so each lookup covers the other's gap.
try:
    cmap = matplotlib.colormaps['Greys']
except AttributeError:
    cmap = matplotlib.cm.get_cmap('Greys')

def matplotlib_to_plotly(cmap, pl_entries):
    """
    Generates a plotly colorscale list from a matplotlib colorscale
    Based on: https://plot.ly/python/matplotlib-colorscales/#formatting-the-colormap

    The colormap is sampled at ``pl_entries`` evenly spaced positions between
    0 and 1 (both ends included). Every sample becomes one
    ``'rgb(r, g, b)'`` string with integer channels in 0..255.

    Parameters
    ----------
    cmap : callable
        Called with a float position; must return a sequence whose first three
        items are the red, green and blue channels as floats in [0, 1]
        (anything after the third item, e.g. alpha, is ignored).
    pl_entries : int
        Number of colors to generate.

    Returns
    -------
    list of str
        A flat list of color strings, the same shape on every path:
        * ``pl_entries == 1`` -> ``['rgb(155, 155, 155)']``, a fixed grey,
          because a single sample has no step size to interpolate with;
        * ``pl_entries <= 0`` -> ``[]``.
    """
    # A single entry would make the step size 1/(1-1); answer it directly
    # instead of relying on a ZeroDivisionError, which could also mask a
    # division error raised inside `cmap` itself.
    if pl_entries == 1:
        return ['rgb(155, 155, 155)']

    h = 1.0/(pl_entries-1)
    pl_colorscale = []

    for k in range(pl_entries):
        # Truncate each scaled channel to a plain Python int so the string
        # never contains a NumPy scalar repr.
        red, green, blue = (int(channel*255) for channel in cmap(k*h)[:3])
        pl_colorscale.append('rgb({}, {}, {})'.format(red, green, blue))

    return pl_colorscale


In [110]:
# Edge case: a single-entry scale cannot be interpolated, so the fixed fallback grey is returned
matplotlib_to_plotly(cmap, 1)

['rgb(155, 155, 155)']

In [114]:


def generate_title_string(x,y,group):
    """Build the default chart title '<y> per <x> grouped by <group>'.

    All three arguments are expected to be strings (column names); the
    pieces are joined verbatim, without any reformatting.
    """
    measure_per_axis = ' per '.join((y, x))
    return ' grouped by '.join((measure_per_axis, group))

# --- Chart inputs ------------------------------------------------------------
# Frame to plot, plus the columns used for the x axis, the bar height and the
# grouping of bars into traces.
df = sales_per_market_and_promo
x, y, group = "Promotion", "SalesInThousands", "MarketID"

# Bar layout: "group", "stack" or "relative" (for positive and negative values).
barmode = "stack"

# --- Accents and highlights --------------------------------------------------
# Two values of `group` that are drawn in their own colors.
accent_1, accent_1_color = 1, "rgba(204,20,20,1)"
accent_2, accent_2_color = 2, "rgba(20,20,204,1)"

# --- Chart and axis titles ---------------------------------------------------
# The chart title is generated from the column names; assign a string here to
# override it.
title = generate_title_string(x, y, group)
xaxis_title = ""
yaxis_title = ""

# --- Group partition ---------------------------------------------------------
# Distinct group values in ascending order, and those that are not accents.
groups = list(df[group].unique())
groups.sort()
rest = [value for value in groups if value != accent_1 and value != accent_2]


In [115]:
# Scratch check: np.repeat expands one color string into a list with one entry per bar
list(np.repeat('rgba(204,204,204,1)',3))

['rgba(204,204,204,1)', 'rgba(204,204,204,1)', 'rgba(204,204,204,1)']

In [116]:
# Separate groups into accent_1, accent_2, rest

groups = sorted(df[group].unique())
rest = [item for item in groups if item not in [accent_1, accent_2]]


traces = []

# Build the grey scale once, with one shade per non-accent group.
# NOTE(review): the lightest sample of a 'Greys' map is presumably (near) white
# and may be hard to see on a light plot background -- confirm visually.
scl = matplotlib_to_plotly(cmap, len(rest))

# All other traces
# enumerate() supplies the shade index. It has to advance with every group;
# an index that restarts at 0 on each pass would give all non-accent bars the
# same (first) shade.
for trace_idx, item in enumerate(rest):

    item_rows = df[df[group]==item]   # rows of this group, filtered once

    traces.append(
    Bar(x = item_rows[x],
        y = item_rows[y],
        name = str(item),
        marker=dict(
        # one color entry per bar of the trace, all set to this group's shade
        color=list(np.repeat(scl[trace_idx],len(item_rows)))
       )
       )
    )

# The two accent traces are appended after the grey ones (the order may matter
# for rendering). Only the accents carry a text overlay and a custom color.
for accent_value, accent_color in ((accent_1, accent_1_color),
                                   (accent_2, accent_2_color)):
    accent_rows = df[df[group]==accent_value]
    accent_heights = accent_rows[y]

    traces.append(
        Bar(x = accent_rows[x],
            y = accent_heights,
            name = str(accent_value),
            text = accent_heights,
            textposition = 'auto',
            # one color entry per bar of the trace
            marker=dict(color=list(np.repeat(accent_color,len(accent_heights))))
           )
    )

    
# Assemble the figure: the traces plus a layout carrying titles, fonts and bar mode.
data = traces
layout = Layout(
    title=title,
    xaxis=dict(
        # Honor the configured axis title; an empty string falls back to the column name.
        title=xaxis_title or x,
        tickfont=dict(
            size=14,
            color='rgb(107, 107, 107)'
        )
    ),
    yaxis=dict(
        # Same fallback rule as for the x axis.
        title=yaxis_title or y,
        # NOTE(review): `titlefont` is reported as deprecated in newer plotly
        # releases in favor of a nested title font -- confirm against the
        # installed version before upgrading.
        titlefont=dict(
            size=16,
            color='rgb(107, 107, 107)'
        ),
        tickfont=dict(
            size=14,
            color='rgb(107, 107, 107)'
        )
    ),
    # Horizontal legend with a fully transparent background and border.
    legend=dict(orientation="h",
        bgcolor='rgba(255, 255, 255, 0)',
        bordercolor='rgba(255, 255, 255, 0)'
    ),
    barmode=barmode,
    bargap=0.15,
    bargroupgap=0.1)

fig = Figure(data=data, layout=layout)
py.iplot(fig, filename='grouped-bar')

In [64]:
# Spot-check: the aggregated frame holds a single row for MarketID 1 under Promotion 1
df[(df["Promotion"]==1) & (df["MarketID"]==1)]

Unnamed: 0,MarketID,Promotion,SalesInThousands
0,1,1,814.38
