In [46]:
import pandas as pd
import numpy as np
import string

# Load Data

In [47]:
# load example mtcars data 
df = pd.read_csv('mtcars.csv')
print(df.shape)
df.head()

(32, 11)


Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


# Create Simple Barplot

In [48]:
from rapid_plotly import barplot

A simple barplot can be created by passing three dataframes to `barplot.create_graph`:

* `in_data` - the height of the bars
* `names` - a dataframe containing the hover text for the bars, otherwise identical to `in_data`
* `errors` - a dataframe containing the half-height of the error bars, otherwise identical to `in_data`

In [49]:
# create graph data 
in_data = pd.DataFrame(df.groupby('cyl').mean()['mpg'])
in_data.index = in_data.index.astype(int).astype(str) + ' Cylinders'
print('main data:')
display(in_data.head())

# generate names
l = string.ascii_lowercase
names = in_data.copy()
f = lambda: l[np.random.randint(0,len(l))]

for x in names.index:
    names.loc[x, 'mpg'] = f()+f()
    
print('names:')
display(names.head())

# generate error bars data
errors = in_data.copy()
errors['mpg'] = 2.5
print('errors:')
display(errors.head())

main data:


Unnamed: 0,mpg
4 Cylinders,26.663636
6 Cylinders,19.742857
8 Cylinders,15.1


names:


Unnamed: 0,mpg
4 Cylinders,iv
6 Cylinders,od
8 Cylinders,fb


errors:


Unnamed: 0,mpg
4 Cylinders,2.5
6 Cylinders,2.5
8 Cylinders,2.5


A simple graph can be quickly created to verify that the data is as expected:

In [50]:
# create input data for graph 
args = dict(
    in_data=in_data
)

# view plot inline 
fig = barplot.create_graph(**args)

Now that the graph appears to be as expected, more characteristics can be added by adding them to `args`:

In [51]:
# add additional characteristics to graph
title = '<b>Fuel Mileage by Number of Cylinders</b>'
title += '<br><i>for mtcars data</i>'
args['title'] = title
args['names'] = names
args['errors'] = errors
args['xlab'] = 'Number of Cylinders'
args['ylab'] = 'Miles Per Gallon'
args['annotations'] = [{'text':'More cylinders correlates to better<br> fuel mileage', 'x':1.5, 'y':24.5, 'showarrow':False}]

Preview the results again:

In [52]:
# view plot inline 
fig = barplot.create_graph(**args)

After creating a graph, it can be written to an html file by passing `fig` to `barplot.output_graph`:

In [53]:
# write graph to html file 
fp = 'barplot-example.html'
barplot.output_graph(fig, fp)

In [54]:
# write graph to png file 
fp = 'barplot-example.png'
barplot.output_graph(fig, fp)

# Create Grouped Barplot

A grouped barplot compares the effect of the same treatment across multiple categories.

The next graph will show the relationship between fuel mileage, the number of cylinders and the number of gears for cars.

For grouped barplots, dataframes can be passed where the rows represent the x-axis categories and the columns represent each bar in each category. 

In [55]:
# create data for grouped barplot
in_data = df.groupby(['cyl', 'gear']).mean()[['mpg']].reset_index()

in_data = pd.pivot_table(
    data=in_data,
    columns=['gear'],
    index=['cyl']
)
in_data.columns = ['3 gears', '4 gears', '5 gears']

in_data = in_data.fillna(in_data.loc[8].mean())
in_data.index = in_data.index.astype(str) + ' Cylinders'
print('main data:')
display(in_data)

# create names
names = in_data.copy()

for row in names.index:
    for col in names.columns:
        names.loc[row, col] = f()+f()
        
print('names:')
display(names)

# create error bars 
errors = in_data.copy()

for col in errors.columns:
    errors[col] = 0.75
    
print('errors:')
display(errors)

main data:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,21.5,26.925,28.2
6 Cylinders,19.75,19.75,19.7
8 Cylinders,15.05,15.225,15.4


names:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,lz,eh,nh
6 Cylinders,um,ls,gv
8 Cylinders,oy,ry,vs


errors:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,0.75,0.75,0.75
6 Cylinders,0.75,0.75,0.75
8 Cylinders,0.75,0.75,0.75


Get a quick visual of the data:

In [56]:
# create args
args = {'in_data':in_data}
fig = barplot.create_graph(**args)

Now add more detail by adding elements to `args`:

In [57]:
# add additional characteristics to graph
title = '<b>Fuel Mileage by Number of Cylinders and Number of Gears</b>'
title += '<br><i>for mtcars data</i>'
args['title'] = title
args['names'] = names
args['errors'] = errors
args['xlab'] = 'Number of Cylinders'
args['ylab'] = 'Miles Per Gallon'
args['annotations'] = [{'text':'More gears correlate to better fuel<br> mileage for cars with 4 cylinder engines',
                        'x':0.45, 'y':28, 'ax':150, 'ay':25, 'showarrow':True}]

fig = barplot.create_graph(**args)

This looks okay with the default colors, but the main point of the graph would be more immediately visible if the "4 Cylinder" bargroup was a different shade of color than the other bargroups. 

New colors were generated using [coolors.co](https://coolors.co) and tints of the new colors were created on [color-hex.com](www.color-hex.com).

A new dataframe `colors` can be created in a similar fashion to `in_data`, `names` and `errors`:

In [58]:
# create new colors
colors = pd.DataFrame({
           '3 gears':['#9195b2']*3,
           '4 gears':['#969694']*3,
           '5 gears':['#c1c991']*3
       }, index=in_data.index)

colors.loc['4 Cylinders'] = ['#232C65', '#2D2D2A', '#849324']

args['colors'] = colors
print('colors:')
colors

colors:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,#232C65,#2D2D2A,#849324
6 Cylinders,#9195b2,#969694,#c1c991
8 Cylinders,#9195b2,#969694,#c1c991


In [59]:
fig = barplot.create_graph(**args)

In [60]:
# write graph to html file 
fp = 'grouped-barplot-example.html'
barplot.output_graph(fig, fp)

In [61]:
# write graph to png file 
fp = 'grouped-barplot-example.png'
barplot.output_graph(fig, fp)

# Create Scatterplot

In [62]:
from rapid_plotly import scatterplot

First, set up some data which can be used to create an example scatterplot:

In [63]:
# create main data 
sl = df[['hp', 'mpg']].copy()
x_data = sl[['hp']].copy()
y_data = sl[['mpg']].copy()

print('x values:')
display(x_data.head())

print('y values:')
display(y_data.head())

# create names
n = (df[['cyl', 'carb', 'gear', 'wt']].apply(
      lambda x: '# Cylinders: %s<br># Carbs: %s<br># Gears: %s<br>Weight: %s' % (x['cyl'], x['carb'], 
                                                                                   x['gear'], x['wt']),
      axis=1
    )
  ).copy()

n = n.rename('mpg')

names = sl.copy()
names['hp'] = n
del names['mpg']

print('names: ')
display(names.head())

# create colors 
colors = sl.copy()
colors.loc[:, :] = '#C14953'
del colors['mpg']

print('colors: ')
display(colors.head())

x values:


Unnamed: 0,hp
0,110
1,110
2,93
3,110
4,175


y values:


Unnamed: 0,mpg
0,21.0
1,21.0
2,22.8
3,21.4
4,18.7


names: 


Unnamed: 0,hp
0,# Cylinders: 6.0<br># Carbs: 4.0<br># Gears: 4...
1,# Cylinders: 6.0<br># Carbs: 4.0<br># Gears: 4...
2,# Cylinders: 4.0<br># Carbs: 1.0<br># Gears: 4...
3,# Cylinders: 6.0<br># Carbs: 1.0<br># Gears: 3...
4,# Cylinders: 8.0<br># Carbs: 2.0<br># Gears: 3...


colors: 


Unnamed: 0,hp
0,#C14953
1,#C14953
2,#C14953
3,#C14953
4,#C14953


The `scatterplot` module takes a separate dataframe for the x values and for the y values:

In [64]:
args = {'x_data':x_data, 'y_data':y_data}
fig = scatterplot.create_graph(**args)

Adding names, labels and colors:

In [65]:
# build graph args 
args['names'] = names
args['colors'] = colors
args['title'] = '<b>Fuel Mileage as a Function of Horsepower</b><br><i>for mtcars data</i>'
args['xlab'] = 'Horsepower'
args['ylab'] = 'Fuel Mileage (mpg)'

# display plot
fig = scatterplot.create_graph(**args)

The `scatterplot` module allows for passing lists of x and y values to plot multiple series of data on the sample plot. 

Generate example data which compares `hp` and `mpg` before and after a made-up fuel mileage enhancement:

In [66]:
# create main data 
sl = df[['hp', 'mpg']].copy()
x_data = sl[['hp']].copy()
x_data_treat = x_data.copy()
x_data_treat['hp'] = x_data['hp'] - (np.random.normal(loc=5, scale=2, size=len(x_data)))
x_data_treat.columns = ['hp_alt']
y_data = sl[['mpg']].copy()
y_data_treat = y_data.copy()
y_data_treat['mpg'] = y_data['mpg'] + (np.random.normal(loc=5, scale=2, size=len(x_data)))
y_data_treat.columns = ['mpg_alt']

print('x1 values:')
display(x_data.head())

print('x2 values:')
display(x_data_treat.head())

print('y1 values:')
display(y_data.head())

print('y2 values:')
display(y_data_treat.head())

# create names
n = (df.reset_index()[['index', 'cyl', 'carb', 'gear', 'wt']].apply(
      lambda x: 'Car ID %s<br># Cylinders: %s<br># Carbs: %s<br># Gears: %s<br>Weight: %s' % (x['index'], x['cyl'], x['carb'], x['gear'], x['wt']),
      axis=1
    )
  ).copy()

names = sl.copy()
names['hp'] = 'Before Treatment<br>' + n
del names['mpg']
names['hp_alt'] = 'After Treatment<br>' + n

print('names: ')
display(names.head())

# create colors 
colors = sl.copy()
colors.loc[:, :] = '#232C65'
del colors['mpg']
colors['hp_alt'] = '#2D2D2A'

print('colors: ')
display(colors.head())

x1 values:


Unnamed: 0,hp
0,110
1,110
2,93
3,110
4,175


x2 values:


Unnamed: 0,hp_alt
0,104.948497
1,104.644567
2,85.32359
3,101.050002
4,168.022492


y1 values:


Unnamed: 0,mpg
0,21.0
1,21.0
2,22.8
3,21.4
4,18.7


y2 values:


Unnamed: 0,mpg_alt
0,24.858688
1,25.499757
2,29.325633
3,27.745704
4,23.629976


names: 


Unnamed: 0,hp,hp_alt
0,Before Treatment<br>Car ID 0.0<br># Cylinders:...,After Treatment<br>Car ID 0.0<br># Cylinders: ...
1,Before Treatment<br>Car ID 1.0<br># Cylinders:...,After Treatment<br>Car ID 1.0<br># Cylinders: ...
2,Before Treatment<br>Car ID 2.0<br># Cylinders:...,After Treatment<br>Car ID 2.0<br># Cylinders: ...
3,Before Treatment<br>Car ID 3.0<br># Cylinders:...,After Treatment<br>Car ID 3.0<br># Cylinders: ...
4,Before Treatment<br>Car ID 4.0<br># Cylinders:...,After Treatment<br>Car ID 4.0<br># Cylinders: ...


colors: 


Unnamed: 0,hp,hp_alt
0,#232C65,#2D2D2A
1,#232C65,#2D2D2A
2,#232C65,#2D2D2A
3,#232C65,#2D2D2A
4,#232C65,#2D2D2A


Now a list of x data and a list of y data can be used to plot both cases on the same graph:

In [67]:
# build graph args 
args['x_data'] = [x_data, x_data_treat]
args['y_data'] = [y_data, y_data_treat]
args['names'] = names
args['colors'] = colors
args['title'] = '<b>Fuel Mileage as a Function of Horsepower</b><br><i>for mtcars data</i>'
args['xlab'] = 'Horsepower'
args['ylab'] = 'Fuel Mileage (mpg)'

# set up callout text 
sl = y_data.join(y_data_treat)
sl['diff'] = sl.mpg_alt - sl.mpg
cid = (sl[(sl.mpg < sl.mpg_alt)]
         .sort_values(by=['diff'], ascending=False).index[0])

x1_loc = x_data.iloc[cid].values[0]
x2_loc = x_data_treat.iloc[cid].values[0]

y1_loc = y_data.iloc[cid].values[0]
y2_loc = y_data_treat.iloc[cid].values[0]

c1 = {'text':'Car %s before upgrade' % cid, 'x':x1_loc, 'y':y1_loc, 
      'showarrow':True, 'ax':150, 'ay':-25}

c2 = {'text':'Car %s after upgrade' % cid, 'x':x2_loc, 'y':y2_loc, 
      'showarrow':True, 'ax':150, 'ay':0}

text = 'Fuel mileage upgrade works for most cars'
args['annotations'] = [{'text':text, 'x':200, 'y':37, 'showarrow':False}, 
                       c1, c2]

# display plot
fig = scatterplot.create_graph(**args)

In [68]:
# write graph to html file 
fp = 'scatterplot-example.html'
barplot.output_graph(fig, fp)

In [69]:
# write graph to png file 
fp = 'scatterplot-example.png'
barplot.output_graph(fig, fp)