In [5]:
import pandas as pd
import numpy as np
import string
import importlib as imp

# Load Data

In [6]:
# Read the mtcars example file (path is relative to the current working directory).
df = pd.read_csv(filepath_or_buffer='mtcars.csv')

# Report (rows, columns), then preview the first five records.
print(df.shape)
df.head(n=5)

(32, 11)


Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


# Create Simple Barplot

In [7]:
# Import the local `barplot` module by temporarily switching to the parent
# directory, then return to wherever the notebook was started from.
import os

# Remember the starting directory so it can be restored exactly, whatever the
# folder is called and even if the import or reload below raises.
notebook_dir = os.getcwd()
os.chdir('../')
try:
    import barplot
    # Reload so that edits made to the module since the first import take effect.
    imp.reload(barplot)
finally:
    os.chdir(notebook_dir)

A simple barplot can be created by passing up to three dataframes to `barplot.create_graph`; only `in_data` is needed for a first look, and `names` and `errors` can be added afterwards:

* `in_data` - the height of the bars
* `names` - a dataframe of hover text for the bars, with the same index and columns as `in_data`
* `errors` - a dataframe of error-bar half-heights, with the same index and columns as `in_data`

In [8]:
# create graph data: mean mpg per cylinder count as a one-column DataFrame
# (the column is selected before aggregating, so only `mpg` is averaged)
in_data = df.groupby('cyl')[['mpg']].mean()
in_data.index = in_data.index.astype(int).astype(str) + ' Cylinders'
print('main data:')
display(in_data.head())

# generate names
l = string.ascii_lowercase


def f():
    """Return one lowercase ASCII letter picked at random from `l`."""
    return l[np.random.randint(0, len(l))]


# Placeholder hover text with the same index and column as `in_data`.
# The whole column is replaced in one assignment, so no string is ever
# written into the float64 column copied from `in_data` (element-wise writes
# of that kind trigger pandas' incompatible-dtype warning).
names = in_data.copy()
names['mpg'] = [f() + f() for _ in names.index]

print('names:')
display(names.head())

# generate error bars data: the same constant (2.5) for every bar
errors = in_data.copy()
errors['mpg'] = 2.5
print('errors:')
display(errors.head())

main data:


Unnamed: 0,mpg
4 Cylinders,26.663636
6 Cylinders,19.742857
8 Cylinders,15.1


names:


Unnamed: 0,mpg
4 Cylinders,ue
6 Cylinders,pv
8 Cylinders,ha


errors:


Unnamed: 0,mpg
4 Cylinders,2.5
6 Cylinders,2.5
8 Cylinders,2.5


A simple graph can be quickly created to verify that the data is as expected:

In [9]:
# Start from the minimum set of keyword arguments: the bar heights only.
args = {'in_data': in_data}

# Build the figure from those arguments to view the plot inline.
fig = barplot.create_graph(**args)

Now that the graph looks as expected, more features can be added by including them in `args`:

In [10]:
# Two-line title: bold heading with an italic subtitle underneath.
title = (
    '<b>Fuel Mileage by Number of Cylinders</b>'
    '<br><i>for mtcars data</i>'
)

# Extend the existing arguments with the title, hover text, error bars,
# axis labels and a single arrow-less text annotation.
args.update(
    title=title,
    names=names,
    errors=errors,
    xlab='Number of Cylinders',
    ylab='Miles Per Gallon',
    annotations=[
        dict(
            text='More cylinders correlates to lower fuel mileage',
            x=1.5,
            y=24.5,
            showarrow=False,
        )
    ],
)

Preview the results again:

In [11]:
# view plot inline
# Rebuild the figure, passing every entry currently stored in `args` as a
# keyword argument; the result replaces the earlier `fig`.
fig = barplot.create_graph(**args)

After creating a graph, it can be written to an html file by passing `fig` to `barplot.output_graph`:

In [12]:
# write graph to html file
# NOTE(review): `fp` is a relative path, so the output presumably lands in the
# current working directory - confirm against barplot.output_graph.
fp = 'barplot-example.html'
barplot.output_graph(fp, fig)

# Create Grouped Barplot

A grouped barplot compares the effect of the same treatment across multiple categories.

The next graph will show the relationship between fuel mileage, the number of cylinders and the number of gears for cars.

For grouped barplots, pass dataframes in which each row is an x-axis category and each column is one bar within every category.

In [13]:
# create data for grouped barplot: mean mpg with one row per cylinder count
# and one column per gear count, aggregated once by the pivot itself
in_data = pd.pivot_table(
    data=df,
    values='mpg',
    index='cyl',
    columns='gear',
    aggfunc='mean'
)
# Label the columns from the gear values actually present, rather than from a
# hard-coded list that would mislabel (or fail on) data with other gear counts.
in_data.columns = [str(gear) + ' gears' for gear in in_data.columns]

# Cylinder/gear combinations with no rows come out of the pivot as NaN.
# Every NaN in the frame is filled with the mean of the 8-cylinder row.
in_data = in_data.fillna(in_data.loc[8].mean())
in_data.index = in_data.index.astype(str) + ' Cylinders'
print('main data:')
display(in_data)

# create names: random two-letter placeholders, generated row by row so the
# order of calls to `f` is unchanged. Building the frame directly avoids
# writing strings into float columns copied from `in_data`.
names = pd.DataFrame(
    [[f() + f() for _ in in_data.columns] for _ in in_data.index],
    index=in_data.index,
    columns=in_data.columns
)

print('names:')
display(names)

# create error bars: the same constant (0.75) for every bar
errors = pd.DataFrame(0.75, index=in_data.index, columns=in_data.columns)

print('errors:')
display(errors)

main data:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,21.5,26.925,28.2
6 Cylinders,19.75,19.75,19.7
8 Cylinders,15.05,15.225,15.4


names:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,he,bl,kk
6 Cylinders,ez,sh,il
8 Cylinders,db,cp,xj


errors:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,0.75,0.75,0.75
6 Cylinders,0.75,0.75,0.75
8 Cylinders,0.75,0.75,0.75


Get a quick visual of the data:

In [14]:
# Start a fresh argument set that holds only the grouped bar heights.
args = dict(in_data=in_data)

In [15]:
# Quick preview: build the figure from whatever `args` currently holds.
fig = barplot.create_graph(**args)

Now add more detail by adding elements to `args`:

In [16]:
# Two-line title: bold heading with an italic subtitle underneath.
title = (
    '<b>Fuel Mileage by Number of Cylinders and Number of Gears</b>'
    '<br><i>for mtcars data</i>'
)

# Extend the existing arguments with the title, hover text, error bars,
# axis labels and one annotation drawn with an arrow.
args.update(
    title=title,
    names=names,
    errors=errors,
    xlab='Number of Cylinders',
    ylab='Miles Per Gallon',
    annotations=[
        dict(
            text='More gears correlate to better fuel mileage for 4 cylinder cars',
            x=0.45,
            y=28,
            ax=250,
            ay=25,
            showarrow=True,
        )
    ],
)

# Rebuild the figure with the full set of arguments.
fig = barplot.create_graph(**args)

In [17]:
# write graph to html file
# NOTE(review): `fp` is a relative path, so the output presumably lands in the
# current working directory - confirm against barplot.output_graph.
fp = 'grouped-barplot-example.html'
barplot.output_graph(fp, fig)