In [276]:
import pandas as pd
import numpy as np
import string
import importlib as imp

# Load Data

In [277]:
# load example mtcars data 
df = pd.read_csv('mtcars.csv')
print(df.shape)
df.head()

(32, 11)


Unnamed: 0,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


# Create Simple Barplot

In [278]:
import os
os.chdir('../')
import barplot
imp.reload(barplot)
os.chdir('examples')

A simple barplot can be created by passing three dataframes to `barplot.create_graph`:

* `in_data` - the height of the bars
* `names` - a dataframe containing the hover text for the bars, otherwise identical to `in_data`
* `errors` - a dataframe containing the half-height of the error bars, otherwise identical to `in_data`

In [279]:
# create graph data 
in_data = pd.DataFrame(df.groupby('cyl').mean()['mpg'])
in_data.index = in_data.index.astype(int).astype(str) + ' Cylinders'
print('main data:')
display(in_data.head())

# generate names
l = string.ascii_lowercase
names = in_data.copy()
f = lambda: l[np.random.randint(0,len(l))]

for x in names.index:
    names.loc[x, 'mpg'] = f()+f()
    
print('names:')
display(names.head())

# generate error bars data
errors = in_data.copy()
errors['mpg'] = 2.5
print('errors:')
display(errors.head())

main data:


Unnamed: 0,mpg
4 Cylinders,26.663636
6 Cylinders,19.742857
8 Cylinders,15.1


names:


Unnamed: 0,mpg
4 Cylinders,rq
6 Cylinders,sb
8 Cylinders,tb


errors:


Unnamed: 0,mpg
4 Cylinders,2.5
6 Cylinders,2.5
8 Cylinders,2.5


A simple graph can be quickly created to verify that the data is as expected:

In [280]:
# create input data for graph 
args = dict(
    in_data=in_data
)

# view plot inline 
fig = barplot.create_graph(**args)

Now that the graph appears to be as expected, more characteristics can be added by adding them to `args`:

In [281]:
# add additional characteristics to graph
title = '<b>Fuel Mileage by Number of Cylinders</b>'
title += '<br><i>for mtcars data</i>'
args['title'] = title
args['names'] = names
args['errors'] = errors
args['xlab'] = 'Number of Cylinders'
args['ylab'] = 'Miles Per Gallon'
args['annotations'] = [{'text':'More cylinders correlates to lower fuel mileage', 'x':1.5, 'y':24.5, 'showarrow':False}]

Preview the results again:

In [282]:
# view plot inline 
fig = barplot.create_graph(**args)

After creating a graph, it can be written to an html file by passing `fig` to `barplot.output_graph`:

In [283]:
# write graph to html file 
fp = 'barplot-example.html'
barplot.output_graph(fp, fig)

# Create Grouped Barplot

A grouped barplot compares the effect of the same treatment across multiple categories.

The next graph will show the relationship between fuel mileage, the number of cylinders and the number of gears for cars.

For grouped barplots, dataframes can be passed where the rows represent the x-axis categories and the columns represent each bar in each category. 

In [284]:
# create data for grouped barplot
in_data = df.groupby(['cyl', 'gear']).mean()[['mpg']].reset_index()

in_data = pd.pivot_table(
    data=in_data,
    columns=['gear'],
    index=['cyl']
)
in_data.columns = ['3 gears', '4 gears', '5 gears']

in_data = in_data.fillna(in_data.loc[8].mean())
in_data.index = in_data.index.astype(str) + ' Cylinders'
print('main data:')
display(in_data)

# create names
names = in_data.copy()

for row in names.index:
    for col in names.columns:
        names.loc[row, col] = f()+f()
        
print('names:')
display(names)

# create error bars 
errors = in_data.copy()

for col in errors.columns:
    errors[col] = 0.75
    
print('errors:')
display(errors)

main data:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,21.5,26.925,28.2
6 Cylinders,19.75,19.75,19.7
8 Cylinders,15.05,15.225,15.4


names:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,jd,lu,dr
6 Cylinders,ot,wc,da
8 Cylinders,dk,nw,is


errors:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,0.75,0.75,0.75
6 Cylinders,0.75,0.75,0.75
8 Cylinders,0.75,0.75,0.75


Get a quick visual of the data:

In [285]:
# create args
args = {'in_data':in_data}

In [286]:
fig = barplot.create_graph(**args)

Now add more detail by adding elements to `args`:

In [287]:
# add additional characteristics to graph
title = '<b>Fuel Mileage by Number of Cylinders and Number of Gears</b>'
title += '<br><i>for mtcars data</i>'
args['title'] = title
args['names'] = names
args['errors'] = errors
args['xlab'] = 'Number of Cylinders'
args['ylab'] = 'Miles Per Gallon'
args['annotations'] = [{'text':'More gears correlate to better fuel mileage for cars with 4 cylinder engines',
                        'x':0.45, 'y':28, 'ax':250, 'ay':25, 'showarrow':True}]

fig = barplot.create_graph(**args)

This looks okay with the default colors, but the main point of the graph would be more immediately visible if the "4 Cylinder" bargroup was a different shade of color than the other bargroups. 

New colors were generated using [coolors.co](https://coolors.co) and tints of the new colors were created on [color-hex.com](www.color-hex.com).

A new dataframe `colors` can be created in a similar fashion to `in_data`, `names` and `errors`:

In [288]:
# create new colors
colors = pd.DataFrame({
           '3 gears':['#9195b2']*3,
           '4 gears':['#969694']*3,
           '5 gears':['#c1c991']*3
       }, index=in_data.index)

colors.loc['4 Cylinders'] = ['#232C65', '#2D2D2A', '#849324']

args['colors'] = colors
print('colors:')
colors

colors:


Unnamed: 0,3 gears,4 gears,5 gears
4 Cylinders,#232C65,#2D2D2A,#849324
6 Cylinders,#9195b2,#969694,#c1c991
8 Cylinders,#9195b2,#969694,#c1c991


In [289]:
fig = barplot.create_graph(**args)

In [290]:
# write graph to html file 
fp = 'grouped-barplot-example.html'
barplot.output_graph(fp, fig)

# Create Scatterplot

In [291]:
os.chdir('../')
import scatterplot
imp.reload(scatterplot)
os.chdir('examples')

In [292]:
# create main data 
in_data = df[['hp', 'mpg']].copy()
in_data.set_index('hp', inplace=True)

print('main data:')
display(in_data.head())

# create names
n = (df[['cyl', 'carb', 'gear', 'wt']].apply(
      lambda x: '# Cylinders: %s<br># Carbs: %s<br># Gears: %s<br>Weight: %s' % (x['cyl'], x['carb'], 
                                                                                   x['gear'], x['wt']),
      axis=1
    )
  ).copy()

n = n.rename('mpg')

names = in_data.reset_index().copy()
names['mpg'] = n
names.set_index('hp', inplace=True)

print('names: ')
display(names.head())

# create colors 
colors = in_data.copy()
colors.loc[:, :] = '#C14953'

print('colors: ')
display(colors.head())

main data:


Unnamed: 0_level_0,mpg
hp,Unnamed: 1_level_1
110,21.0
110,21.0
93,22.8
110,21.4
175,18.7


names: 


Unnamed: 0_level_0,mpg
hp,Unnamed: 1_level_1
110,# Cylinders: 6.0<br># Carbs: 4.0<br># Gears: 4...
110,# Cylinders: 6.0<br># Carbs: 4.0<br># Gears: 4...
93,# Cylinders: 4.0<br># Carbs: 1.0<br># Gears: 4...
110,# Cylinders: 6.0<br># Carbs: 1.0<br># Gears: 3...
175,# Cylinders: 8.0<br># Carbs: 2.0<br># Gears: 3...


colors: 


Unnamed: 0_level_0,mpg
hp,Unnamed: 1_level_1
110,#C14953
110,#C14953
93,#C14953
110,#C14953
175,#C14953


In [293]:
args = {'in_data':in_data}
fig = scatterplot.create_graph(**args)

In [294]:
# build graph args 
args['names'] = names
args['colors'] = colors
args['title'] = '<b>Fuel Mileage as a Function of Horsepower</b><br><i>for mtcars data</i>'
args['xlab'] = 'Horsepower'
args['ylab'] = 'Fuel Mileage (mpg)'

# display plot
fig = scatterplot.create_graph(**args)

In [295]:
# create main data 
in_data = df[['hp', 'mpg']].copy()
in_data.set_index('hp', inplace=True)
in_data['mpg_treat'] = (in_data['mpg'] + 
                        np.random.normal(loc=(5+in_data['mpg'].mean()), 
                                         scale=in_data['mpg'].std()*2, 
                                         size=len(in_data)))

print('main data:')
display(in_data.head())

# create names
n = (df.reset_index()[['cyl', 'carb', 'gear', 'wt', 'index']].apply(
      lambda x: 'Car ID %s<br># Cylinders: %s<br># Carbs: %s<br># Gears: %s<br>Weight: %s' % (int(x['index']), x['cyl'],
                                                                                              x['carb'], 
                                                                                              x['gear'], x['wt']),
      axis=1
    )
  ).copy()

n = n.rename('mpg')

names = in_data.reset_index().copy()
names['mpg'] = n
names['mpg_treat'] = n
names.set_index('hp', inplace=True)

print('names: ')
display(names.head())

# create colors 
colors = in_data.copy()
colors.loc[:, 'mpg'] = '#C14953'
colors.loc[:, 'mpg_treat'] = '#232C65'

print('colors: ')
display(colors.head())

main data:


Unnamed: 0_level_0,mpg,mpg_treat
hp,Unnamed: 1_level_1,Unnamed: 2_level_1
110,21.0,56.332648
110,21.0,47.536327
93,22.8,44.873699
110,21.4,59.011567
175,18.7,47.625962


names: 


Unnamed: 0_level_0,mpg,mpg_treat
hp,Unnamed: 1_level_1,Unnamed: 2_level_1
110,Car ID 0<br># Cylinders: 6.0<br># Carbs: 4.0<b...,Car ID 0<br># Cylinders: 6.0<br># Carbs: 4.0<b...
110,Car ID 1<br># Cylinders: 6.0<br># Carbs: 4.0<b...,Car ID 1<br># Cylinders: 6.0<br># Carbs: 4.0<b...
93,Car ID 2<br># Cylinders: 4.0<br># Carbs: 1.0<b...,Car ID 2<br># Cylinders: 4.0<br># Carbs: 1.0<b...
110,Car ID 3<br># Cylinders: 6.0<br># Carbs: 1.0<b...,Car ID 3<br># Cylinders: 6.0<br># Carbs: 1.0<b...
175,Car ID 4<br># Cylinders: 8.0<br># Carbs: 2.0<b...,Car ID 4<br># Cylinders: 8.0<br># Carbs: 2.0<b...


colors: 


Unnamed: 0_level_0,mpg,mpg_treat
hp,Unnamed: 1_level_1,Unnamed: 2_level_1
110,#C14953,#232C65
110,#C14953,#232C65
93,#C14953,#232C65
110,#C14953,#232C65
175,#C14953,#232C65


In [312]:
# build graph args 
args['in_data'] = in_data
args['names'] = names
args['colors'] = colors
args['title'] = '<b>Fuel Mileage as a Function of Horsepower</b><br><i>for mtcars data</i>'
args['xlab'] = 'Horsepower'
args['ylab'] = 'Fuel Mileage (mpg)'
text = 'Fuel mileage upgrade radically improved mpg of most cars'
c1 = {'text':'Car 27 before upgrade', 'x':113, 'y':30.4, 'showarrow':True, 'ax':50, 'ay':20}
c2 = {'text':'Car 27 after upgrade', 'x':113, 'y':64.30, 'showarrow':True, 'ax':100, 'ay':0}
args['annotations'] = [{'text':text, 'x':275, 'y':57, 'showarrow':False}, c1, c2]

# display plot
fig = scatterplot.create_graph(**args)

In [297]:
# write graph to html file 
fp = 'scatterplot-example.html'
barplot.output_graph(fp, fig)