In [1]:
import os

from pandas import DataFrame, date_range
import numpy as np

from idealreport.settings import load_settings
from idealreport import create_html
from idealreport.reporter import Reporter
import htmltag

In [2]:
# load settings from settings.hjson
settings = load_settings()

# instantiate reporter object and set output location
output_file = os.path.join(settings['output_path'], 'sample_plots.html')
r = Reporter(title='Sample Plots', output_file=output_file)

# report start
r.h += htmltag.h3('This report provides an overview of the different plot types.')

In [3]:
# vertical bar chart

ch = DataFrame({'Entity': ['Entity 1', 'Entity 2', 'Entity 3', 'Entity 4', 'Entity 5'],
                  'Stat 1': np.random.randn(5).tolist(),
                  'Stat 2': np.random.randn(5).tolist(),
                })

# note the x values are generally assumed to be in the index, with each non index column being a series of y values
# a few exceptions are outlined below
ch = ch.set_index('Entity')
ch = ch.sort_values(by='Stat 1', ascending=False)

# color choices for bars
markers = [
    dict(color = 'rgb(49, 105, 196)'),
    dict(color = 'rgb(195, 197, 201)')
]

r.h += htmltag.h4('Bar Charts')

# plotting calls are of the form r.plot.plot_type
# a look through the plotter class in reporter.py will show the available plot types and arguments for each
# in all single plot type cases, the only required argument is df, though for multi a types array with plot types is also required
r.plot.bar(df=ch, title='Vertical Bar Chart', xlabel='Entity', ylabel='%', markers=markers,)

{'data': [{'df':             Stat 1    Stat 2
   Entity                      
   Entity 3  1.183069  0.891145
   Entity 5  0.623211  1.838676
   Entity 1  0.182284 -0.931104
   Entity 2 -0.396161 -0.070686
   Entity 4 -1.035964 -0.306904, 'orientation': 'v', 'type': 'bar'}],
 'markers': [{'color': 'rgb(49, 105, 196)'}, {'color': 'rgb(195, 197, 201)'}],
 'title': 'Vertical Bar Chart',
 'x': {'label': 'Entity'},
 'y': {'label': '%'}}

In [4]:
# stacked horizontal bar chart, notice additional entry in the data field for orientation

ch = DataFrame({'Entity': ['Entity 1', 'Entity 2', 'Entity 3aaaaaaaaaaaaaaaa', 'Entity 4', 'Entity 5'],
                'Foo': np.random.randn(5).tolist(),
                'Bar': np.random.randn(5).tolist(),
               })
ch = ch.set_index('Entity')
ch = ch[['Foo', 'Bar']] # make sure columns have desired order

markers = [
    dict(color = 'rgb(49, 105, 196)'),
    dict(color = 'rgb(195, 197, 201)')
]

# stacked bars behave oddly with mixed positive / negative numbers, so only use when all numbers are positive or negative within a series
ch = ch.abs()
ch = ch*100

# all reporter plots take a layout argument which allows you to specify any element that would appear in plotly's api
# here we're adjusting the left margin of the graph (layout['margin']['l']) to 150 to make room for the long Entity 3 name.
r.plot.barh(df=ch, title='Horizontal Stacked Bar Chart', stacked=True, xlabel='$', markers=markers, layout={'margin':{'l':150}})

{'data': [{'df':                                  Foo         Bar
   Entity                                          
   Entity 1                   31.322985   66.191138
   Entity 2                  194.987613  147.660242
   Entity 3aaaaaaaaaaaaaaaa   94.302829  136.662043
   Entity 4                  227.417545    8.358532
   Entity 5                   51.465991  273.755179,
   'orientation': 'h',
   'type': 'stackedBar'}],
 'layout': {'margin': {'l': 150}},
 'markers': [{'color': 'rgb(49, 105, 196)'}, {'color': 'rgb(195, 197, 201)'}],
 'title': 'Horizontal Stacked Bar Chart',
 'x': {'label': '$'}}

In [5]:
# horizontal overlay bar chart

ch = DataFrame({'Entity': ['Entity 1', 'Entity 2', 'Entity 3', 'Entity 4', 'Entity 5'],
                'Foo': np.random.randn(5).tolist(),
                'Bar': np.random.randn(5).tolist(),
               })
ch = ch.set_index('Entity')
ch = ch[['Foo', 'Bar']] # make sure columns have desired order

markers = [
    dict(color = 'rgb(195, 197, 201)'),
    dict(color = 'rgb(49, 105, 196)'),
]

# bar charts take in an argument called width which determines the size of the bars
# for line charts, shown below, this is controlled by an entry in the lines dict for width
# for scatter plots, this is controlled by an entry in the markers dict for size
widths = [.4, .2]

# the overlay bar charts also takes opacity an argument,
# generally the "focus" bar should be the second column so that it is drawn over the first bar
opacities = [.6, 1]

r.plot.baro(df=ch, title='Vertical Overlay Bar Chart', orientation='v', xlabel='$', markers=markers, widths=widths, opacities=opacities)

{'data': [{'df':                Foo       Bar
   Entity                      
   Entity 1  0.014073  0.135831
   Entity 2 -0.769031 -2.317524
   Entity 3 -0.547689 -1.341522
   Entity 4  0.262557 -0.390635
   Entity 5 -0.698583  1.238023, 'orientation': 'v', 'type': 'overlayBar'}],
 'markers': [{'color': 'rgb(195, 197, 201)'}, {'color': 'rgb(49, 105, 196)'}],
 'opacities': [0.6, 1],
 'title': 'Vertical Overlay Bar Chart',
 'widths': [0.4, 0.2],
 'x': {'label': '$'}}

In [6]:
# histogram, note that we don't use the index for x values here because there is no x value other than the series

ch = DataFrame({'Foo': np.random.randn(50).tolist(),
                'Bar': np.random.randn(50).tolist(),
               })
# offset Bar so that there's two different distros to observe
ch['Bar'] = ch['Bar'] + 2

markers = [
    dict(color = 'rgb(49, 105, 196)'),
    dict(color = 'rgb(195, 197, 201)')
]

r.plot.histo(df=ch, title='Histogram', ylabel='Observations', markers=markers)

{'data': [{'df':          Bar       Foo
   0   3.073037  0.904087
   1   1.473410  1.015603
   2   2.577424  0.700009
   3   0.819632 -1.231333
   4   1.132809 -0.197329
   5   0.427007  0.533504
   6   2.136522 -0.412389
   7  -0.093505  0.220879
   8   2.167225 -0.453532
   9   0.881848 -1.738121
   10  2.158311 -1.023290
   11  1.429552 -0.804124
   12  2.176125 -1.135443
   13  0.855590  1.878641
   14  0.592761  1.358759
   15  3.017704 -0.565086
   16  2.765584  2.052057
   17  1.369307  1.496466
   18  1.598849  0.303340
   19  2.788679 -0.615061
   20  1.092686 -2.836726
   21 -0.116466 -0.366771
   22  1.870622  0.046790
   23  3.386059  1.559299
   24  1.280819 -1.819805
   25  1.797441  0.900713
   26  1.567624  0.090130
   27  2.792769 -1.829891
   28  1.540394 -1.315953
   29  2.019702 -2.228283
   30  1.707993 -0.046544
   31  1.954276  0.260083
   32  1.286379  0.929812
   33  1.842721  2.665109
   34  1.778914  0.563152
   35  2.679064  0.166228
   36  1.344468  0.52841

In [7]:
# line chart

ch = DataFrame(np.random.randn(20, 3))
ch.columns = ['a', 'b', 'c']
ch.a = range(20)
ch = ch.set_index('a')

# as mentioned above, the lines are governed by a lines attribute instead of markers, width is set in the dict
lines = [
    dict(width = 7, color = 'rgb(49, 105, 196)'),
    dict(width = 3, color = 'rgb(195, 197, 201)')
]

r.h += htmltag.h4('Line Charts')
r.plot.line(df=ch, title='Line Chart', xlabel='Entity', ylabel='%', lines=lines)

{'data': [{'df':            b         c
   a                     
   0   0.990402  1.632861
   1   0.481092  0.024608
   2  -1.318514 -1.331186
   3   0.703543  0.478658
   4  -0.814278 -0.853375
   5   1.160725 -0.717977
   6   0.892805  1.026317
   7  -0.478367  0.571194
   8   0.145783 -0.680056
   9   2.383244 -1.916373
   10  0.780324 -0.215694
   11 -0.632555 -0.756455
   12  0.314141 -1.027768
   13  0.101850 -0.045293
   14 -0.318608 -0.687100
   15 -1.731567 -0.126542
   16 -0.867152 -0.106821
   17 -0.038730 -1.369861
   18  0.111096  2.769952
   19  0.214725 -0.559248, 'type': 'line'}],
 'lines': [{'color': 'rgb(49, 105, 196)', 'width': 7},
  {'color': 'rgb(195, 197, 201)', 'width': 3}],
 'title': 'Line Chart',
 'x': {'label': 'Entity'},
 'y': {'label': '%'}}

In [8]:
# time series

ind = date_range('2017-11-02 9:00', periods=20, freq='T')
ch = DataFrame(data=np.random.randn(20,2), index=ind)
ch.columns = ['a', 'b']

lines = [
    dict(width = 7, color = 'rgb(49, 105, 196)'),
    dict(width = 3, color = 'rgb(195, 197, 201)')
]

# just like a regular line except the index is timestamps
r.plot.time(df=ch, title='Time Series', xlabel='Time', ylabel='%', lines=lines)

{'data': [{'df':                             a         b
   2017-11-02 09:00:00 -0.709932  0.393868
   2017-11-02 09:01:00  0.459014  1.484748
   2017-11-02 09:02:00 -0.345662 -0.243558
   2017-11-02 09:03:00  0.343510 -0.136734
   2017-11-02 09:04:00 -0.337133  1.180340
   2017-11-02 09:05:00 -0.408412 -1.430739
   2017-11-02 09:06:00  1.022540 -1.064204
   2017-11-02 09:07:00 -0.610088  0.431854
   2017-11-02 09:08:00  0.219384  0.143303
   2017-11-02 09:09:00  0.054792  0.359751
   2017-11-02 09:10:00  0.567646 -1.514159
   2017-11-02 09:11:00  1.668475  0.195816
   2017-11-02 09:12:00  0.201720  0.662347
   2017-11-02 09:13:00 -0.733905  0.500265
   2017-11-02 09:14:00  0.335266  0.294612
   2017-11-02 09:15:00  0.483769 -0.158266
   2017-11-02 09:16:00 -0.455206 -1.348918
   2017-11-02 09:17:00 -1.314095 -0.593311
   2017-11-02 09:18:00 -0.271803 -0.253321
   2017-11-02 09:19:00 -0.667933  1.972306, 'type': 'line'}],
 'lines': [{'color': 'rgb(49, 105, 196)', 'width': 7},
  {'color

In [9]:
# scatter plot

ch = DataFrame(np.random.randn(20, 3))
ch.columns = ['a', 'b', 'c']
ch = ch.set_index('a')

# marker sizes can also be set like the line widths, though the parameter name is different
markers = [
    dict(size = 7, color = 'rgb(49, 105, 196)'),
    dict(size = 3, color = 'rgb(195, 197, 201)')
]

# example of a more customized layout
layout = dict(font=dict(family='Arial', color='#77797c'),
    xaxis=dict(
        showgrid=False,
        showline=False,
        zerolinecolor='#acadaf',
        ticks='outside',
        tickcolor='#acadaf',
        hoverformat='.2f',
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=True,
        showline=False,
        zerolinecolor='#acadaf',
        linewidth=10,
        ticks='',
        tickcolor='#acadaf',
        hoverformat='.2f',
    ),
    width=500,
    height=500,
    showlegend=False,
)

r.h += htmltag.h4('Scatter Plots')
r.plot.scatter(df=ch, title='Scatter Plot', xlabel='alpha', ylabel='beta', markers=markers, layout=layout)

{'data': [{'df':                   b         c
   a                            
    1.144012 -2.177856  0.609624
   -2.291964 -0.277785 -0.542865
   -1.194985 -0.067735  1.072220
    0.285138 -0.534796  0.106205
    0.906407  0.351988 -0.390174
    0.154964 -0.746625 -0.646705
   -0.089955 -0.168454 -0.799726
    1.253099  0.873778 -0.678101
   -0.109073  0.215700  0.606628
   -0.174019 -0.845089 -0.311641
    0.148059  0.127886  0.372320
    0.614554 -0.700881  1.687844
   -0.713188 -1.974645 -0.273551
   -1.461865 -0.345786  0.193840
   -0.208409  1.610228  0.000320
   -0.465931 -0.464113  0.864026
    0.573212 -0.535887  0.022201
    0.898636  0.245686 -1.903328
   -0.102525  0.093603  0.058836
   -1.009491 -1.682843 -2.114481, 'type': 'scatter'}],
 'layout': {'font': {'color': '#77797c', 'family': 'Arial'},
  'height': 500,
  'showlegend': False,
  'width': 500,
  'xaxis': {'hoverformat': '.2f',
   'showgrid': False,
   'showline': False,
   'tickcolor': '#acadaf',
   'ticks': 'out

In [10]:
# univariate plots
# note these are just scatter plots with x or y values of 0 and the mode of markers+text (and accompanying text and text position)

ch = DataFrame(np.random.randn(1,5))
ch.columns = ['a', 'b', 'c', 'd', 'e']
ch = ch + 5
# note index (x) value is 0

# here we use data_static, which has attributes which are the same across each trace (column of a df), note these can be arrays
data_static={'mode':'markers+text', 'textposition':'right',}
# here we use data_to_iterate, which has attributes which differ by trace (column of a df), note these must be arrays, but can be arrays of arrays or dicts
data_to_iterate={'text':['a', 'b', 'c', 'd', 'e']}

# for vertical univariate plots, turn off the xaxis autotick and set height/width appropriately
layout = dict(
    xaxis=dict(
        autorange=True,
        showgrid=False,
        zeroline=True,
        showline=False,
        zerolinecolor='#acadaf',
        linewidth=10,
        autotick=False,
        showticklabels=False        
    ),
    yaxis=dict(
        autorange=True,
        showgrid=False,
        showline=False,
        zerolinecolor='#acadaf',
        ticks='',
        tickcolor='#acadaf',
        showticklabels=True
    ),
    autosize=False,
    width=250,
    height=500,
    showlegend=False
)

r.plot.scatter(df=ch, title='Vertical Univariate Plot', xlabel='alpha', data_static=data_static, data_to_iterate=data_to_iterate, layout=layout)

# flip xaxis / yaxis attriutes and height/width for horizontal version, pass through orientation flag to flip x and y values as well
(layout['xaxis'], layout['yaxis']) = (layout['yaxis'], layout['xaxis'])
(layout['width'], layout['height']) = (layout['height'], layout['width'])
data_static.update({'orientation':'h', 'textposition':'top'})

r.plot.scatter(df=ch, title='Horizontal Univariate Plot', xlabel='alpha', data_static=data_static, data_to_iterate=data_to_iterate, layout=layout)

{'data': [{'data_static': {'mode': 'markers+text',
    'orientation': 'h',
    'textposition': 'top'},
   'data_to_iterate': {'text': ['a', 'b', 'c', 'd', 'e']},
   'df':           a         b         c         d         e
   0  4.343641  4.993759  5.960554  5.234805  5.625695,
   'type': 'scatter'}],
 'layout': {'autosize': False,
  'height': 250,
  'showlegend': False,
  'width': 500,
  'xaxis': {'autorange': True,
   'showgrid': False,
   'showline': False,
   'showticklabels': True,
   'tickcolor': '#acadaf',
   'ticks': '',
   'zerolinecolor': '#acadaf'},
  'yaxis': {'autorange': True,
   'autotick': False,
   'linewidth': 10,
   'showgrid': False,
   'showline': False,
   'showticklabels': False,
   'zeroline': True,
   'zerolinecolor': '#acadaf'}},
 'title': 'Horizontal Univariate Plot',
 'x': {'label': 'alpha'}}

In [11]:
# mixed plot types on one graph

ch1 = DataFrame(np.random.randn(20, 2))
ch1.columns = ['a', 'b1']
ch1.sort_values(by='a', ascending=True, inplace=True)
ch1 = ch1.set_index('a')

ch2 = DataFrame(np.random.randn(20, 2))
ch2.columns = ['a', 'b2']
ch2.sort_values(by='a', ascending=True, inplace=True)
ch2 = ch2.set_index('a')

# set lines attributes, which should have an appropriate entry for each chart type passed through
# note the None second element corresponding to the bar chart which doesn't need a lines attribute
lines = [[dict(color = 'rgb(49, 105, 196)')],None]

# set markers attributes, which should have an appropriate entry for each chart type passed through
# note the leading None element corresponding to the line chart which doesn't need a markers attribute
markers = [None,[dict(color = 'rgb(195, 197, 201)')]]

r.h += htmltag.h4('Mixed Type Plots')
r.plot.multi(dfs=[ch1, ch2], types=['line', 'bar'], title='Mixed Line and Bar Plot', xlabel='x label', ylabel='y label', lines=lines, markers=markers)

{'data': [{'df':                  b1
   a                  
   -1.556704  0.338127
   -1.526207 -0.399867
   -1.203515  0.349654
   -1.092345 -0.630430
   -0.864434  0.319077
   -0.774115 -1.413352
   -0.758220 -0.119254
   -0.644360 -0.457629
   -0.613573  0.502287
   -0.600446 -0.450095
   -0.335596 -1.354252
   -0.216962 -0.255778
   -0.063051 -1.238890
    0.039131 -0.831818
    0.109765  0.089989
    0.202463  1.002452
    0.315407 -0.200002
    0.507911 -0.436506
    0.632426  0.886987
    1.388573  0.051769, 'type': 'line'}, {'df':                  b2
   a                  
   -1.745183  1.294786
   -1.428685 -0.834645
   -1.089194 -2.106651
   -1.072974 -0.006109
   -0.819255  2.033608
   -0.770045  0.545413
   -0.556767  1.601609
   -0.368161  0.181634
   -0.294978 -2.097888
   -0.189504 -0.023087
   -0.144621  0.690123
   -0.139089 -0.028018
    0.223583  0.706060
    0.537057  1.619831
    0.643247  1.016048
    0.716212 -1.371661
    1.027602 -0.085792
    1.043342  1.50416

In [12]:
# different axes

ch1 = DataFrame(np.random.randn(20, 2))
ch1.columns = ['x', 'a']
ch1.x = range(20)
ch1 = ch1.set_index('x')

ch2 = DataFrame(np.random.randn(20, 2))
ch2.columns = ['x', 'b']
ch2.x = range(20)
ch2 = ch2.set_index('x')
ch2.b *= 100

# given the y2 axis, move the legend to the bottom of the plot
layout={'legend':{'orientation':'h', 'x':0.8, 'y':-0.1}}

# y2_axis is a list of booleans indicating whether each df should be tied to the y2 axis or not
r.plot.multi(dfs=[ch1, ch2], types=['line', 'line'], title='Lines with Different Y Axes', xlabel='x', ylabel='ya', y2_axis=[False,True], y2label='yb', layout=layout)

{'data': [{'df':            a
   x           
   0   0.777716
   1  -1.195198
   2   0.485539
   3  -1.419560
   4  -0.625303
   5   1.188285
   6  -0.127215
   7  -2.465663
   8   1.651970
   9  -0.365875
   10 -0.609638
   11 -0.477852
   12 -0.495180
   13 -2.038578
   14  0.187433
   15  1.055543
   16 -1.828510
   17  0.178395
   18 -0.241278
   19 -0.754175, 'type': 'line', 'y2': False}, {'df':              b
   x             
   0    27.696337
   1   -44.669625
   2     8.586555
   3   153.486705
   4  -114.827079
   5    24.244918
   6   232.502448
   7   -16.179824
   8   -92.102167
   9    84.565841
   10   88.384751
   11   63.444970
   12  180.494609
   13   28.815288
   14  145.659864
   15   12.591084
   16   56.879621
   17  -54.504108
   18 -107.513047
   19  112.373336, 'type': 'line', 'y2': True}],
 'layout': {'legend': {'orientation': 'h', 'x': 0.8, 'y': -0.1}},
 'title': 'Lines with Different Y Axes',
 'x': {'label': 'x'},
 'y': {'label': 'ya'},
 'y2': {'label': 'yb

In [13]:
# ohlc

ch = DataFrame(np.random.randn(4, 4))
ch.columns = ['open', 'high', 'low', 'close']
ch['high'] = ch['high'] + 3
ch['low'] = ch['low'] - 3

ch['dealer'] = range(4)
ch = ch.set_index('dealer')

r.h += htmltag.h4('Open High Low Close (OHCL) Plot')
r.plot.ohlc(df=ch, title='OHLC Plot by Reporter', xlabel='price', ylabel='instrument')

{'data': [{'df':             open      high       low     close
   dealer                                        
   0      -0.471603  3.717396 -3.078676 -0.083486
   1      -0.571329  1.825175 -5.005782 -0.563960
   2       2.346187  2.068748 -4.367850 -0.012152
   3       0.005386  3.414091 -2.778425  0.818738, 'type': 'ohlc'}],
 'name': '',
 'title': 'OHLC Plot by Reporter',
 'x': {'label': 'price'},
 'y': {'label': 'instrument'}}

In [14]:
# pie

ch = DataFrame(data=[1,2,3,4], index=['Factor %s' % x for x in range(4)])

r.h += htmltag.h4('Pie Charts')
r.plot.pie(df=ch, title='Pie')

layout = {'height':300}

# only difference for a donut is the hole argument
r.plot.pie(df=ch, title='Donut', hole=.4, layout=layout)

{'data': [{'df':           0
   Factor 0  1
   Factor 1  2
   Factor 2  3
   Factor 3  4, 'hole': 0.4, 'type': 'pie'}],
 'layout': {'height': 300},
 'title': 'Donut'}

In [15]:
# error bar charts

ch = DataFrame(np.random.randn(20, 4))
ch.columns = ['x','y','e1','e2']
ch = ch.set_index('x')
ch['e1'] = ch['e1'].abs()*0.1
ch['e2'] = ch['e2'].abs()*0.1

r.h += htmltag.h4('Error Bar Charts')
# note only first 3 columns are used for symmetric error bars
r.plot.errbar(df=ch, title='Symmetric Error Bars')

# just add the flag symmetric=False, otherwise it defaults to True like in the call above
r.plot.errbar(df=ch, title='Asymmetric Error Bars', symmetric=False)

{'data': [{'df':                   y        e1        e2
   x                                      
    0.033498 -0.432750  0.185082  0.161364
    0.705030 -0.176986  0.055781  0.047916
    0.577655  0.448229  0.047247  0.029080
   -0.099371 -0.663133  0.065437  0.033329
   -1.211581  0.638346  0.221227  0.004522
    0.493397  2.282158  0.065245  0.031686
    1.791339  1.255547  0.204097  0.087693
   -1.507846 -0.693120  0.193778  0.151070
    2.561167 -0.751644  0.017517  0.016506
    1.719107 -0.120300  0.040813  0.101391
   -0.044097  1.263569  0.027598  0.089069
   -0.374768 -1.127725  0.187264  0.080834
    1.134644 -0.093139  0.204749  0.073417
   -0.588315 -0.161157  0.092309  0.012966
    1.200572  0.804095  0.114941  0.003133
   -0.188030 -0.659842  0.040360  0.158333
    0.495575 -0.190960  0.085785  0.044980
    0.208484  0.410602  0.008812  0.000374
    0.596538 -0.084695  0.109602  0.126610
    0.102068  0.442582  0.208447  0.188481,
   'errorBars': {'symmetric': False},
 

In [16]:
# error line plots

ch = DataFrame({'day': ['1', '2', '3', '4', '5', '6', '7', '8'],
                'mean': [15, 13, 7, 18, 19, 24, 25, 28],
                'stdev': [1, 2, 1.3, 1, 2.5, .5, 1.5, .75]})
ch = ch[['day', 'mean', 'stdev']] # make sure columns have desired order
ch = ch.set_index('day')

r.h += htmltag.h4('Error Line Plots')
r.plot.errline(df=ch, title='Symmetric Error Bars')

{'data': [{'df':      mean  stdev
   day             
   1      15   1.00
   2      13   2.00
   3       7   1.30
   4      18   1.00
   5      19   2.50
   6      24   0.50
   7      25   1.50
   8      28   0.75,
   'fillcolor': 'rgba(0,100,80,0.2)',
   'type': 'continuousErrorBars'}],
 'title': 'Symmetric Error Bars'}

In [17]:
# generate and save the report HTML
r.generate()

saved report to /home/jason/ideal/reports/sample_report/output/sample_plots.html
