In [3]:
# Widen the notebook container to 90% so the wide faceted charts below fit.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
from hcplot import loadLibraries, Figure, Points, mapping, single, grid, matrix, wrap, scales
from hcplot.scale import brewer
from hcplot.color import ColorBrewer, D3Colors

In [4]:
# Load the client-side libraries used by the figures below. The cell output is
# a Javascript object; presumably this sets up `window.hc_charts`, which the
# generated chart scripts wait on -- confirm in hcplot.
loadLibraries()

<IPython.core.display.Javascript object>

In [5]:
from ggplot import mpg, mtcars, diamonds

In [None]:
%%sh
# Fetch the iris sample data into /tmp/iris.csv.
# The file is downloaded under a temporary name and then moved into place so
# that re-running the cell replaces /tmp/iris.csv (plain `wget` would save a
# second copy as iris.csv.1) and a failed download never clobbers a good file.
# NOTE(review): the URL tracks the pandas `master` branch; verify it still resolves.
cd /tmp || exit 1
wget -q -O iris.csv.part https://raw.githubusercontent.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv \
  && mv iris.csv.part iris.csv

In [6]:
import pandas as pd
# Read the iris CSV that the shell cell above downloads to /tmp.
iris = pd.read_csv('/tmp/iris.csv')

In [7]:
# Wrapped facets: one panel per "class", three panels per row, shared scales.
# `Figure + layer` returns the figure itself, so the layer is chained onto the
# constructor and the last expression renders the result.
fig1 = Figure(
    mpg,
    mapping("displ", "hwy", color="cyl"),
    wrap(["class"], ncols=3, scales="fixed"),
    width=1024,
) + Points(size=3)
fig1

class : 2seater,class : compact,class : midsize
,,
class : minivan,class : pickup,class : subcompact
,,
class : suv,,
,,


In [8]:
# Grid facets: rows split by "drv", columns split by "cyl", shared scales.
# `Figure + layer` returns the figure itself, so the layer is chained on.
fig2 = Figure(
    mpg,
    mapping("displ", "cty"),
    layout=grid(["drv"], ["cyl"], labels=True, scales="fixed"),
    width=1024,
) + Points()
fig2

cyl : 4,cyl : 5,cyl : 6,cyl : 8,Unnamed: 4
,,,,drv : 4
,,,,drv : f
,,,,drv : r


In [9]:
# Grid facets with a two-level row hierarchy ("vs", "am") and "gear" columns.
# `Figure + layer` returns the figure itself, so the layer is chained on.
fig3 = Figure(
    mtcars,
    mapping("mpg", "wt"),
    layout=grid(["vs", "am"], ["gear"], scales="fixed"),
    width=800,
    ratio=0.75,
) + Points()
fig3

gear : 3,gear : 4,gear : 5,Unnamed: 3,Unnamed: 4
,,,vs : 0,am : 0
,,,vs : 0,am : 1
,,,vs : 1,am : 0
,,,vs : 1,am : 1


In [18]:
# Scatter matrix over the four iris measurements with free scales per panel;
# colour and shape both encode the "Name" column.
# `Figure + layer` returns the figure itself, so the layer is chained on.
fig4 = Figure(
    iris,
    mapping(["PetalLength", "PetalWidth", "SepalWidth", "SepalLength"], color="Name", shape="Name"),
    matrix(scales="free"),
    width=960,
    ratio=1,
) + Points(scales=scales(color=brewer("qual", "Accent")), size=3)
fig4

x = PetalLength,x = PetalWidth,x = SepalWidth,x = SepalLength,Unnamed: 4
,,,,y = SepalLength
,,,,y = SepalWidth
,,,,y = PetalWidth
,,,,y = PetalLength


In [12]:
# Single small chart (default layout), coloured by "Name".
# `Figure + layer` returns the figure itself, so the layer is chained on.
fig5 = Figure(
    iris,
    mapping(x="PetalLength", y="SepalWidth", color="Name"),
    width=256,
    ratio=1,
) + Points()
fig5

# LAYER, POINT

In [None]:
from hcplot.components import Components

class Layer(object):
    """Base class for a plot layer that is attached to a figure.

    A layer stores its own data, mapping, scales and display flags; any extra
    keyword arguments are stored as attributes as well. `mapping` and `scales`
    are combined with the figure's values when the layer is attached through
    `setFigure`.
    """

    def __init__(self, data, mapping, scales, position, dropNa, showLegend, **kwargs):
        """Store the layer settings; every extra keyword becomes an attribute."""
        self.data = data
        self.mapping = mapping
        self.scales = scales
        self.position = position
        self.dropNa = dropNa
        self.showLegend = showLegend
        for k, v in kwargs.items():
            setattr(self, k, v)

    def setFigure(self, figure):
        """Attach the layer to `figure` and inherit its mapping and scales.

        For both "mapping" and "scales" the layer ends up with a private copy
        of the figure's dict, overridden by the layer's own entries. Entries
        whose value is None are treated as "not specified" and do not override
        the figure's value: the mapping/scales helper functions fill every
        unspecified key with None, which previously erased figure-level
        settings (e.g. a figure-wide colour mapping) on merge.
        """
        self.figure = figure

        for attr in ("mapping", "scales"):
            # Always start from a copy so the layer never aliases (and can
            # never mutate) the figure's own dict.
            merged = getattr(figure, attr).copy()
            own = getattr(self, attr)
            if own is not None:
                merged.update({k: v for k, v in own.items() if v is not None})
            setattr(self, attr, merged)

class Points(Layer):
    """Scatter layer: renders each data row as a point marker."""

    def __init__(self, data=None, mapping=None, scales=None, position=None, dropNa=False, showLegend=False, color=None, size=None, shape=None):
        """Create the layer; `size` sets the marker radius (2 when omitted)."""
        super().__init__(data, mapping, scales, position, dropNa, showLegend,
                         color=color, size=size, shape=shape)
        self.options = {"type": "scatter", "marker": {"radius": 2 if size is None else size}}

    def prepareData(self, df, mx, my):
        """Turn the rows of `df` into the series data for one chart.

        Args:
            df: DataFrame slice for the chart.
            mx: column name mapped to x.
            my: column name mapped to y.

        Returns:
            For frames with more rows than the figure's `performanceTreshold`:
            the raw row values (x, y and any mapped aesthetic columns) as
            lists. Otherwise: one `Components.point(names, values)` result per
            row, with colour/shape/size columns replaced by scale values.
        """
        # x/y come from the caller (they vary per panel in a matrix layout);
        # the remaining aesthetics come from the layer mapping.
        mapping = {"x": mx, "y": my}
        for k, v in self.mapping.items():
            if k not in ["x", "y"] and v is not None:
                mapping[k] = v
        names = list(mapping.keys())
        cols = list(mapping.values())

        if df.shape[0] > self.figure.performanceTreshold:
            # Large data: skip per-point objects and scale translation.
            return df[cols].to_dict("split")["data"]

        scaled = df[cols].copy()
        scaled.columns = names

        for attr in ["color", "shape", "size"]:
            if attr not in names:
                continue
            scale = self.scales.get(attr)
            if scale is None:
                # No scale configured for this aesthetic (the default for
                # "size"): keep the raw values instead of failing with
                # "'NoneType' object is not callable".
                continue
            scaled[attr] = scaled[attr].astype('category')
            count = scaled[attr].cat.categories.size
            # The scale is called with the number of categories and its
            # result replaces the category labels one-to-one.
            scaled[attr] = scaled[attr].cat.rename_categories(scale(count))

        rows = scaled.to_dict("split")["data"]
        return [Components.point(names, values) for values in rows]



# FIGURE

In [None]:
from hcplot.data import GroupedData
from hcplot.utils import ScipyEncoder, update
from hcplot.utils import scales as defaultScales
from hcplot.utils import single as defaultSingle

from hcplot.components import Components
from hcplot.templates import createGrid, createWrap

from IPython.display import Javascript, HTML, display
from uuid import uuid4
from textwrap import dedent
import math
import json
from operator import itemgetter


class Figure(object):
    """A (possibly faceted) figure rendered as an HTML table of chart containers.

    The layout decides how the data is split into panels:

    * "single": one chart with all data.
    * "wrap":   one panel per category of the layout's "y" dimensions, wrapped
                into `nrows` rows or `ncols` columns.
    * "grid":   rows from the layout's "x" dimensions, columns from its "y"
                dimensions.
    * "matrix": one panel per (x column, y column) pair of the mapping.

    Layers are attached with `figure + layer`; the HTML is produced by
    `createChart` and returned from `_repr_html_` for notebook display.
    """

    def __init__(self, data, mapping, layout=None, scales=None, coord=None,
                 width=1024, ratio=2/3, performanceTreshold=1000):
        """Validate the layout and group the data accordingly.

        Args:
            data: source data handed to `GroupedData`.
            mapping: dict as produced by the `mapping` helper ("x", "y", ...).
            layout: layout dict; merged over the default single layout.
            scales: scales dict; merged over the default scales.
            coord: stored as given.
            width: total width in pixels, divided among the columns.
            ratio: height/width ratio of each panel.
            performanceTreshold: row count above which layers may use a
                cheaper data representation.

        Raises:
            AssertionError: when the mapping does not fit the layout type or
                both `nrows` and `ncols` are set for a wrap layout.
            ValueError: for an unknown layout type.
        """
        self.id = str(uuid4())
        self.rawData = data

        # Work on a private copy: the matrix layout fills in a default "y"
        # below and must not modify the caller's mapping dict.
        self.mapping = dict(mapping)
        self.scales = update(scales, defaultScales())
        self.layout = update(layout, defaultSingle())

        layoutType = self.layout.get("type")

        if layoutType == "single":
            self.data = GroupedData(data)
            self.count = 1
            self.rows = 1
            self.cols = 1

        elif layoutType == "wrap":
            nrows = self.layout.get("nrows")
            ncols = self.layout.get("ncols")

            assert nrows is None or ncols is None, "Set either nrows or ncols in wrap layout"

            if nrows is None and ncols is None:
                nrows = 1

            self.data = GroupedData(data, colDims=self.layout.get("y"))
            self.count = self.data.getShape()[1]

            if ncols is None:
                self.rows = nrows
                self.cols = math.ceil(self.count / nrows)
            elif nrows is None:
                self.rows = math.ceil(self.count / ncols)
                self.cols = ncols

        elif layoutType == "grid":
            assert isinstance(self.mapping["x"], str), "'x' has to be a column name"
            assert isinstance(self.mapping["y"], str), "'y' has to be a column name"

            self.data = GroupedData(data, rowDims=self.layout.get("x"), colDims=self.layout.get("y"))
            self.rows = self.data.getShape()[0]
            self.cols = self.data.getShape()[1]
            self.count = self.rows * self.cols

        elif layoutType == "matrix":
            # Validate "x" before deriving "y" from it so that a wrong type
            # produces the intended message rather than an AttributeError.
            assert isinstance(self.mapping["x"], (list, tuple)), "'x' has to be a list of column names [..., ]"
            if self.mapping["y"] is None:
                # Default: the same columns in reverse order (also works for tuples).
                self.mapping["y"] = list(reversed(self.mapping["x"]))
            assert isinstance(self.mapping["y"], (list, tuple)), "'y' has to be a list of column names [..., ]"

            self.data = GroupedData(data)
            self.cols = len(self.mapping.get("x"))
            self.rows = len(self.mapping.get("y"))
            self.count = self.rows * self.cols

        else:
            raise ValueError("Unknown layout type %s" % layoutType)

        self.coord = coord

        self.width = width
        self.ratio = ratio
        self.performanceTreshold = performanceTreshold

        self.layers = []

    def __add__(self, layers):
        """Attach a layer to the figure and return the figure itself."""
        layers.setFigure(self)
        self.layers.append(layers)
        return self

    def createContainer(self, layoutType, xScaleFree, yScaleFree, labelHeight):
        """Build the HTML table holding one container div per panel.

        Labelled wrap layouts use `createWrap`; every other case uses
        `createGrid`, with row/column labels only when the layout enables them.
        """
        if layoutType == "single":
            html = createGrid(self.id, self.width, self.ratio, 1, 1)

        elif layoutType in ["grid", "wrap", "matrix"]:

            if self.layout.get("labels"):
                if layoutType == "matrix":
                    rowLabels = [["y = %s" % el] for el in self.mapping.get("y")]
                    colLabels = [["x = %s" % el] for el in self.mapping.get("x")]
                else:
                    rowLabels = self.data.getRowLabels() if layoutType == "grid" else []
                    colLabels = self.data.getColLabels()

                if layoutType == "wrap":
                    html = createWrap(self.id, self.width, self.ratio, self.rows, self.cols, self.count,
                                      colLabels, labelHeight, allYAxisLabels=yScaleFree)
                else:
                    html = createGrid(self.id, self.width, self.ratio, self.rows, self.cols,
                                      rowLabels, colLabels, labelHeight,
                                      allXAxisLabels=xScaleFree, allYAxisLabels=yScaleFree)
            else:
                html = createGrid(self.id, self.width, self.ratio, self.rows, self.cols,
                                  allXAxisLabels=xScaleFree, allYAxisLabels=yScaleFree)

        else:
            raise NotImplementedError("Not implemented yet")

        return html

    def getDataSlice(self, row, col):
        """Return the data for the panel at (`row`, `col`)."""
        return self.data.getDataByIndex(col, row)

    def getMinMax(self, layoutType, xScaleFree, yScaleFree):
        """Compute the shared axis limits for non-free scales.

        Returns:
            `(xmin, ymin, xmax, ymax)`; the limits of a free axis are None.
            In a matrix layout every column can appear on either axis, so a
            fixed axis uses the range over all mapped x and y columns.
        """
        xmin = ymin = xmax = ymax = None
        if not (xScaleFree and yScaleFree):
            if layoutType == "matrix":
                allCols = list(set(self.mapping["x"]).union(set(self.mapping["y"])))
                minMax = [self.data.getMinMax(col) for col in allCols]
                lo = min(m[0] for m in minMax)
                hi = max(m[1] for m in minMax)
                # Each axis gets BOTH of its limits or neither. Previously the
                # minimum was tied to the x flag and the maximum to the y flag,
                # which gave half-fixed axes for "free_x" / "free_y".
                if not xScaleFree:
                    xmin, xmax = lo, hi
                if not yScaleFree:
                    ymin, ymax = lo, hi
            else:
                if not xScaleFree:
                    xmin, xmax = self.data.getMinMax(self.mapping["x"])
                if not yScaleFree:
                    ymin, ymax = self.data.getMinMax(self.mapping["y"])
        return xmin, ymin, xmax, ymax

    def createChart(self):
        """Render the figure: container table plus a script that draws the charts.

        The script waits for `window.hc_charts.promise` and then calls
        `HC.chart(container, options)` once per panel. Panels beyond
        `self.count` (the unused cells of a wrap layout) get no chart, and an
        empty data slice is skipped only when both scales are free.
        """
        layoutType = self.layout.get("type", "single")
        scaleType = self.layout.get("scales", "fixed")
        labelHeight = self.layout.get("labelHeight", 20)

        xScaleFree = scaleType in ["free", "free_x"]
        yScaleFree = scaleType in ["free", "free_y"]

        xmin, ymin, xmax, ymax = self.getMinMax(layoutType, xScaleFree, yScaleFree)

        html = self.createContainer(layoutType, xScaleFree, yScaleFree, labelHeight)

        html += dedent("""
        <script>
            window.hc_charts.promise.then(function(HC) {
        """)

        i = 0  # running panel index, used to pick wrap-layout slices

        for row in range(self.rows):
            for col in range(self.cols):

                fig = Components().figure(zoomType="xy", exporting=False, title=None, legend=False)

                fig.addXAxis(title=None, max=xmax, min=xmin, lineWidth=1, tickWidth=1, gridLineWidth=1)
                fig.addYAxis(title=None, max=ymax, min=ymin, lineWidth=1, tickWidth=1, gridLineWidth=1)

                # X labels only where needed: bottom row, free x scale, or wrap.
                if xScaleFree or row == self.rows - 1 or layoutType == "wrap":
                    fig.updateChart(marginBottom=40, height=(self.width//self.cols)*self.ratio+30)
                else:
                    fig.updateXAxis(labels=False)

                # Y labels only in the first column unless y scales are free.
                if yScaleFree or col == 0:
                    fig.updateChart(marginLeft=50,   width=self.width//self.cols+40)
                else:
                    fig.updateYAxis(labels=False)

                # Credits are disabled on every panel except the last one.
                if i != self.count - 1:
                    fig.updateFigure(credits=False)

                if i < self.count:
                    mx, my = self.mapping["x"], self.mapping["y"]

                    if layoutType == "single":
                        panelData = self.getDataSlice(None, None)

                    elif layoutType == "wrap":
                        panelData = self.getDataSlice(0, i)

                    elif layoutType == "grid":
                        panelData = self.getDataSlice(row, col)

                    elif layoutType == "matrix":
                        mx = mx[col]
                        my = my[row]
                        panelData = self.getDataSlice(None, None)

                    isEmpty = len(panelData) == 0
                    for layer in self.layers:
                        # Every layer prepares the same panel slice; the result
                        # must not replace it, otherwise a second layer would
                        # receive the first layer's prepared rows.
                        seriesData = layer.prepareData(panelData, mx, my)
                        fig.addSeries(my, seriesData, layer.options)

                    container = "hc_%s_%d-%d" % (self.id, row, col)
                    if isEmpty and xScaleFree and yScaleFree:
                        html += dedent("""
                        //      console.log("%d-%d")
                        """) % (row, col)
                    else:
                        # ScipyEncoder is imported directly from hcplot.utils;
                        # the bare name `hcplot` is not in scope here.
                        html += dedent("""
                        //      console.log("%d-%d")
                                HC.chart("%s", %s);
                        """) % (row, col, container, json.dumps(fig.get(), cls=ScipyEncoder))

                    i += 1

        html += dedent("""
            });
        </script>
        """)

        return html

    def _repr_html_(self):
        """Notebook rich-display hook: returns the rendered chart HTML."""
        return self.createChart()



# SCALE

In [None]:
from hcplot.colors import ColorBrewer, D3Colors
from hcplot.shapes import Shape

def brewer(typ, palette):
    """Return a scale function for the ColorBrewer palette `palette` of kind `typ`.

    The returned function takes a size and forwards it, together with the
    palette name, to the `ColorBrewer` attribute named `typ`. The lookup on
    `ColorBrewer` happens on each call of the returned function.
    """
    def scale(size):
        factory = getattr(ColorBrewer, typ)
        return factory(palette, size)

    return scale

def d3(typ):
    """Return the attribute of `D3Colors` whose name is `typ`."""
    palette = getattr(D3Colors, typ)
    return palette

def shapes():
    """Return `Shape.get`, which serves as the shape scale function."""
    shapeScale = Shape.get
    return shapeScale

In [None]:
# brewer("seq", "OrRd")(4)
# Quick manual check of the shape scale: call it with 5, i.e. presumably the
# number of categories (that is what Points.prepareData passes to a scale).
shapes()(5)

# UTILS

In [None]:

def update(d, defaults):
    """Return a copy of `defaults` overridden by the entries of `d`.

    `d` may be None, in which case the plain copy is returned. Neither
    argument is modified.
    """
    merged = defaults.copy()
    if d is None:
        return merged
    merged.update(d)
    return merged

# Mapping Helpers

def mapping(x, y=None, color=None, shape=None, size=None):
    """Build the aesthetics mapping dict; unspecified aesthetics are None."""
    return dict(x=x, y=y, color=color, shape=shape, size=size)


# Layout Helpers

def single():
    """Layout description for a single chart."""
    return dict(type="single")

def grid(x, y, labels=True, scales="fixed", labelHeight=20):
    """Layout description for a grid of panels split by the `x` and `y` dimensions."""
    layout = {"type": "grid"}
    layout["x"] = x
    layout["y"] = y
    layout["scales"] = scales
    layout["labels"] = labels
    layout["labelHeight"] = labelHeight
    return layout

def wrap(y, nrows=None, ncols=None, labels=True, scales="fixed", labelHeight=20):
    """Layout description for panels split by `y` and wrapped into rows or columns."""
    layout = {"type": "wrap"}
    layout["y"] = y
    layout["nrows"] = nrows
    layout["ncols"] = ncols
    layout["scales"] = scales
    layout["labels"] = labels
    layout["labelHeight"] = labelHeight
    return layout

def matrix(labels=True, scales="fixed", labelHeight=20):
    """Layout description for a scatter matrix over the mapped columns."""
    layout = {"type": "matrix"}
    layout["scales"] = scales
    layout["labels"] = labels
    layout["labelHeight"] = labelHeight
    return layout


# Scales Helper

def scales(color=brewer("qual", "Accent"),
           fill=brewer("qual", "Accent"),
           shape=shapes(),
           size=None, area=None, lineType=None):
    """Build the scales dict, one entry per aesthetic.

    Colour and fill default to the qualitative "Accent" brewer scale and shape
    to the `shapes()` scale; size, area and lineType default to None. The
    defaults are evaluated once, when the function is defined.
    """
    result = {"color": color, "shape": shape}
    result["size"] = size
    result["area"] = area
    result["lineType"] = lineType
    result["fill"] = fill
    return result


In [None]:
#scales()
# Quick manual check: with no overrides (None), `update` returns a copy of the
# default scales dict.
update(None, scales())

# ===========================================================================================

# DATA

In [None]:
import pandas as pd
import numpy as np
import itertools
from functools import reduce

# TODO: Add Spark DataFrame

class GroupedData(object):
    """A DataFrame indexed by facet dimensions, addressable by category position.

    The column dimensions (`colDims`) and row dimensions (`rowDims`) become the
    index of the frame. The sorted categories of each side are enumerated so a
    panel can be fetched by its (column index, row index) position.
    """

    def __init__(self, data, colDims=None, rowDims=None):
        """Index `data` by the given dimensions.

        Args:
            data: a pandas DataFrame, or a dict accepted by `pd.DataFrame`.
            colDims: column names spanning the panel columns. None means no
                dimension; a single string is treated as a one-element list.
            rowDims: column names spanning the panel rows (same conventions).
        """
        assert isinstance(data, (dict, pd.DataFrame)), "Data must be either ColumnDataList or a Pandas DataFrame"

        # Normalise to fresh lists: no shared mutable defaults, and tuples,
        # None or a bare column name are accepted as well.
        self.colDims = self._asList(colDims)
        self.rowDims = self._asList(rowDims)
        self.allDims = self.colDims + self.rowDims
        self.multiIndex = len(self.allDims) > 1
        self.noIndex = len(self.allDims) == 0

        self.df = data if isinstance(data, pd.DataFrame) else pd.DataFrame(data)
        self.levels = {}
        self.colLevels = []
        self.rowLevels = []

        # Two distinct lists (previously both names aliased one list object).
        self.colCategories = []
        self.rowCategories = []

        if not self.noIndex:
            self.df, self.levels, self.colLevels, self.rowLevels = self._indexData()
            self.colCategories, self.rowCategories = self._createCategories()

        self.colCount = len(self.colCategories)
        self.rowCount = len(self.rowCategories)

        self.minMax = {}  # per-column (min, max) cache filled by getMinMax

    @staticmethod
    def _asList(dims):
        """Normalise a dimension argument (None, str or iterable) to a new list."""
        if dims is None:
            return []
        if isinstance(dims, str):
            return [dims]
        return list(dims)

    def _indexData(self):
        """Index the frame by all dimensions and collect the level values.

        Returns:
            `(df, levels, colLevels, rowLevels)`, where `levels` holds one list
            of values per dimension in `allDims` order.
        """
        # `DataFrame.sortlevel` no longer exists in pandas; `sort_index` is
        # the replacement and returns a new, sorted frame.
        df = self.df.set_index(self.allDims).sort_index()
        if self.multiIndex:
            levels = [x.tolist() for x in df.index.unique().levels]
        else:
            levels = [sorted(df.index.unique().tolist())]
        return df, levels, levels[:len(self.colDims)], levels[len(self.colDims):]

    def _createCategories(self):
        """Return `[colCategories, rowCategories]` as lists of value tuples.

        A side with several dimensions gets the cartesian product of its
        levels; a side without dimensions gets a single empty tuple.
        """
        result = []
        for levels in [self.colLevels, self.rowLevels]:
            if len(levels) == 1:
                result.append([(l,) for l in levels[0]])
            else:
                result.append(list(itertools.product(*levels)))
        return result

    def _getLabels(self, dims, categories, func):
        """Format one label per dimension for every category (default "dim : value")."""
        if func is None:
            func = lambda k, v: "%s : %s" % (k, v)
        if len(dims) == 0:
            return []
        return [[func(k, v) for k, v in zip(dims, val)] for val in categories]

    def getRowLabels(self, func=None):
        """Labels for the row categories; `func(dim, value)` overrides the format."""
        return self._getLabels(self.rowDims, self.rowCategories, func)

    def getColLabels(self, func=None):
        """Labels for the column categories; `func(dim, value)` overrides the format."""
        return self._getLabels(self.colDims, self.colCategories, func)

    def getShape(self):
        """Return `(rowCount, colCount)`."""
        return (self.rowCount, self.colCount)

    def getMinMax(self, col):
        """Return the cached `(min, max)` of column `col`."""
        if self.minMax.get(col) is None:
            self.minMax[col] = (self.df[col].min(), self.df[col].max())
        return self.minMax[col]

    def getCategoriesByIndex(self, colIndex, rowIndex=None):
        """Return the category values at the given positions (column values first)."""
        categories = []
        if colIndex is not None:
            categories += self.colCategories[colIndex]
        if rowIndex is not None:
            categories += self.rowCategories[rowIndex]
        return categories

    def getDataByIndex(self, colIndex, rowIndex=None):
        """Return the rows belonging to the panel at (`colIndex`, `rowIndex`).

        Always returns a DataFrame. Without any dimension, or when both
        indices are None, the whole frame is returned.
        """
        if self.noIndex:
            return self.df

        categories = self.getCategoriesByIndex(colIndex, rowIndex)
        # Select only on the dimensions for which an index was supplied so the
        # key and the level list always have the same length.
        dims = []
        if colIndex is not None:
            dims += self.colDims
        if rowIndex is not None:
            dims += self.rowDims
        if len(dims) == 0:
            return self.df

        if self.multiIndex:
            # `xs` requires a tuple key; list keys are rejected by current pandas.
            return self.df.xs(tuple(categories), level=dims)

        # List-style selection keeps the result a DataFrame even when the
        # category matches a single row (a scalar label would give a Series).
        return self.df.loc[[categories[0]]]

# TEMPLATES

In [None]:
from textwrap import indent, dedent


# Extra pixels reserved around a chart container by createGrid / createWrap:
# the row margin is added to the height of cells in the last row (or of every
# cell when allXAxisLabels is set), the column margin to the width of cells in
# the first column (or of every cell when allYAxisLabels is set).
defaultRowMargin = 25
defaultColMargin = 50

def style(width, height, rowMargin, colMargin, rightMargin=0):
    """Return the inline CSS (size and padding) for a table cell or container.

    Padding is 1px on top, `rightMargin` on the right, and at least 1px at the
    bottom (`rowMargin`) and on the left (`colMargin`). Sizes are formatted as
    integers.
    """
    bottom = max(1, rowMargin)
    left = max(1, colMargin)
    size = "width:%dpx; height:%dpx;" % (width, height)
    padding = " padding:1px %dpx %dpx %dpx;" % (rightMargin, bottom, left)
    return size + padding

def indent2(text):
    """Strip the common leading whitespace of `text`, then indent by two spaces."""
    flattened = dedent(text)
    return indent(flattened, "  ")
    
def indent4(text):
    """Strip the common leading whitespace of `text`, then indent by four spaces."""
    flattened = dedent(text)
    return indent(flattened, "    ")

In [None]:
def createGrid(containerId, width, ratio, rows, cols, rowLabels=None, colLabels=None,
               labelHeight=20, allXAxisLabels=False, allYAxisLabels=False):
    """Build the HTML table holding a `rows` x `cols` grid of chart containers.

    Args:
        containerId: id used for the table ("hc_<id>"), its rows
            ("hc_<id>_<row>") and the chart divs ("hc_<id>_<row>-<col>").
        width: total width in pixels; each cell gets `width // cols`.
        ratio: height/width ratio of a cell.
        rows, cols: grid dimensions.
        rowLabels: one list of label strings per row (rendered to the right
            of the row), or None/empty for no row headers.
        colLabels: one list of label strings per column (rendered above the
            grid, one header row per hierarchy level), or None/empty.
        labelHeight: height of column headers / width of row headers in px.
        allXAxisLabels: reserve the row margin in every row, not only the last.
        allYAxisLabels: reserve the column margin in every column, not only
            the first.

    Returns:
        The table as an HTML string.
    """
    rowLabels = [] if rowLabels is None else rowLabels
    colLabels = [] if colLabels is None else colLabels

    w = width // cols
    h = w * ratio

    # Number of header hierarchy levels on each side.
    rowHeaders = len(rowLabels[0]) if len(rowLabels) > 0 else 0
    colHeaders = len(colLabels[0]) if len(colLabels) > 0 else 0

    html = """<table id="hc_%s" class="hcTable">\n""" % containerId

    for row in range(rows):

        if colHeaders > 0 and row == 0:
            # One header row per column-label hierarchy level, above row 0.
            for c in range(colHeaders):
                html += """  <tr id="hc_%s_%d" class="hcTable">\n""" % (containerId, row)

                for col in range(cols):
                    colMargin = defaultColMargin if (col == 0 or allYAxisLabels) else 0
                    html += indent4("""
                    <th class="hcTable colHeader" style="%s">
                        <div>%s</div>
                    </th>""" % (style(w + colMargin, labelHeight, 0, colMargin, 10), colLabels[col][c]))

                # Empty corner boxes above the row-header columns.
                for r in range(rowHeaders):
                    html += indent2("""  <th class="hcTable" style="%s">
                    </th>""" % style(labelHeight, labelHeight, 0, 0))

                html += "  </tr>\n"

        # Margin reserved below the cells of this row for x-axis labels.
        rowMargin = defaultRowMargin if ((row == rows - 1) or allXAxisLabels) else 0

        html += """  <tr id="hc_%s_%d" class="hcTable">\n""" % (containerId, row)
        for col in range(cols):
            colMargin = defaultColMargin if ((col == 0) or allYAxisLabels) else 0

            html += indent4("""
            <td class="hcTable">
                <div id="hc_%s_%d-%d" style="%s"></div>
            </td>
            """ % (containerId, row, col, style(w + colMargin, h + rowMargin, rowMargin, 0)))

        # Row headers use the same row margin as the data cells so that they
        # have the same height. (This previously tested allYAxisLabels, which
        # made the headers taller or shorter than their row for
        # "free_x" / "free_y" scales.)
        for r in range(rowHeaders):
            html += indent4("""
            <th class="hcTable rowHeader" style="%s">
                <div>%s</div>
            </th>""" % (style(labelHeight, h + rowMargin, rowMargin, 0), rowLabels[row][r]))

        html += "\n  </tr>\n"
    html += "</table>\n"
    return html

In [None]:
def createWrap(containerId, width, ratio, rows, cols, count, colLabels=None,
               labelHeight=20, allYAxisLabels=False):
    """Build the HTML table for a wrapped layout of `count` chart containers.

    The panels fill a `rows` x `cols` table row by row. Each table row is
    preceded by its own header row(s), and cells beyond `count` are rendered
    as empty placeholders.

    Args:
        containerId: id used for the table ("hc_<id>"), its rows
            ("hc_<id>_<row>") and the chart divs ("hc_<id>_<row>-<col>").
        width: total width in pixels; each cell gets `width // cols`.
        ratio: height/width ratio of a cell.
        rows, cols: table dimensions.
        count: number of real panels (at most rows * cols).
        colLabels: one list of label strings per panel, or None/empty for no
            headers.
        labelHeight: height of the header cells in px.
        allYAxisLabels: reserve the column margin in every column, not only
            the first.

    Returns:
        The table as an HTML string.
    """
    colLabels = [] if colLabels is None else colLabels

    w = width // cols
    h = w * ratio

    colHeaders = len(colLabels[0]) if len(colLabels) > 0 else 0

    html = """<table id="hc_%s" class="hcTable">\n""" % containerId

    i = 0  # index of the next panel container
    j = 0  # index of the first panel label of the current table row
    for row in range(rows):
        # Initialise before the header loop: without column labels the loop
        # body never runs and `j2` would otherwise be unbound below.
        j2 = j
        # One header row per label hierarchy level; each restarts at `j`.
        for c in range(colHeaders):
            html += """  <tr id="hc_%s_%d" class="hcTable">\n""" % (containerId, row)
            j2 = j
            for col in range(cols):
                colMargin = defaultColMargin if ((col == 0) or allYAxisLabels) else 0
                if j2 < count:
                    html += indent4("""
                    <th class="hcTable colHeader" style="%s">
                        <div>%s</div>
                    </th>""" % (style(w + colMargin, labelHeight, 0, colMargin, 10), colLabels[j2][c]))
                    j2 += 1
                else:
                    html += indent4("""
                    <th class="hcTable" style="%s">
                        <div></div>
                    </th>""" % (style(w + colMargin, labelHeight, 0, colMargin, 10)))
            html += "\n  </tr>\n"
        j = j2

        html += """  <tr id="hc_%s_%d" class="hcTable">\n""" % (containerId, row)
        for col in range(cols):
            # The first column carries the y-axis labels in EVERY row, matching
            # the header cells above. (This was previously limited to row 0,
            # leaving first-column containers of later rows too narrow.)
            colMargin = defaultColMargin if ((col == 0) or allYAxisLabels) else 0
            if i < count:
                html += indent4("""
                <td class="hcTable">
                    <div id="hc_%s_%d-%d" style="%s"></div>
                </td>
                """ % (containerId, row, col, style(w + colMargin, h + defaultRowMargin + 5, defaultRowMargin + 5, 0)))
            else:
                # Placeholder for an unused cell in the last row.
                html += indent4("""
                <td class="hcTable">
                    <div style="%s"></div>
                </td>
                """ % style(w + colMargin, h, defaultRowMargin, colMargin))

            i += 1

        html += "\n  </tr>\n"
    html += "</table>\n"

    return html

In [None]:
# Preview the first rows only; displaying the whole frame bloats the notebook.
mpg.head()