In [1]:
# Import from the public IPython.display API; importing `display` from
# IPython.core.display is deprecated and emits a DeprecationWarning.
from IPython.display import display, HTML

# Inject a CSS rule so the notebook container uses 95% of the browser width.
display(HTML("<style>.container { width:95% !important; }</style>"))

In [2]:
from hcplot import loadLibraries, Figure, Points, mapping, single, grid, matrix, wrap

In [3]:
# Emits a Javascript display object (see the cell output below).
# Presumably this loads the client-side charting libraries the figures need -- TODO confirm.
loadLibraries()

<IPython.core.display.Javascript object>

In [4]:
from ggplot import mpg, mtcars

In [None]:
%%sh 
# Download the iris sample data set into /tmp (quiet mode); a later cell reads /tmp/iris.csv.
# NOTE(review): the URL tracks the `master` branch, so the file may move or change;
# consider pinning a tag or commit for reproducibility.
cd /tmp
wget -q https://raw.githubusercontent.com/pandas-dev/pandas/master/pandas/tests/data/iris.csv

In [5]:
import pandas as pd
# Read the CSV fetched by the shell cell; the file must already exist at this path.
iris = pd.read_csv('/tmp/iris.csv')

In [11]:
# Wrap layout: displ vs. hwy from `mpg`, one panel per `class` value,
# four panels per table row, free scales, 1024px total width.
fig1 = Figure(mpg, 
              mapping("displ", "hwy"),
              wrap(["class"], ncols=4, scales="free"),
              width=1024)
# The result of `+` is discarded, so this presumably registers the Points layer
# on fig1 in place -- TODO confirm against hcplot.
fig1 + Points()
# Bare name as last expression: the notebook renders the figure's rich output.
fig1

class : 2seater,class : compact,class : midsize,class : minivan
,,,
class : pickup,class : subcompact,class : suv,
,,,


In [12]:
# Grid layout: displ vs. cty from `mpg`, facetted by `drv` and `cyl` with labels shown.
# In the rendered output below, `drv` labels the rows and `cyl` labels the columns.
fig2 = Figure(mpg, 
              mapping("displ", "cty"), 
              layout=grid(["drv"], ["cyl"], labels=True), 
              width=1024)
fig2 + Points()
fig2

cyl : 4,cyl : 5,cyl : 6,cyl : 8,Unnamed: 4
,,,,drv : 4
,,,,drv : f
,,,,drv : r


In [13]:
# Grid layout with two facet dimensions on one side: mpg vs. wt from `mtcars`,
# rows for every (vs, am) combination and columns per `gear` (see output below),
# fixed scales, 800px wide with a 0.75 height/width ratio.
fig3 = Figure(mtcars, 
              mapping("mpg", "wt"), 
              layout=grid(["vs","am"], ["gear"], scales="fixed"), 
              width=800, ratio=0.75)
fig3 + Points()
fig3

gear : 3,gear : 4,gear : 5,Unnamed: 3,Unnamed: 4
,,,vs : 0,am : 0
,,,vs : 0,am : 1
,,,vs : 1,am : 0
,,,vs : 1,am : 1


In [14]:
# Matrix layout: every listed iris column against every other one.
# Only a list of x columns is mapped; the output below shows the y columns
# in reverse order of the x columns. Scales are "free_y", cells are square (ratio=1).
fig4 = Figure(iris, 
              mapping(["PetalLength", "PetalWidth", "SepalWidth", "SepalLength"]),
              matrix(scales="free_y"),
              width=960, ratio=1)
fig4 + Points()
fig4

x = PetalLength,x = PetalWidth,x = SepalWidth,x = SepalLength,Unnamed: 4
,,,,y = SepalLength
,,,,y = SepalWidth
,,,,y = PetalWidth
,,,,y = PetalLength


In [15]:
# No layout argument is given, so the figure's default layout applies
# (presumably a single chart -- TODO confirm against hcplot).
# Small square chart: 256px wide, ratio=1.
fig5 = Figure(iris, 
              mapping(x="PetalLength", y="SepalWidth"),
              width=256, ratio=1)
fig5 + Points()
fig5

# DATA

In [None]:
import pandas as pd
import numpy as np
import itertools
from functools import reduce

# TODO: Add Spark DataFrame

class GroupedData(object):
    """A pandas DataFrame together with optional row/column facet dimensions.

    For every facet side (rows, columns) the cartesian product of the sorted
    distinct values of the given dimension columns is computed once; each
    element of that product is a "category": a list of ``(column, value)``
    tuples. Slices of the frame can then be fetched by category index.

    Attributes:
        df: The wrapped DataFrame (built from ``data`` when a dict is given);
            ``None`` when ``data`` is neither a dict nor a DataFrame.
        rowDims / colDims: The dimension column names as passed in, or None.
        rowCategories / colCategories: List of categories per side
            (empty list when the side has no dimensions).
        rowCount / colCount: Number of categories, or None when the side
            has no dimensions.
        minMax: Cache used by ``getMinMax``, keyed by column name.
    """

    def __init__(self, data, rowDims=None, colDims=None):
        self.df = None
        if isinstance(data, dict):
            self.df = pd.DataFrame(data)
        elif isinstance(data, pd.DataFrame):
            self.df = data

        self.rowDims = rowDims
        self.colDims = colDims

        self.rowCategories = []
        self.rowCount = None
        if rowDims is not None:
            self.rowCategories = self.explodeCategories(rowDims)
            self.rowCount = len(self.rowCategories)

        self.colCategories = []
        self.colCount = None
        if colDims is not None:
            self.colCategories = self.explodeCategories(colDims)
            self.colCount = len(self.colCategories)

        self.minMax = {}


    def getMinMax(self, col):
        """Return ``(min, max)`` of column ``col``, computed once and then cached."""
        if self.minMax.get(col) is None:
            self.minMax[col] = (self.df[col].min(), self.df[col].max())
        return self.minMax[col]


    def explodeCategories(self, dims):
        """Return all value combinations of the columns in ``dims``.

        Each combination is a list of ``(column, value)`` tuples, ordered like
        ``dims``; values of each column are taken in sorted order.
        """
        categoryValues = [sorted(self.df.groupby(dim).groups.keys()) for dim in dims]
        categories = [list(zip(dims, cats)) for cats in itertools.product(*categoryValues)]
        return categories


    def getShape(self):
        """Return ``(rows, cols)``; a side without dimensions counts as 1."""
        return (1 if self.rowCount is None else self.rowCount,
                1 if self.colCount is None else self.colCount)


    def getCategoriesByIndex(self, rowIndex=None, colIndex=None):
        """Return the category for the given index(es).

        Returns a ``(rowCategory, colCategory)`` tuple when both indices are
        given, otherwise the single requested category.

        Raises:
            ValueError: if neither index is given.
        """
        if rowIndex is not None and colIndex is not None:
            return (self.rowCategories[rowIndex], self.colCategories[colIndex])

        elif rowIndex is not None:
            return self.rowCategories[rowIndex]

        elif colIndex is not None:
            return self.colCategories[colIndex]

        else:
            raise ValueError("Access type unknown")


    def getDataByIndex(self, rowIndex=None, colIndex=None):
        """Return the data slice for one cell of the layout.

        Returns:
            dict with keys ``"rowCategories"``, ``"colCategories"`` (the
            category used for filtering, or ``{}`` when that side was not
            used) and ``"data"`` (the filtered DataFrame).

        An index is only honoured when the matching side actually has
        dimensions; otherwise that side does not filter. With no usable
        index the complete frame is returned.
        """

        def makeDict(data, row=False, col=False):
            return {"rowCategories": self.rowCategories[rowIndex] if row else {},
                    "colCategories": self.colCategories[colIndex] if col else {},
                    "data": data}

        useRows = rowIndex is not None and self.rowCount is not None
        useCols = colIndex is not None and self.colCount is not None

        if useRows and useCols:                                    # layout type grid
            index = self.rowCategories[rowIndex] + self.colCategories[colIndex]
            return makeDict(self.getDataByCategories(index), row=True, col=True)

        elif useCols:                                              # layout type wrap / column-only grid
            return makeDict(self.getDataByCategories(self.colCategories[colIndex]), col=True)

        elif useRows:                                              # row-only grid
            # Previously this case fell through and returned the unfiltered frame,
            # so every row of a row-only grid showed the same (complete) data.
            return makeDict(self.getDataByCategories(self.rowCategories[rowIndex]), row=True)

        else:                                                      # layout type single, matrix
            return makeDict(self.df)


    def getDataByCategories(self, categories):
        """Return the rows matching every ``(column, value)`` pair in ``categories``.

        An empty ``categories`` list imposes no condition and returns the whole
        frame (``reduce`` over an empty list would raise TypeError otherwise;
        an empty category arises e.g. from an empty dimension list).
        """
        if len(categories) == 0:
            return self.df
        cond = reduce(np.logical_and, [self.df[cat[0]] == cat[1] for cat in categories])
        return self.df[cond]


    def _formatLabels(self, categories):
        """Turn categories into lists of ``"column : value"`` label strings."""
        return [["%s : %s" % (val[0], val[1]) for val in vals] for vals in categories]


    def getRowLabels(self):
        """Return one list of ``"column : value"`` labels per row category."""
        return self._formatLabels(self.rowCategories)


    def getColLabels(self):
        """Return one list of ``"column : value"`` labels per column category."""
        return self._formatLabels(self.colCategories)


# TEMPLATES

In [None]:
from textwrap import indent, dedent


# Pixels reserved below a chart cell; passed as the bottom padding to style().
defaultRowMargin = 25
# Pixels reserved left of a chart cell; added to the cell width and passed as the left padding to style().
defaultColMargin = 50

def style(width, height, rowMargin, colMargin, rightMargin=0):
    """Build the inline CSS (size plus padding) for one table cell.

    The padding is emitted in CSS shorthand order top/right/bottom/left:
    1px on top, ``rightMargin`` on the right, ``rowMargin`` at the bottom and
    ``colMargin`` on the left, where bottom and left are at least 1px.
    All numbers are formatted as integers.
    """
    bottom = max(1, rowMargin)
    left = max(1, colMargin)
    size = "width:%dpx; height:%dpx;" % (width, height)
    padding = " padding:1px %dpx %dpx %dpx;" % (rightMargin, bottom, left)
    return size + padding

def indent2(text):
    """Strip the common leading whitespace from ``text``, then indent it by two spaces."""
    flattened = dedent(text)
    return indent(flattened, " " * 2)
    
def indent4(text):
    """Strip the common leading whitespace from ``text``, then indent it by four spaces."""
    flattened = dedent(text)
    return indent(flattened, " " * 4)

In [None]:
def createGrid(containerId, width, ratio, rows, cols, rowLabels=[], colLabels=[], 
               labelHeight=20, allXAxisLabels=False, allYAxisLabels=False):
    """Build the HTML table that hosts a rows x cols grid of chart containers.

    Args:
        containerId: Id fragment; the table gets id ``hc_<containerId>`` and
            each chart div gets id ``hc_<containerId>_<row>-<col>``.
        width: Total width in pixels; every cell is ``width // cols`` wide.
        ratio: Cell height as a multiple of the cell width.
        rows, cols: Grid dimensions.
        rowLabels: One list of label strings per row (rendered as header
            cells to the right of each row); empty for no row headers.
        colLabels: One list of label strings per column (rendered as header
            rows above the first row); empty for no column headers.
        labelHeight: Height of column headers / width of row headers in pixels.
        allXAxisLabels: Reserve the bottom margin in every row, not only the last.
        allYAxisLabels: Reserve the left margin in every column, not only the first.

    Returns:
        The HTML markup as a string.
    """
    w = width // cols
    h = w * ratio

    # Number of header hierarchies (labels per row / per column)
    rowHeaders = len(rowLabels[0]) if len(rowLabels) > 0 else 0
    colHeaders = len(colLabels[0]) if len(colLabels) > 0 else 0

    html = """<table id="hc_%s" class="hcTable">\n""" % containerId

    for row in range(rows):

        if colHeaders > 0 and row == 0:
            # print all column header hierarchies
            # NOTE(review): these header rows reuse the id of the first data row
            # ("hc_<id>_0"), so the emitted HTML contains duplicate ids.
            for c in range(colHeaders):
                html += """  <tr id="hc_%s_%d" class="hcTable">\n""" % (containerId, row)

                for col in range(cols):
                    colMargin = defaultColMargin if ((col == 0 and row == 0) or allYAxisLabels) else 0
                    html += indent4("""
                    <th class="hcTable colHeader" style="%s">
                        <div>%s</div>
                    </th>""" % (style(w + colMargin, labelHeight, 0, colMargin, 10), colLabels[col][c]))

                # if there are row headers, print empty boxes
                for r in range(rowHeaders):
                    html += indent2("""  <th class="hcTable" style="%s">
                    </th>""" % style(labelHeight, labelHeight, 0, 0))

                html += "  </tr>\n"

        html += """  <tr id="hc_%s_%d" class="hcTable">\n""" % (containerId, row)
        for col in range(cols):
            colMargin = defaultColMargin if ((col == 0) or allYAxisLabels       ) else 0
            rowMargin = defaultRowMargin if ((row == rows - 1) or allXAxisLabels) else 0

            html += indent4("""    
            <td class="hcTable">
                <div id="hc_%s_%d-%d" style="%s"></div>
            </td>
            """ % (containerId, row, col, style(w + colMargin, h + rowMargin, rowMargin, 0)))

        # print row header hierarchies
        for r in range(rowHeaders):
            # The header must be as tall as the chart cells of its row, whose bottom
            # margin depends on allXAxisLabels (this previously tested allYAxisLabels).
            rowMargin = defaultRowMargin if ((row == rows - 1) or allXAxisLabels) else 0
            html += indent4("""
            <th class="hcTable rowHeader" style="%s">
                <div>%s</div>
            </th>""" % (style(labelHeight, h + rowMargin, rowMargin, 0), rowLabels[row][r]))

        html += "\n  </tr>\n"
    html += "</table>\n"
    return html

In [None]:
def createWrap(containerId, width, ratio, rows, cols, count, colLabels=[], 
               labelHeight=20, allYAxisLabels=False):
    """Build the HTML table for a wrapped sequence of ``count`` chart containers.

    The charts fill a rows x cols table row by row; each table row of charts is
    preceded by its own header row(s). Cells beyond ``count`` are emitted as
    empty placeholders.

    Args:
        containerId: Id fragment; the table gets id ``hc_<containerId>`` and
            each chart div gets id ``hc_<containerId>_<row>-<col>``.
        width: Total width in pixels; every cell is ``width // cols`` wide.
        ratio: Cell height as a multiple of the cell width.
        rows, cols: Table dimensions.
        count: Number of charts actually present (<= rows * cols).
        colLabels: One list of label strings per chart; empty for no headers.
        labelHeight: Height of the header cells in pixels.
        allYAxisLabels: Reserve the left margin in every column, not only the first.

    Returns:
        The HTML markup as a string.
    """
    w = width // cols
    h = w * ratio

    # Number of header hierarchies (labels per chart)
    colHeaders = len(colLabels[0]) if len(colLabels) > 0 else 0

    html = """<table id="hc_%s" class="hcTable">\n""" % containerId

    i = 0   # index of the next chart cell
    j = 0   # index of the first label of the current table row
    for row in range(rows):
        # Initialise here so that `j = j2` below also works without any header
        # hierarchy (it used to raise UnboundLocalError when colLabels was empty).
        j2 = j
        # print all column header hierarchies
        for c in range(colHeaders):
            html += """  <tr id="hc_%s_%d" class="hcTable">\n""" % (containerId, row)
            j2 = j   # every hierarchy level restarts at the row's first label
            for col in range(cols):
                colMargin = defaultColMargin if ((col == 0) or allYAxisLabels) else 0
                if j2 < count:
                    html += indent4("""
                    <th class="hcTable colHeader" style="%s">
                        <div>%s</div>
                    </th>""" % (style(w + colMargin, labelHeight, 0, colMargin, 10), colLabels[j2][c]))
                    j2 += 1
                else:
                    html += indent4("""
                    <th class="hcTable" style="%s">
                        <div></div>
                    </th>""" % (style(w + colMargin, labelHeight, 0, colMargin, 10)))
            html += "\n  </tr>\n"
        j = j2

        html += """  <tr id="hc_%s_%d" class="hcTable">\n""" % (containerId, row)
        for col in range(cols):
            # The first column carries the left margin in every row, matching the
            # header cells above (this previously applied to the first row only).
            colMargin = defaultColMargin if ((col == 0) or allYAxisLabels) else 0
            if i < count:
                html += indent4("""    
                <td class="hcTable">
                    <div id="hc_%s_%d-%d" style="%s"></div>
                </td>
                """ % (containerId, row, col, style(w + colMargin, h + defaultRowMargin + 5, defaultRowMargin + 5, 0)))
            else:
                html += indent4("""    
                <td class="hcTable">
                    <div style="%s"></div>
                </td>
                """ % style(w + colMargin, h, defaultRowMargin, colMargin))

            i += 1

        html += "\n  </tr>\n"
    html += "</table>\n"

    return html

# FIGURE

In [None]:
from hcplot.utils import ScipyEncoder, single
from hcplot.components import Components

from IPython.display import Javascript, HTML, display
from uuid import uuid4
from textwrap import dedent
import math
import json
from operator import itemgetter


class Figure(object):
    """A (possibly facetted) figure rendered as an HTML table of chart containers.

    The layout dict (see ``single``/``wrap``/``grid``/``matrix``) decides how
    many chart cells exist and which slice of the data each one shows. Layers
    are attached with ``figure + layer``; the HTML/JavaScript is produced by
    ``createChart`` and used as the notebook's rich representation.

    Args:
        data: dict or pandas DataFrame, wrapped in a ``GroupedData``.
        mapping: dict with keys ``"x"`` and ``"y"``: column names for the
            single/wrap/grid layouts, lists of column names for matrix.
        layout: layout dict; defaults to ``single()``.
        coord, scaleX, scaleY: stored on the instance; not used by this class.
        width: total figure width in pixels.
        ratio: cell height as a multiple of cell width.

    Raises:
        AssertionError: if the mapping/layout arguments do not fit the layout type.
        ValueError: if the layout type is unknown.
    """

    def __init__(self, data, mapping, layout=None, coord=None, scaleX=None, scaleY=None, width=1024, ratio=2/3):
        self.id = str(uuid4())
        self.rawData = data

        self.mapping = mapping
        self.layout = single() if layout is None else layout

        layoutType = self.layout.get("type")

        if layoutType == "single":
            self.data = GroupedData(data)
            self.count = 1
            self.rows = 1
            self.cols = 1

        elif layoutType == "wrap":
            nrows = self.layout.get("nrows")
            ncols = self.layout.get("ncols")

            assert nrows is None or ncols is None, "Set either nrows or ncols in wrap layout"

            # Neither given: put all charts into one row
            if nrows is None and ncols is None:
                nrows = 1

            self.data = GroupedData(data, colDims=self.layout.get("y"))
            self.count = self.data.getShape()[1]

            if ncols is None:
                self.rows = nrows
                self.cols = math.ceil(self.count / nrows)
            elif nrows is None:
                self.rows = math.ceil(self.count / ncols)
                self.cols = ncols

        elif layoutType == "grid":
            assert  isinstance(self.mapping["x"], str), "'x' has to be a column name"
            assert  isinstance(self.mapping["y"], str), "'y' has to be a column name"

            self.data = GroupedData(data, rowDims=self.layout.get("x"), colDims=self.layout.get("y"))
            self.rows = self.data.getShape()[0]
            self.cols = self.data.getShape()[1]
            self.count = self.rows * self.cols

        elif layoutType == "matrix":
            # Validate x before deriving y from it, so a wrong type is reported
            # with the assertion message instead of an AttributeError.
            assert  isinstance(self.mapping["x"], (list, tuple)), "'x' has to be a list of column names [..., ]"

            if self.mapping["y"] is None:
                # Default: same columns in reverse order (also works for tuples,
                # which have no .copy()/.reverse()).
                self.mapping["y"] = list(reversed(self.mapping["x"]))

            assert  isinstance(self.mapping["y"], (list, tuple)), "'y' has to be a list of column names [..., ]"

            self.data = GroupedData(data)
            self.cols = len(self.mapping.get("x"))
            self.rows = len(self.mapping.get("y"))
            self.count = self.rows * self.cols

        else:
            raise ValueError("Unknown layout type %s" % layoutType)

        self.coord = coord
        self.scaleX = scaleX
        self.scaleY = scaleY

        self.width = width
        self.ratio = ratio

        self.layers = []


    def __add__(self, layers):
        """Attach a layer to this figure (in place) and return the figure."""
        layers.setFigure(self)
        self.layers.append(layers)
        return self


    def createContainer(self, layoutType, xScaleFree, yScaleFree, labelHeight):
        """Return the HTML table holding the chart containers for this layout.

        Labels are only rendered when the layout dict has a truthy ``"labels"``
        entry; the wrap layout then uses ``createWrap``, all other cases use
        ``createGrid``.

        Raises:
            NotImplementedError: for a layout type not handled here.
        """
        if layoutType == "single":
            html = createGrid(self.id, self.width, self.ratio, 1, 1)

        elif layoutType in ["grid", "wrap", "matrix"]:

            if self.layout.get("labels"):
                if layoutType == "matrix":
                    rowLabels = [["y = %s" % el] for el in self.mapping.get("y")]
                    colLabels = [["x = %s" % el] for el in self.mapping.get("x")]
                else:
                    rowLabels = self.data.getRowLabels() if layoutType == "grid" else []
                    colLabels = self.data.getColLabels()

                if layoutType == "wrap":
                    html = createWrap(self.id, self.width, self.ratio, self.rows, self.cols, self.count,
                                      colLabels, labelHeight, allYAxisLabels=yScaleFree)
                else:
                    html = createGrid(self.id, self.width, self.ratio, self.rows, self.cols,
                                      rowLabels, colLabels, labelHeight, 
                                      allXAxisLabels=xScaleFree, allYAxisLabels=yScaleFree)
            else:
                html = createGrid(self.id, self.width, self.ratio, self.rows, self.cols, 
                                  allXAxisLabels=xScaleFree, allYAxisLabels=yScaleFree)

        else:
            raise NotImplementedError("Not implemented yet")

        return html


    def getDataSlice(self, row, col, mx, my):
        """Return the ``[x, y]`` value pairs of columns ``mx``/``my`` for one cell.

        When both names refer to the same column each value is paired with itself.
        """
        df = self.data.getDataByIndex(row, col)["data"]
        if mx == my:
            return [[val[0], val[0]] for val in df[[mx]].to_dict("split")["data"]]
        else:
            return df[[mx, my]].to_dict("split")["data"]


    def getMinMax(self, layoutType, xScaleFree, yScaleFree):
        """Return ``(xmin, ymin, xmax, ymax)`` for the fixed axes.

        The limits of a free axis stay ``None``. For the matrix layout every
        mapped column appears on both axes, so a fixed axis gets the overall
        minimum and maximum across all mapped columns; for the other layouts
        a fixed axis gets the range of its own mapped column.
        """
        xmin = ymin = xmax = ymax = None
        if not (xScaleFree and yScaleFree):
            if layoutType == "matrix":
                allCols = list(set(self.mapping["x"]).union(set(self.mapping["y"])))
                minMax = [self.data.getMinMax(col) for col in allCols]
                lowest = min(minMax, key=itemgetter(0))[0]
                highest = max(minMax, key=itemgetter(1))[1]
                # Each fixed axis gets BOTH limits. The previous code set the two
                # minima for a fixed x axis and the two maxima for a fixed y axis,
                # leaving a fixed axis half-bounded and constraining a free one.
                if not xScaleFree:
                    xmin, xmax = lowest, highest
                if not yScaleFree:
                    ymin, ymax = lowest, highest
            else:
                if not xScaleFree:
                    xmin, xmax = self.data.getMinMax(self.mapping["x"])
                if not yScaleFree:
                    ymin, ymax = self.data.getMinMax(self.mapping["y"])
        return xmin, ymin, xmax, ymax


    def createChart(self):
        """Return the complete HTML for the figure: container table plus script.

        The script waits for ``window.hc_charts.promise`` and then creates one
        chart per populated cell inside the matching ``hc_<id>_<row>-<col>`` div.
        """

        layoutType = self.layout.get("type", "single")
        scaleType  = self.layout.get("scales", "fixed")
        labelHeight = self.layout.get("labelHeight", 20)

        # In the wrap layout the x axis is always treated as free
        xScaleFree = (scaleType in ["free", "free_x"]) if layoutType != "wrap" else True
        yScaleFree = (scaleType in ["free", "free_y"])

        xmin, ymin, xmax, ymax = self.getMinMax(layoutType, xScaleFree, yScaleFree)

        html = self.createContainer(layoutType, xScaleFree, yScaleFree, labelHeight)

        html += dedent("""
        <script>
            window.hc_charts.promise.then(function(HC) {
        """)

        i = 0   # running index of populated cells (wrap layouts may have fewer than rows * cols)

        for row in range(self.rows):
            for col in range(self.cols):

                fig = Components().figure(zoomType="xy", exporting=False, title=None, legend=False)

                fig.addXAxis(title=None, max=xmax, min=xmin, lineWidth=1, tickWidth=1, gridLineWidth=1)
                fig.addYAxis(title=None, max=ymax, min=ymin, lineWidth=1, tickWidth=1, gridLineWidth=1)

                # x axis labels: every cell when x is free, otherwise bottom row only
                if xScaleFree or row == self.rows - 1:
                    fig.updateChart(marginBottom=40, height=(self.width//self.cols)*self.ratio+30)
                else:
                    fig.updateXAxis(labels=False)

                # y axis labels: every cell when y is free, otherwise first column only
                if yScaleFree or col == 0:
                    fig.updateChart(marginLeft=50,   width=self.width//self.cols+40)
                else:
                    fig.updateYAxis(labels=False)

                # credits are switched off for every cell but the last populated one
                if i != self.count - 1:
                    fig.updateFigure(credits=False)

                if i < self.count:
                    isEmpty = True
                    for layer in self.layers:
                        mx, my = self.mapping["x"], self.mapping["y"]

                        if layoutType == "single":
                            data = self.getDataSlice(None, None, mx, my)

                        elif layoutType == "wrap":
                            data = self.getDataSlice(0, i, mx, my)

                        elif layoutType == "grid":
                            data = self.getDataSlice(row, col, mx, my)

                        elif layoutType == "matrix":
                            mx = mx[col]
                            my = my[row]
                            data = self.getDataSlice(None, None, mx, my)

                        if isEmpty:
                            isEmpty = not (len(data) > 0)

                        # TODO: clean to use layer data if exists
                        fig.addSeries(my, data, layer.options)

                    container = "hc_%s_%d-%d" % (self.id, row, col)
                    # A cell without data is only left without a chart when both axes are free
                    if isEmpty and xScaleFree and yScaleFree:
                        html += dedent("""
                        //      console.log("%d-%d")
                        """) % (row, col)
                    else:
                        html += dedent("""
                        //      console.log("%d-%d")
                                HC.chart("%s", %s);
                        """) % (row, col, container, json.dumps(fig.get(), cls=ScipyEncoder))

                    i += 1

        html += dedent("""
            });
        </script>
        """)

        return html


    def _repr_html_(self):
        """Rich notebook representation: the HTML produced by ``createChart``."""
        return self.createChart()



# UTILS

In [None]:
def single():
    """Return the layout dict for one un-facetted chart."""
    return dict(type="single")

def grid(x, y, labels=True, scales="fixed", labelHeight=20):
    """Return the layout dict for a grid layout; all arguments are stored under their own names."""
    spec = {"type": "grid"}
    spec.update(x=x, y=y, scales=scales, labels=labels, labelHeight=labelHeight)
    return spec

def wrap(y, nrows=None, ncols=None, labels=True, scales="fixed", labelHeight=20):
    """Return the layout dict for a wrap layout; all arguments are stored under their own names."""
    spec = {"type": "wrap"}
    spec.update(y=y, nrows=nrows, ncols=ncols, scales=scales, labels=labels, labelHeight=labelHeight)
    return spec

def matrix(labels=True, scales="fixed", labelHeight=20):
    """Return the layout dict for a matrix layout; all arguments are stored under their own names."""
    spec = {"type": "matrix"}
    spec.update(scales=scales, labels=labels, labelHeight=labelHeight)
    return spec
