# [Goulib](../notebook.ipynb).table
"mini pandas.DataFrame" Table class with Excel + CSV I/O, easy access to columns, HTML output, and much more.

In [1]:
from Goulib.notebook import *
from Goulib.table import *

In [2]:
# Tables can be constructed from any tabular data: here the rows are a list and
# a shorter tuple, and the outer Table(...) is itself built from another Table.
small=Table(Table([[1,2,3],(4,5)]))
small

0,1,2
1,2,3


In [3]:
# Table cells can contain other tables, LaTeX expressions, images... and more (soon...)
from Goulib.image import Image
lena=Image('../tests/data/lena.png').resize((128,128)) # test image, resized with size argument (128,128)
# first row: a nested Table and a LaTeX string; second row: the same image twice
Table([[small,'$$\\LaTeX : e=m.c^2$$'],[lena,lena]],titles=['complex','content'])

complex,content
123,$$\LaTeX : e=m.c^2$$

0,1,2
1,2,3


In [4]:
# Tables can be read from .csv, HTML, JSON and Excel files
# (reading Excel files requires xlrd, see http://www.python-excel.org/)
t=Table('../tests/data/test.xls') 

In [5]:
print(t.titles) # Tables have optional column headers, exposed as the `titles` attribute

In [6]:
t.setcol('Total',None) # 'Total' column set with None: it shows up empty in the output below
Table(t[:5],titles=t.titles) # indexing lines, construction and default HTML representation
# notice the order dates are messy because of the Excel representation of dates

OrderDate,Région,Rep,Item,Unités,Cost,Total
41061,East,Jones,Pencil,95,1.99,
1/23/2012,Central,Kivell,Binder,50,19.99,
41154,Central,Jardine,Pencil,36,4.99,
2/26/2012,Central,Gill,Pen,27,19.99,


In [7]:
# handle the mess in Excel cell types ...
t.to_date('OrderDate',fmt=['%m/%d/%Y','Excel']) # converts the column to dates, using several possible formats
t.applyf('Cost',float) # applies a function to a column; here it forces the column to contain floats

True

In [8]:
# math between columns is still a bit tedious...
from Goulib.math2 import vecmul
# fill the 'Total' column from the 'Cost' and 'Unités' columns
# (presumably an element-wise product; the later outputs show Total = Cost * Unités)
t.setcol('Total',vecmul(t.col('Cost'),t.col('Unités')))

In [9]:
# it's easy to make a "total" line from columns,
# with a list of reduce-like functions, one applied to each column
from Goulib.stats import avg
from Goulib.itertools2 import count_unique
# one function per column, in column order (7 functions for the 7 titles)
t.total([max,count_unique,count_unique,count_unique,sum,avg,sum])
t.footer # the result is stored in a separate footer field

[datetime.date(2013, 12, 21),
 3,
 11,
 5,
 2121,
 20.308604651162796,
 19627.88000000001]

In [15]:
# a way to shorten long tables: with start=5, stop=10 the output shows five data
# lines, with "..." lines standing in for the omitted ones
print(t.html(start=5,stop=10))

OrderDate,Région,Rep,Item,Unités,Cost,Total
...,...,...,...,...,...,...
2013-07-08,Central,Kivell,Pen Set,42,23.95,1005.90
2012-01-23,Central,Kivell,Binder,50,19.99,999.50
2013-08-24,West,Sorvino,Desk,3,275.00,825.00
2013-05-31,Central,Gill,Binder,80,8.99,719.20
2013-07-21,Central,Morgan,Pen Set,55,12.49,686.95
...,...,...,...,...,...,...
2013-12-21,3,11,5,2121,20.31,19627.88


In [16]:
t.sort('Total',reverse=True) # Tables can be sorted by column easily (here: highest 'Total' first)
print(t.html(stop=5)) # show only the 5 lines with the highest total

OrderDate,Région,Rep,Item,Unités,Cost,Total
2013-04-12,Central,Jardine,Binder,94,19.99,1879.06
2012-07-29,East,Parent,Binder,81,19.99,1619.19
2013-01-02,Central,Smith,Binder,87,15.00,1305.00
2012-12-29,East,Parent,Pen Set,74,15.99,1183.26
2013-10-14,West,Thompson,Binder,57,19.99,1139.43
...,...,...,...,...,...,...
2013-12-21,3,11,5,2121,20.31,19627.88


In [17]:
# a row can be extracted as a dict where column titles are keys
# (row 1 here is the second line of the table sorted above)
t.rowasdict(1)

{'Cost': 19.99,
 'Item': 'Binder',
 'OrderDate': datetime.date(2012, 7, 29),
 'Rep': 'Parent',
 'Région': 'East',
 'Total': 1619.1899999999998,
 'Unités': 81}

In [22]:
# JSON serialization of the table; only the first 250 characters are displayed
t.json()[:250]+'...'

'[{"Cost": 19.99, "Total": 1879.06, "R\\u00e9gion": "Central", "OrderDate": "2013-04-12", "Rep": "Jardine", "Unit\\u00e9s": 94, "Item": "Binder"}, {"Cost": 19.99, "Total": 1619.1899999999998, "R\\u00e9gion": "East", "OrderDate": "2012-07-29", "Rep": "Par...'

In [None]:
# HTML representation in the notebook.
# Note the title/header and total/footer rows, and the cell alignment.
t

In [None]:
region=t.groupby(u'Région') # dictionary of subtables grouped by a column; notice the Unicode support
region['East'] # the subtable stored under the key 'East'. Isn't it nice?

In [None]:
# NOTE(review): the assignment below has no right-hand side, so this cell is a
# SyntaxError as written. The intended value is not recoverable from this
# notebook - TODO complete or delete the cell.
small=
small

In [None]:
from Goulib.math2 import *
from Goulib.itertools2 import *
# Greedy reordering of the lines: after each line i, place the remaining line
# that has the smallest `hamming` distance to it.
res=Table(t) #copy
s=len(res)
for i in range(s-1):
    line=res[i]
    # d[k] is the distance between line i and line i+1+k (the lines not yet placed)
    d=[hamming(line,res[j]) for j in range(i+1,s)]
    # index_min(d)[0] is a position inside d, which starts at line i+1, so the
    # offset is i+1. Using +i would pick line i itself when the closest line is
    # already next, and the swap below would then undo the order built so far.
    j=index_min(d)[0]+i+1
    res[i+1],res[j]=res[j],res[i+1] #swap
res

In [None]:
# `hamming` applied to two individual lines of the table (lines 1 and 2)
hamming(t[1],t[2])