# [Goulib](../notebook.ipynb).table
"mini pandas.DataFrame" Table class with Excel + CSV I/O, easy access to columns, HTML output, and much more.

In [1]:
from Goulib.notebook import *
from Goulib.table import *

In [2]:
# Build a Table from nested sequences, then wrap that Table in another Table:
# rows may be lists or tuples and do not need to have the same length
small=Table(Table([[1,2,3],(4,5)])) #tables can be constructed from any tabular data
small

AttributeError: 'int' object has no attribute 'tag'

Table(len=2,titles=[],data=[[1, 2, 3], [4, 5]])

In [3]:
#Table cells can contain other tables, LaTeX expressions, Images... and more (soon...)
from Goulib.image import Image
lena=Image('../tests/data/lena.png').resize((128,128)) # load the test image and resize it to 128x128
# NOTE(review): in a raw string, '\\LaTeX' is two literal backslashes followed by "LaTeX"
# (the other commands in the same string use a single backslash) -- confirm whether
# a single-backslash \LaTeX macro was intended
Table([[small,r'$\\LaTeX : \sqrt{\left(a+b\right)\left(a-b\right)}$'],[lena,lena]],titles=['complex','content'])

AttributeError: 'str' object has no attribute 'tag'

Table(len=2,titles=['complex', 'content'],data=[[Table(len=2,titles=[],data=[[1, 2, 3], [4, 5]]), '$\\\\LaTeX : \\sqrt{\\left(a+b\\right)\\left(a-b\\right)}$'], [Image(mode=RGB shape=(128, 128, 3) type=float64), Image(mode=RGB shape=(128, 128, 3) type=float64)]])

In [4]:
# Tables can be read from .csv, html, JSON and Excel files (requires xlrd http://www.python-excel.org/)
t=Table('../tests/data/test.xls') 

In [5]:
print(t.titles) #Tables have optional column headers

['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total']


In [6]:
t.setcol('Total',None) # blank out the 'Total' column (its cells show as None below)
Table(t[:5],titles=t.titles) #indexing lines, construction and default HTML representation
#notice the OrderDate values are messy (integers mixed with date strings) because of Excel's representation of dates

AttributeError: 'str' object has no attribute 'tag'

Table(len=5,titles=['OrderDate', 'Région', 'Rep', 'Item', 'Unités', 'Cost', 'Total'],data=[[41061, 'East', 'Jones', 'Pencil', 95, 1.99, None], ['1/23/2012', 'Central', 'Kivell', 'Binder', 50, 19.99, None], [41154, 'Central', 'Jardine', 'Pencil', 36, 4.99, None], ['2/26/2012', 'Central', 'Gill', 'Pen', 27, 19.99, None], ['3/15/2012', 'West', 'Sorvino', 'Pencil', 56, 2.99, None]])

In [7]:
t[2,5],t[2,'Cost'] # cells can be accessed by row,col index or title

(4.99, 4.99)

In [8]:
print(t[:5,'Cost']) # indexing supports slices too

[1.99, 19.99, 4.99, 19.99, 2.99]


In [9]:
# handle the mess in Excel Cell types ...
t.to_date('OrderDate',fmt=['%m/%d/%Y','Excel']) #converts column to date using several possible formats...
t.applyf('Cost',float) # apply a function to a column. Here force the column to contain floats

True

In [10]:
# math between columns is still a bit tedious...
from Goulib.math2 import vecmul
t.setcol('Total',vecmul(t.col('Cost'),t.col('Unités')))

In [11]:
#it's easy to make a "total" line from columns
#with a list of reduce-like functions applied to each column
#one function per column, in title order: max for OrderDate, count_unique for
#Région, Rep and Item, sum for Unités, avg for Cost and sum for Total
from Goulib.stats import avg
from Goulib.itertools2 import count_unique
t.total([max,count_unique,count_unique,count_unique,sum,avg,sum])
t.footer #result is stored in a separate footer field

[datetime.date(2013, 12, 21),
 3,
 11,
 5,
 2121,
 20.308604651162796,
 19627.88000000001]

In [12]:
h(t.html(start=5,stop=10)) # a way to shorten long tables

AttributeError: 'str' object has no attribute 'tag'

In [None]:
t.sort('Total',reverse=True) # Tables can be sorted by column easily
h(t.html(stop=5)) # show only the 5 lines with highest total

In [None]:
region=t.groupby(u'Région') # dictionary of subtables grouped by a column. notice Unicode support
region['East'] # isn't it nice ?

In [None]:
#a row can be extracted as a dict where column titles are keys
t.rowasdict(1)

In [None]:
t.json()[:250]+'...' #rowasdict is handy to build json representation

In [None]:
from Goulib.math2 import *
from Goulib.itertools2 import *
# Greedy nearest-neighbour reordering of the rows: after each row i, move the
# remaining row with the smallest hamming distance to it into position i+1.
# NOTE(review): assumes index_min returns (index, value) -- confirm in Goulib.itertools2
res=Table(t) #copy
s=len(res)
for i in range(s-1):
    line=res[i]
    # d[k] is the distance from row i to row i+1+k (candidates start at i+1)
    d=[hamming(line,res[j]) for j in range(i+1,s)]
    # map the position in d back to a row index; the offset is i+1, not i,
    # otherwise the row just placed at i could be swapped away again
    j=index_min(d)[0]+i+1
    res[i+1],res[j]=res[j],res[i+1] #swap
res

In [None]:
hamming(t[1],t[2]) # hamming distance between rows 1 and 2 of t