In [None]:
# Enable IPython's autoreload extension so that edits to imported modules
# (such as `pack`) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import sys
import collections
from timeit import timeit
from tf.app import use
from tf.applib.helpers import dm
from pack import deepSize, pack, Pack

In [None]:
def testValues(origData, packData, maxNode):
  isDict = packData.container == 'dict'
  error = 0
  for n in range(1, maxNode + 1):
    oe = origData.get(n, None) if isDict else origData[n] if n < len(origData) else None
    pe = packData.get(n)
    if oe != pe:
      error +=1
  return error
  
def testPerformance(origData, packData):
  isDict = packData.container == 'dict'
  times = 1000
  testMember = list(k for (k, v) in origData.items() if v is not None)[0] if isDict else 0
  origTime = timeit("origData[testMember]", globals=locals(), number=times)
  packedTime = timeit("packData.get(testMember)", globals=locals(), number=times)
  return packedTime / origTime if origTime else 0

# Container kind per feature, looked up by testFeature;
# features that are not listed here are packed as 'dict'.
CMAP = {
    'oslots': 'tup',
    'otype': 'tup',
    '__levUp__': 'tup',
    '__levDown__': 'tup',
}

# Element kind per feature, looked up by testFeature;
# features that are not listed here use the valueType from their metadata.
EMAP = {
    'oslots': 'tup',
    'otype': 'str',
    '__levUp__': 'tup',
    '__levDown__': 'tup',
}
  
def testFeature(A, ft):
  """Pack one feature and measure the result against the original data.

  A: the app object returned by `use`; its `api` gives access to the
    loaded features.
  ft: name of the feature to test.

  Returns a tuple whose items line up with FIELDS:
  (name, type, lengthOrig, lengthPack, sizeOrig, elemSizeOrig,
  sizePack, elemSizePack, sizeRatio, perfRatio, errors).
  """
  api = A.api
  TF = api.TF
  F = api.F
  maxNode = F.otype.maxNode

  info = TF.features[ft]
  origData = info.data
  # otype and oslots carry trailing entries that are cut off before packing
  # (presumably bookkeeping values rather than per-node data -- TODO confirm).
  if ft == 'otype':
    origData = origData[0:-2]
  elif ft == 'oslots':
    origData = origData[0:-1]
  containerType = CMAP.get(ft, 'dict')
  elementType = EMAP.get(ft, getattr(info, 'metaData', {}).get('valueType', None))
  if ft != 'oslots' and info.isEdge:
    # Edge features: skip None entries and turn every value into a tuple.
    origData = {n: tuple(ns) for (n, ns) in origData.items() if ns is not None}
    elementType = 'tup'

  print(ft, containerType, elementType)
  packDataRaw = pack(origData, containerType, elementType)
  packData = Pack(packDataRaw)

  fType = f'{containerType}:{elementType}'
  origLen = len(origData)
  packLen = packData.length
  origSize = deepSize(origData)
  packSize = deepSize(packDataRaw)
  # Every ratio and average falls back to 0 when its denominator is 0.
  sizeRatio = int(round(100 * packSize / origSize)) if origSize else 0
  origAvSize = int(round(origSize / origLen)) if origLen else 0
  packAvSize = int(round(packSize / packLen)) if packLen else 0
  perfRatio = testPerformance(origData, packData)
  errors = testValues(origData, packData, maxNode)

  return (
      ft,
      fType,
      origLen,
      packLen,
      origSize,
      origAvSize,
      packSize,
      packAvSize,
      sizeRatio,
      perfRatio,
      errors,
  )

In [None]:
# Names of the values returned by testFeature, in the order it returns them.
FIELDS = '''
    name
    type
    lengthOrig
    lengthPack
    sizeOrig
    elemSizeOrig
    sizePack
    elemSizePack
    sizeRatio
    perfRatio
    errors
'''.strip().split()

# Fields that are added up over all features for the TOTAL row.
SUMMABLE = set('''
  lengthOrig
  lengthPack
  sizeOrig
  sizePack
  perfRatio
  errors
'''.strip().split())

# Fields of the TOTAL row that are computed as a quotient of two summed
# fields: field = (numerator, denominator).
AVERAGEABLE = dict(
  elemSizeOrig = ('sizeOrig', 'lengthOrig'),
  elemSizePack = ('sizePack', 'lengthPack'),
  sizeRatio = ('sizePack', 'sizeOrig'),
)

# One markdown table row per feature. Every cell, including `errors`,
# ends with a ' | ' separator so that rows and header have the same
# number of columns.
ROWFMT = (
  '`{name}` | '
  '{type} | '
  '{lengthOrig:,} | '
  '{lengthPack:,} | '
  '{errors:,} | '
  '{elemSizeOrig:,} | '
  '{elemSizePack:,} | '
  '{sizeOrig:,} | '
  '{sizePack:,} | '
  '{sizeRatio}% | '
  '{perfRatio:4.1f}x | '
  '\n'
)
# Same column order as ROWFMT, without number formatting; used for the
# header line and for the '---' separator line.
HEADFMT = (
  '{name} | '
  '{type} | '
  '{lengthOrig} | '
  '{lengthPack} | '
  '{errors} | '
  '{elemSizeOrig} | '
  '{elemSizePack} | '
  '{sizeOrig} | '
  '{sizePack} | '
  '{sizeRatio} | '
  '{perfRatio} | '
  '\n'
)


# Markdown table header: the column names followed by the separator line.
TABLEHDR = (
  HEADFMT.format(**{f: f for f in FIELDS}) +
  HEADFMT.format(**{f: '---' for f in FIELDS})
)

In [None]:
def testFeatures(corpus):
  """Test the packing of all features of a corpus and display a summary table.

  The corpus is loaded with `use`, every eligible feature is run through
  testFeature, and the per-feature results plus a TOTAL row are displayed
  as a markdown table through `dm`.

  Skipped features: `otext`, and any feature whose name starts with '__'
  unless it is listed in CMAP or EMAP.

  corpus: name of the corpus, e.g. 'banks'.
  """
  A = use(f'{corpus}:clone', checkout='clone', silent=True)
  TF = A.api.TF
  features = TF.features
  # Seed every summable column, so that the TOTAL row can still be formatted
  # when no feature at all has been processed.
  totals = collections.Counter({s: 0 for s in SUMMABLE})

  rows = []

  n = 0
  for ft in sorted(features):
    if ft.startswith('__') and not (ft in CMAP or ft in EMAP):
      continue
    if ft == 'otext':
      continue
    # progress indicator, overwritten in place
    sys.stdout.write(f'\r{ft:<30}')
    n += 1
    results = testFeature(A, ft)
    hresults = dict(zip(FIELDS, results))
    rows.append(ROWFMT.format(**hresults))
    for s in SUMMABLE:
      totals[s] += hresults[s]
  print(f'\r{"":>40}')
  # Overall averages are quotients of the summed columns, not means of the
  # per-feature values.
  for (a, (nm, dnm)) in AVERAGEABLE.items():
    theNm = totals[nm]
    theDnm = totals[dnm]
    totals[a] = theNm / theDnm if theDnm else 0
  # perfRatio was summed in the loop above; turn it into a mean over features.
  totals['perfRatio'] = totals['perfRatio'] / n if n else 0
  for s in ('elemSizeOrig', 'elemSizePack'):
    totals[s] = int(round(totals[s]))
  for s in ('sizeRatio',):
    totals[s] = int(round(100 * totals[s]))
  rows.append(ROWFMT.format(name='TOTAL', type='ALL', **totals))
  header = f'''
# {A.repo} {n} features

{TABLEHDR}'''

  dm(header + ''.join(rows))

In [None]:
# Run the packing test on all features of the `banks` corpus.
testFeatures('banks')

In [None]:
# Run the packing test on all features of the `dss` corpus.
testFeatures('dss')

In [None]:
# Run the packing test on all features of the `oldbabylonian` corpus.
testFeatures('oldbabylonian')

In [None]:
# Run the packing test on all features of the `bhsa` corpus.
testFeatures('bhsa')