In [1]:
import pandas as pd
import datetime
import numpy as np
from tenzing.summary import get_template, traverse_config, summary_report
from tenzing.core.model_implementations.typesets import tenzing_standard

%load_ext autoreload
%autoreload 2

In [4]:
# Small demo frame mixing several kinds of columns: integers, floats, plain
# strings, dates, geometry-looking strings, numbers stored as strings, real
# booleans and boolean-looking strings.
df = pd.DataFrame(dict(
    item_id=[1, 1, 3],
    cost=[2.1, 3.5, 4],
    item=['orange', 'orange', 'apple'],
    sale_date=pd.to_datetime([datetime.date(year, 1, 1) for year in (2011, 2012, 2013)]),
    store_geography=pd.Series(['POINT (12 42)', 'POINT (100 42.723)', 'POINT (0 0)']),
    cogs=pd.Series([np.nan, 1.1, 2.1]).astype(str),
    still_available=[True, False, True],
    expired=['True', 'false', 'False'],
))

# Build the typeset, let it prepare itself on the frame, then produce the
# summary object that the following cells render and inspect.
# NOTE(review): what `prep` does (and whether it mutates `df`) is not visible
# from this notebook -- confirm against the tenzing API.
ts = tenzing_standard()
ts.prep(df)
summer = ts.summary_report(df)

In [5]:
class renderable_config:
    """Leaf of a report layout: one template rendered from one summary entry.

    Parameters
    ----------
    data_name : str
        Key handed to ``summary.get`` to fetch the values to display.
    template_name : str
        Name handed to ``get_template`` to look up the template.
    extra_configs : dict, optional
        Extra entries merged into the template context after the ``'data'``
        entry (so a ``'data'`` key in here overrides the summary value).
        Defaults to a new empty dict.
    """

    def __init__(self, data_name, template_name, extra_configs=None):
        self.data_name = data_name
        self.template_name = template_name
        # Use a None sentinel rather than `{}` in the signature: a dict
        # default is created once and would be shared (and mutated) across
        # every instance built without `extra_configs`.
        self.extra_configs = {} if extra_configs is None else extra_configs

    @property
    def template(self):
        """Template object looked up by name on every access."""
        return get_template(self.template_name)

    def render(self, summary):
        """Render this node.

        Parameters
        ----------
        summary : object
            Anything exposing ``get(name)``; it is queried with
            ``self.data_name``.

        Returns
        -------
        Whatever ``self.template.render(data=...)`` returns.
        """
        context = {'data': summary.get(self.data_name)}
        context.update(self.extra_configs)
        return self.template.render(data=context)
        
def traverse_config(config, summary):
    """Recursively render a layout node.

    Note: this definition replaces the ``traverse_config`` name imported from
    ``tenzing.summary`` at the top of the notebook.

    Parameters
    ----------
    config : renderable_config or list
        A leaf node, or a list of nodes (lists may be nested).
    summary : object
        Passed unchanged to every leaf's ``render``.

    Returns
    -------
    The leaf's rendered output, or for a list the result of rendering
    ``'list_composition.html'`` with the list of rendered children.

    Raises
    ------
    NotImplementedError
        If ``config`` is a dict (dict nodes are not handled here).
    TypeError
        If ``config`` is any other unsupported type.
    """
    if isinstance(config, renderable_config):
        return config.render(summary)

    if isinstance(config, list):
        template = get_template('list_composition.html')
        rendered = [traverse_config(sub_config, summary) for sub_config in config]
        return template.render(data=rendered)

    if isinstance(config, dict):
        raise NotImplementedError('traverse_config does not support dict nodes')

    # Fail loudly instead of printing and returning None, which would be
    # rendered into the parent template as if it were valid content.
    raise TypeError(
        'traverse_config cannot render a node of type {!r}'.format(type(config).__name__)
    )

def render_config(config, summary):
    """Render a whole report.

    Each value of ``config`` is rendered with ``traverse_config`` and stored
    under its key; the resulting mapping is then handed to the
    ``'base.html'`` template as ``data``.

    Parameters
    ----------
    config : dict
        Maps a section title to that section's layout node.
    summary : object
        Forwarded unchanged to ``traverse_config``.

    Returns
    -------
    Whatever the base template's ``render`` returns.
    """
    page = get_template('base.html')
    sections = {}
    for heading, section_config in config.items():
        sections[heading] = traverse_config(section_config, summary)
    return page.render(data=sections)

class test:
    """Notebook-displayable report built from a summary object.

    ``summary`` holds four pieces read from the summary object;
    ``template`` is the layout: an 'Overview' section with two fixed panels
    and a 'Variable Statistics' section with one panel per key of the column
    summary. Instances expose ``get`` so they can be passed as the
    ``summary`` argument when rendering.
    """

    def __init__(self, summer):
        self.summary = dict(
            general_summary=summer.general_summary,
            type_counts=summer.type_counts,
            column_summary=summer.column_summary,
            type_map=summer.col_type_map,
        )

        overview_panels = [
            renderable_config('general_summary', 'overview.html', {'title': 'Dataset Info'}),
            renderable_config('type_counts', 'overview.html', {'title': 'Variable types'}),
        ]

        column_panels = []
        for column in self.summary['column_summary'].keys():
            # The data path is '<section>/<column>', resolved later by `get`.
            column_panels.append(renderable_config(
                '/'.join(('column_summary', column)),
                'column_overview.html',
                {'title': column, 'subtitle': self.summary['type_map'][column]},
            ))

        self.template = {
            'Overview': overview_panels,
            'Variable Statistics': column_panels,
        }

    def get(self, attr):
        """Look up a '/'-separated path inside ``self.summary``.

        The first segment indexes ``self.summary``; each following segment
        indexes the previous result. Because '/' is the separator, a key
        that itself contains '/' cannot be addressed.
        """
        head, *tail = attr.split('/')
        res = self.summary[head]
        for key in tail:
            res = res[key]
        return res

    def _repr_html_(self):
        """HTML representation: the layout rendered against this object."""
        return render_config(self.template, self)

# Display the report: as the cell's last expression, the instance is shown
# through its _repr_html_ method.
test(summer)

0,1
Number of Observations,3
Number of Variables,8
tenzing_integer,1
tenzing_float,1
tenzing_string,4
tenzing_timestamp,1
tenzing_bool,1
nunique,2.0
mean,1.67
std,1.15

0,1
Number of Observations,3
Number of Variables,8

0,1
tenzing_integer,1
tenzing_float,1
tenzing_string,4
tenzing_timestamp,1
tenzing_bool,1
nunique,2.0
mean,1.67
std,1.15
max,3.0
min,1.0

0,1
tenzing_integer,1
tenzing_float,1
tenzing_string,4
tenzing_timestamp,1
tenzing_bool,1

0,1
nunique,2.0
mean,1.67
std,1.15
max,3.0
min,1.0
median,1.0
n_records,3
n_zeros,0
perc_zeros,0.0
na_count,0

0,1
nunique,2.0
mean,1.67
std,1.15
max,3.0
min,1.0
median,1.0
n_records,3.0
n_zeros,0.0
perc_zeros,0.0
na_count,0.0

0,1
nunique,3.0
mean,3.2
std,0.98
max,4.0
min,2.1
median,3.5
n_records,3
n_zeros,0
perc_zeros,0.0
na_count,0

0,1
nunique,3.0
mean,3.2
std,0.98
max,4.0
min,2.1
median,3.5
n_records,3.0
n_zeros,0.0
perc_zeros,0.0
na_count,0.0

0,1
nunique,2
n_records,3
frequencies,"{'orange': 2, 'apple': 1}"
na_count,0
perc_na,0.0
nunique,3
min,2011-01-01 00:00:00
max,2013-01-01 00:00:00
n_records,3
perc_unique,1.0

0,1
nunique,2
n_records,3
frequencies,"{'orange': 2, 'apple': 1}"
na_count,0
perc_na,0.0

0,1
nunique,3
min,2011-01-01 00:00:00
max,2013-01-01 00:00:00
n_records,3
perc_unique,1.0
range,731 days 00:00:00
na_count,0
perc_na,0.0
nunique,3
n_records,3

0,1
nunique,3
min,2011-01-01 00:00:00
max,2013-01-01 00:00:00
n_records,3
perc_unique,1.0
range,731 days 00:00:00
na_count,0
perc_na,0.0

0,1
nunique,3
n_records,3
frequencies,"{'POINT (12 42)': 1, 'POINT (0 0)': 1, 'POINT (100 42.723)': 1}"
na_count,0
perc_na,0.0
nunique,3
n_records,3
frequencies,"{'nan': 1, '1.1': 1, '2.1': 1}"
na_count,0
perc_na,0.0

0,1
nunique,3
n_records,3
frequencies,"{'POINT (12 42)': 1, 'POINT (0 0)': 1, 'POINT (100 42.723)': 1}"
na_count,0
perc_na,0.0

0,1
nunique,3
n_records,3
frequencies,"{'nan': 1, '1.1': 1, '2.1': 1}"
na_count,0
perc_na,0.0
frequencies,"{True: 2, False: 1}"
num_True,2
num_False,1
n_records,3
perc_True,0.67

0,1
nunique,3
n_records,3
frequencies,"{'nan': 1, '1.1': 1, '2.1': 1}"
na_count,0
perc_na,0.0

0,1
frequencies,"{True: 2, False: 1}"
num_True,2
num_False,1
n_records,3
perc_True,0.67
perc_False,0.33
na_count,0
perc_na,0.0
nunique,3
n_records,3

0,1
frequencies,"{True: 2, False: 1}"
num_True,2
num_False,1
n_records,3
perc_True,0.67
perc_False,0.33
na_count,0
perc_na,0.0

0,1
nunique,3
n_records,3
frequencies,"{'True': 1, 'false': 1, 'False': 1}"
na_count,0
perc_na,0.0

0,1
nunique,3
n_records,3
frequencies,"{'True': 1, 'false': 1, 'False': 1}"
na_count,0
perc_na,0.0


In [6]:
# Inspect the column-name -> type mapping held by the summary report.
summer.col_type_map

{'item_id': tenzing_integer,
 'cost': tenzing_float,
 'item': tenzing_string,
 'sale_date': tenzing_timestamp,
 'store_geography': tenzing_string,
 'cogs': tenzing_string,
 'still_available': tenzing_bool,
 'expired': tenzing_string}

In [7]:
# Inspect the layout template stored on the summary report itself, for
# comparison with the one built by hand in the `test` class.
summer.template

{'Overview': [{'Dataset Info': {'template': 'overview.html',
    'data': 'general_summary',
    'title': 'Dataset Info'}},
  {'Variable types': {'template': 'overview.html',
    'data': 'type_counts',
    'title': 'Variable types'}}],
 'Variable Statistics': [{'Variable Name': {'is_abstract_variable': True,
    'template': 'overview.html',
    'data': 'column_summary',
    'title': 'Variable Name'}}]}

In [8]:
# Scratch literal (not assigned to anything): a section expressed as a dict
# with a 'data' list of named sub-panels plus the 'template' used to compose
# them. NOTE(review): presumably a sketch of an alternative config schema --
# confirm before relying on it.
{'Overview': {'data': [{'temp1': {'data': 'general_summary', 'template': 'overview.html'}},
                                               {'temp2':{'data': 'type_counts', 'template': 'overview.html'}}
                                               ],
                                      'template': 'list_composition.html'
                                      }}

{'Overview': {'data': [{'temp1': {'data': 'general_summary',
     'template': 'overview.html'}},
   {'temp2': {'data': 'type_counts', 'template': 'overview.html'}}],
  'template': 'list_composition.html'}}