In [None]:
import pandas as pd
import polars as pl
import numpy as np
from random import randint
import random
import time
import sys
sys.path.insert(1, '../framework') # base location of the .py classes
from racetrack import *
# Single RACETrack instance shared by every cell in this notebook
rt = RACETrack()
# Register fixed colors for the strings 'pandas' and 'polars' -- these are the values written to the
# 'type' column of the timing frames, which the charts below pass as color_by
# (presumably the color manager consults this lookup when coloring by a string -- confirm in racetrack)
rt.co_mgr.str_to_color_lu['pandas'] = '#ffa0a0' # reddish
rt.co_mgr.str_to_color_lu['polars'] = '#a0a0ff' # bluish

In [None]:
# Create A Randomized Dataset
def createData(sz=1000, seed=None):
    """
    Build a randomized benchmark dataset and return it as a (pandas, polars) DataFrame pair.

    Columns produced:
      'strs_20'    - animal name drawn from strs_20_set
      'nums_small' - int in 0..9
      'nums_large' - int in 0..99
      'nums_float' - float in [0.0, 1.0)
      'dats'       - 'YYYY-MM-DD HH:MM:SS' string, then passed through rt.columnsAreTimestamps
      'strs_5'     - color name drawn from strs_5_set

    Parameters:
      sz   - number of rows to generate
      seed - optional seed; when given, a private random.Random(seed) drives the generation so
             the same dataset can be rebuilt.  When None (the default) the module-level random
             functions are used, exactly as before this parameter existed.

    Returns:
      (df_pd, df_pl) - the pandas DataFrame and a polars DataFrame constructed from it
    """
    # NOTE: despite the '20' in the name, this list holds 19 entries
    strs_20_set = ['ant', 'bat', 'cat', 'dog', 'elephant', 'fox', 'gecko', 'horse', 'izard', 'jackal', 'kite', 'lizard', 'mouse', 
                   'ostrich', 'parrot', 'rabbit', 'snake', 'turtle', 'zebra']
    strs_5_set  = ['blue', 'red', 'green', 'orange', 'yellow']
    # The random module exposes the same randint/random/choice API as a Random instance,
    # so either one can serve as the generator
    rng = random if seed is None else random.Random(seed)
    strs_20, nums_small, nums_large, dats, strs_5, nums_float = [], [], [], [], [], []
    for _ in range(sz):
        strs_20.append(rng.choice(strs_20_set))
        nums_small.append(rng.randint(0, 9))
        nums_large.append(rng.randint(0, 99))
        nums_float.append(rng.random())
        # Day is capped at 28 so the date is valid for every month / year combination
        year, month, day = rng.randint(1900, 2020), rng.randint(1, 12), rng.randint(1, 28)
        hour, minute, second = rng.randint(0, 23), rng.randint(0, 59), rng.randint(0, 59)
        dats.append(f'{year:04}-{month:02}-{day:02} {hour:02}:{minute:02}:{second:02}')
        # choice() tracks the list length instead of a hard-coded upper index
        strs_5.append(rng.choice(strs_5_set))
    df_pd = pd.DataFrame({'strs_20':strs_20, 'nums_small':nums_small, 'nums_large':nums_large, 'nums_float':nums_float, 'dats':dats, 'strs_5':strs_5})
    # presumably converts the 'dats' strings into a timestamp column -- confirm in racetrack
    df_pd = rt.columnsAreTimestamps(df_pd, 'dats')
    df_pl = pl.DataFrame(df_pd)
    return df_pd, df_pl

# Build One pandas / polars Dataset Pair Per Benchmark Size
# (a finer sweep would be: list(range(50000, 1100000, 100000)))
szs = [10000, 100000, 1000000]
pds, pls = {}, {}
for sz in szs:
    # Both frames hold the same rows; they are keyed by row count
    pds[sz], pls[sz] = createData(sz)
    
# Average Seconds To Render One Component On One DataFrame
def _meanRenderTime(fn, df, params, samples):
    """Return the mean wall-clock seconds of fn(df, **params)._repr_svg_() over `samples` runs."""
    elapsed = 0.0
    for _ in range(samples):
        # perf_counter is the monotonic, high-resolution clock intended for measuring intervals;
        # time.time() can jump if the system clock is adjusted mid-benchmark
        ts0 = time.perf_counter()
        fn(df, **params)._repr_svg_()
        elapsed += time.perf_counter() - ts0
    return elapsed / samples

# Time Specific Method
def timeMethod(fn, params, component, count_by, samples=3):
    """
    Time one rendering method against every dataset size, for both pandas and polars.

    For each size key in the module-level `pds` / `pls` dictionaries, calls
    fn(dataframe, **params)._repr_svg_() `samples` times per backend and records the mean time.

    Parameters:
      fn        - callable taking a DataFrame plus keyword params and returning an object
                  that has a _repr_svg_() method
      params    - dict of keyword arguments forwarded to fn
      component - label stored in the 'component' column of the result
      count_by  - label stored in the 'count_by' column of the result
      samples   - number of timed repetitions per backend and size (must be >= 1)

    Returns:
      pandas DataFrame with columns 'size', 'time', 'type', 'component', 'count_by', 'group';
      two rows per size: type 'pandas' (group 'a') followed by type 'polars' (group 'b').

    Raises:
      ValueError - if samples is less than 1 (a mean over zero runs is undefined)
    """
    if samples < 1:
        raise ValueError(f'samples must be >= 1 (got {samples})')
    columns = ['size', 'time', 'type', 'component', 'count_by', 'group']
    rows = []
    for sz in pds:
        # All pandas samples run before any polars samples, same order as always
        pd_time = _meanRenderTime(fn, pds[sz], params, samples)
        pl_time = _meanRenderTime(fn, pls[sz], params, samples)
        rows.append({'size':sz, 'time':pd_time, 'type':'pandas', 'component':component, 'count_by':count_by, 'group':'a'})
        rows.append({'size':sz, 'time':pl_time, 'type':'polars', 'component':component, 'count_by':count_by, 'group':'b'})
    # columns= keeps the six columns present even when there are no sizes to time
    return pd.DataFrame(rows, columns=columns)
# Keyword arguments shared by every rt.xy() call that plots a timing frame:
# dataset size on x, mean render time on y, one line per 'group', colored by backend 'type'
xy_params = dict(
    x_field='size',
    y_field='time',
    line_groupby_field='group',
    line_groupby_w=3,
    dot_size=None,
    color_by='type',
    w=512,
)
# Collects every timing DataFrame so the last cell can chart them together
all_dfs = list()

In [None]:
# Histogram timings binned by 'strs_20': counting rows, an integer column, and a float column
# (re-running this cell appends another three frames to all_dfs)
df, df2, df3 = (
    timeMethod(rt.histogram, {'bin_by':'strs_20', **extra}, 'histograms', label)
    for extra, label in [({}, 'rows'), ({'count_by':'nums_large'}, 'ints'), ({'count_by':'nums_float'}, 'floats')]
)
all_dfs.extend((df, df2, df3))
rt.tile([rt.xy(timing_df, **xy_params) for timing_df in (df, df2, df3)])

In [None]:
# Temporal bar chart timings: counting rows, an integer column, and a float column
# (re-running this cell appends another three frames to all_dfs)
df, df2, df3 = (
    timeMethod(rt.temporalBarChart, params, 'temporalBarChart', label)
    for params, label in [({}, 'rows'), ({'count_by':'nums_large'}, 'ints'), ({'count_by':'nums_float'}, 'floats')]
)
all_dfs.extend((df, df2, df3))
rt.tile([rt.xy(timing_df, **xy_params) for timing_df in (df, df2, df3)])

In [None]:
# Scatterplot timings: a single 'default' run of nums_float vs nums_large
df = timeMethod(rt.xy, dict(x_field='nums_float', y_field='nums_large'), 'xy', 'default')
all_dfs += [df]
rt.xy(df, **xy_params)

In [None]:
# Periodic bar chart timings: counting rows, an integer column, and a float column
# (re-running this cell appends another three frames to all_dfs)
df, df2, df3 = (
    timeMethod(rt.periodicBarChart, params, 'periodicBarChart', label)
    for params, label in [({}, 'rows'), ({'count_by':'nums_large'}, 'ints'), ({'count_by':'nums_float'}, 'floats')]
)
all_dfs.extend((df, df2, df3))
rt.tile([rt.xy(timing_df, **xy_params) for timing_df in (df, df2, df3)])

In [None]:
# Link-node timings: one relationship pairing the 'strs_20' column with the 'strs_5' column
relates = [('strs_20', 'strs_5')]
df = timeMethod(rt.linkNode, dict(relationships=relates), 'linkNode', 'default')
all_dfs += [df]
rt.xy(df, **xy_params)

In [None]:
# Summary grid over every timing frame collected so far, split by the 'component' and
# 'count_by' columns, with an xy chart of time against dataset size in each panel
rt.smallMultiples(
    pd.concat(all_dfs),
    category_by=['component', 'count_by'],
    grid_view=True,
    show_df_multiple=False,
    sm_type='xy',
    sm_params=dict(
        x_field='size',
        y_field='time',
        line_groupby_field='group',
        line_groupby_w=3,
        dot_size='medium',
        color_by='type',
    ),
    y_axis_independent=True,
    w_sm_override=384,
    h_sm_override=160,
)
