# Setup

In [1]:
from tabulate import tabulate
from perfcounters import PerfCounters
from IPython.core.display import HTML
from collections import defaultdict

In [2]:
import evoflow.backend.numpy as NP
import evoflow.backend.cupy as CP


Using cupy backend


In [3]:
import pandas as pd
import altair as alt

# Backend functions benchmarking

In [4]:
# Counters accumulated across all benchmark cells of this notebook.
overall_cnts = PerfCounters()
NUM_REPS = 100  # how many times each op is repeated inside one timed section
BACKENDS = {"Numpy": NP, "Cupy": CP}

# Small shapes are included on purpose: there may be cases where transferring
# data to the GPU is more costly than doing the work on the CPU.
SHAPE1D = {str(size): size for size in (100, 1000, 10000, 100000)}
SHAPE2D = {
    "%dx%d" % dims: dims
    for dims in ((100, 100), (1000, 100), (10000, 100), (100000, 10))
}

# Pre-generate one input array per (backend, shape) via B.randint(0, 256, shape).
# For each backend the 1D inputs are created first, then the 2D inputs.
INPUTS1D = {}
INPUTS2D = {}
for backend_name, B in BACKENDS.items():
    INPUTS1D[backend_name] = {
        name: B.randint(0, 256, size) for name, size in SHAPE1D.items()
    }
    INPUTS2D[backend_name] = {
        name: B.randint(0, 256, dims) for name, dims in SHAPE2D.items()
    }

In [5]:
def display_results(cnts):
    """Build a bar chart of the timing counters in ``cnts``, one panel per shape.

    Counter names are expected to follow ``<backend>_<op>_<shape>``, as built
    by the benchmark cells. The backend is read from before the first
    underscore and the shape from after the last one, so an op name that
    itself contains underscores (e.g. ``cosine_similarity``) is parsed
    correctly instead of failing the three-way unpacking.

    Args:
        cnts: counters object; ``cnts.get_all()['Timing counters']`` must
            yield ``(name, value)`` pairs.

    Returns:
        An Altair bar chart: x is the backend, y is the summed ``time``
        column, with one chart column per shape and one color per backend.

    Raises:
        ValueError: if a counter name has fewer than three
            underscore-separated parts.
    """
    records = defaultdict(list)

    for cnt_name, value in cnts.get_all()['Timing counters']:
        # Split from both ends so that only the middle (op) part may contain "_".
        backend_name, remainder = cnt_name.split("_", 1)
        _op_name, shape_name = remainder.rsplit("_", 1)
        records['shape'].append(shape_name)
        records['backend'].append(backend_name)
        # Scaled by 1000 for the "Time in ms" axis; assumes counters are in seconds.
        records['time'].append(value * 1000)

    frame = pd.DataFrame(records)
    chart = alt.Chart(frame).mark_bar().encode(
        x=alt.X('backend', title=''),
        y=alt.Y('time', title='Time in ms', aggregate='sum'),
        column='shape:N',
        color='backend:N')
    return chart

In [6]:
# Sanity check: tabulate the actual shape of every pre-generated 2D input
# next to the shape name it is stored under.
rows = [
    [backend_name, shape_name, inputs.shape]
    for backend_name in BACKENDS
    for shape_name, inputs in INPUTS2D[backend_name].items()
]
HTML(tabulate(rows, tablefmt='html', headers=['backend', 'shape name', 'actual shape']))

backend,shape name,actual shape
Numpy,100x100,"(100, 100)"
Numpy,1000x100,"(1000, 100)"
Numpy,10000x100,"(10000, 100)"
Numpy,100000x10,"(100000, 10)"
Cupy,100x100,"(100, 100)"
Cupy,1000x100,"(1000, 100)"
Cupy,10000x100,"(10000, 100)"
Cupy,100000x10,"(100000, 10)"


## Initialization functions

### zeros

In [7]:
# Time NUM_REPS calls to B.zeros(shape) for every backend and 2D shape.
# NOTE(review): if the Cupy backend runs GPU work asynchronously, this
# wall-clock timing may not include it — confirm whether a device sync is needed.
op_name = 'zeros'
cnts = PerfCounters()

# Flatten the (backend, shape) grid up front; the order matches the nested loops.
jobs = [
    (backend_name, B, shape_name, shape)
    for backend_name, B in BACKENDS.items()
    for shape_name, shape in SHAPE2D.items()
]
for backend_name, B, shape_name, shape in jobs:
    cnt_name = "_".join((backend_name, op_name, shape_name))
    cnts.start(cnt_name)
    for _ in range(NUM_REPS):
        B.zeros(shape)
    cnts.stop(cnt_name)

overall_cnts.merge(cnts)
display_results(cnts)

### ones

In [8]:
# Time NUM_REPS calls to B.ones(shape) for every backend and 2D shape.
op_name = 'ones'
cnts = PerfCounters()
for backend_name in BACKENDS:
    B = BACKENDS[backend_name]
    for shape_name in SHAPE2D:
        shape = SHAPE2D[shape_name]
        # Counter name layout: <backend>_<op>_<shape>
        cnt_name = "{}_{}_{}".format(backend_name, op_name, shape_name)
        cnts.start(cnt_name)
        for _ in range(NUM_REPS):
            B.ones(shape)
        cnts.stop(cnt_name)

overall_cnts.merge(cnts)
display_results(cnts)

## Randomness

### Randint

In [9]:
# Time NUM_REPS calls to B.randint(0, 256, shape) for every backend and 2D shape.
op_name = 'randint'
cnts = PerfCounters()

jobs = [
    (backend_name, B, shape_name, shape)
    for backend_name, B in BACKENDS.items()
    for shape_name, shape in SHAPE2D.items()
]
for backend_name, B, shape_name, shape in jobs:
    # Counter name layout: <backend>_<op>_<shape>
    cnt_name = "{}_{}_{}".format(backend_name, op_name, shape_name)
    cnts.start(cnt_name)
    for _ in range(NUM_REPS):
        B.randint(0, 256, shape)
    cnts.stop(cnt_name)

overall_cnts.merge(cnts)
display_results(cnts)

In [10]:
# Time NUM_REPS calls to B.shuffle(inputs) on each backend's pre-generated 2D inputs.
op_name = 'shuffle'
cnts = PerfCounters()
for backend_name in BACKENDS:
    B = BACKENDS[backend_name]
    backend_inputs = INPUTS2D[backend_name]
    for shape_name in backend_inputs:
        inputs = backend_inputs[shape_name]
        cnt_name = "_".join((backend_name, op_name, shape_name))
        cnts.start(cnt_name)
        for _ in range(NUM_REPS):
            B.shuffle(inputs)
        cnts.stop(cnt_name)

overall_cnts.merge(cnts)
display_results(cnts)

## Reduce

### Sum

In [12]:
# Time NUM_REPS calls to B.sum(inputs, axis=-1) on each backend's pre-generated 2D inputs.
op_name = 'sum'
cnts = PerfCounters()

jobs = [
    (backend_name, B, shape_name, inputs)
    for backend_name, B in BACKENDS.items()
    for shape_name, inputs in INPUTS2D[backend_name].items()
]
for backend_name, B, shape_name, inputs in jobs:
    cnt_name = "_".join((backend_name, op_name, shape_name))
    cnts.start(cnt_name)
    for _ in range(NUM_REPS):
        B.sum(inputs, axis=-1)
    cnts.stop(cnt_name)

overall_cnts.merge(cnts)
display_results(cnts)

In [17]:
# Time NUM_REPS calls to B.mean(inputs, axis=-1) on each backend's pre-generated 2D inputs.
op_name = 'mean'
cnts = PerfCounters()
for backend_name in BACKENDS:
    B = BACKENDS[backend_name]
    backend_inputs = INPUTS2D[backend_name]
    for shape_name in backend_inputs:
        inputs = backend_inputs[shape_name]
        # Counter name layout: <backend>_<op>_<shape>
        cnt_name = "{}_{}_{}".format(backend_name, op_name, shape_name)
        cnts.start(cnt_name)
        for _ in range(NUM_REPS):
            B.mean(inputs, axis=-1)
        cnts.stop(cnt_name)

overall_cnts.merge(cnts)
display_results(cnts)

In [None]:
# utils

In [7]:
# Time NUM_REPS calls to B.shuffle(inputs, axis=-1) on each backend's
# pre-generated 2D inputs.
#
# This cell uses its own op name: the earlier 'shuffle' benchmark (no axis
# argument) already produces counters named <backend>_shuffle_<shape>, and both
# cells merge into the same overall_cnts. The name deliberately contains no
# underscore because display_results splits counter names on "_".
op_name = 'shuffle-lastaxis'
cnts = PerfCounters()
for backend_name, B in BACKENDS.items():
    for shape_name, inputs in INPUTS2D[backend_name].items():
        cnt_name = "%s_%s_%s" % (backend_name, op_name, shape_name)
        cnts.start(cnt_name)
        for _ in range(NUM_REPS):
            B.shuffle(inputs, axis=-1)
        cnts.stop(cnt_name)

overall_cnts.merge(cnts)
display_results(cnts)

## Overall backend performance

In [None]:
# Report the counters that the benchmark cells above merged into overall_cnts.
overall_cnts.report()

# backend benchmarking

## Sum

In [None]:
# Fresh counter set for this section.
# NOTE(review): op_cnts is not used anywhere else in the visible notebook.
op_cnts = PerfCounters()


In [None]:
# TODO: benchmark the fitness function and, for cosine_similarity, check whether tiling improves performance over broadcasting (the tiled array could potentially be cached)

# evoflow Ops benchmarking

In [None]:
# graveyard

def display_table_results(cnts):
    """Print a table of timings with one row per shape and one column per backend.

    Counter names are expected to follow ``<backend>_<op>_<shape>``. The
    backend is read from before the first underscore and the shape from after
    the last one, so op names containing underscores are handled.

    Args:
        cnts: counters object; ``cnts.get_all()['Timing counters']`` must
            yield ``(name, value)`` pairs.

    Raises:
        ValueError: if a counter name has fewer than three
            underscore-separated parts.
    """
    results = defaultdict(dict)
    # Group results by shape, then by backend.
    for cnt_name, value in cnts.get_all()['Timing counters']:
        backend_name, remainder = cnt_name.split("_", 1)
        _op_name, shape_name = remainder.rsplit("_", 1)
        results[shape_name][backend_name] = value * 1000  # ms

    # Reshape into column lists for pandas.
    data = defaultdict(list)
    for shape_name, per_backend in results.items():
        data['shape'].append(shape_name)
        for backend_name in BACKENDS.keys():
            # .get keeps every column the same length when a backend has no
            # counter for this shape (the cell is left empty instead of raising).
            data[backend_name].append(per_backend.get(backend_name))

    print(pd.DataFrame(data))

    #print(data)
#     results = 
