In [1]:
import pandas

import data_algebra.cdata
from data_algebra.cdata import *

# Example input in "block" form: two records (record_id 1 and 2), each stored
# as three rows labelled by `row`, with the values spread over col1..col3.
# Written row by row so the literal reads like the table displayed below.
data = pandas.DataFrame(
    [
        (1, 'row1', 1, 2, 3),
        (1, 'row2', 4, 5, 6),
        (1, 'row3', 7, 8, 9),
        (2, 'row1', 11, 12, 13),
        (2, 'row2', 14, 15, 16),
        (2, 'row3', 17, 18, 19),
    ],
    columns=['record_id', 'row', 'col1', 'col2', 'col3'],
)

data

Unnamed: 0,record_id,row,col1,col2,col3
0,1,row1,1,2,3
1,1,row2,4,5,6
2,1,row3,7,8,9
3,2,row1,11,12,13
4,2,row2,14,15,16
5,2,row3,17,18,19


In [2]:
# Column(s) that say which record a row belongs to; shared by the incoming
# and outgoing record specifications below.
record_keys = ['record_id']

# Control table for the incoming layout: one line per block row, with a
# symbolic value name (v11 .. v33) in every cell position.
incoming_shape = pandas.DataFrame(
    [
        ('row1', 'v11', 'v12', 'v13'),
        ('row2', 'v21', 'v22', 'v23'),
        ('row3', 'v31', 'v32', 'v33'),
    ],
    columns=['row', 'col1', 'col2', 'col3'],
)

incoming_shape

Unnamed: 0,row,col1,col2,col3
0,row1,v11,v12,v13
1,row2,v21,v22,v23
2,row3,v31,v32,v33


In [3]:
# Control table for the outgoing layout. It reuses the same symbolic value
# names as the incoming control table, but arranged the other way around:
# each line is one of the original columns, each c_row* column one of the
# original rows.
outgoing_shape = pandas.DataFrame(
    [
        ('rec_col1', 'v11', 'v21', 'v31'),
        ('rec_col2', 'v12', 'v22', 'v32'),
        ('rec_col3', 'v13', 'v23', 'v33'),
    ],
    columns=['column_label', 'c_row1', 'c_row2', 'c_row3'],
)

outgoing_shape

Unnamed: 0,column_label,c_row1,c_row2,c_row3
0,rec_col1,v11,v21,v31
1,rec_col2,v12,v22,v32
2,rec_col3,v13,v23,v33


In [4]:
# Describe the whole conversion as a single RecordMap: rows are read according
# to `incoming_shape` and written according to `outgoing_shape`, with
# `record_id` identifying the record on both sides.
# RecordMap is referenced through the same fully qualified path as
# RecordSpecification (and as the repr printed below) so the cell does not mix
# star-imported and qualified names.
record_map = data_algebra.cdata.RecordMap(
    blocks_in=data_algebra.cdata.RecordSpecification(
        control_table=incoming_shape,
        record_keys=record_keys,
    ),
    blocks_out=data_algebra.cdata.RecordSpecification(
        control_table=outgoing_shape,
        record_keys=record_keys,
    ),
)

record_map

data_algebra.cdata.RecordMap(
    blocks_in=data_algebra.cdata.RecordSpecification(
    record_keys=['record_id'],
    control_table=pandas.DataFrame({
    'row': ['row1', 'row2', 'row3'],
    'col1': ['v11', 'v21', 'v31'],
    'col2': ['v12', 'v22', 'v32'],
    'col3': ['v13', 'v23', 'v33'],
    }),
    control_table_keys=['row']),
    blocks_out=data_algebra.cdata.RecordSpecification(
    record_keys=['record_id'],
    control_table=pandas.DataFrame({
    'column_label': ['rec_col1', 'rec_col2', 'rec_col3'],
    'c_row1': ['v11', 'v12', 'v13'],
    'c_row2': ['v21', 'v22', 'v23'],
    'c_row3': ['v31', 'v32', 'v33'],
    }),
    control_table_keys=['column_label']))

In [5]:
# Apply the record map to the example data. In the result shown below, each
# record keeps its record_id, the original columns col1..col3 become the lines
# rec_col1..rec_col3, and the original rows become the columns c_row1..c_row3.
res = record_map.transform(data)

res

Unnamed: 0,record_id,column_label,c_row1,c_row2,c_row3
0,1,rec_col1,1,4,7
1,1,rec_col2,2,5,8
2,1,rec_col3,3,6,9
3,2,rec_col1,11,14,17
4,2,rec_col2,12,15,18
5,2,rec_col3,13,16,19


In [6]:
# Build the reverse mapping. As the repr below shows, the result is a
# RecordMap whose blocks_in / blocks_out are those of `record_map` swapped.
inv = record_map.inverse()

inv

data_algebra.cdata.RecordMap(
    blocks_in=data_algebra.cdata.RecordSpecification(
    record_keys=['record_id'],
    control_table=pandas.DataFrame({
    'column_label': ['rec_col1', 'rec_col2', 'rec_col3'],
    'c_row1': ['v11', 'v12', 'v13'],
    'c_row2': ['v21', 'v22', 'v23'],
    'c_row3': ['v31', 'v32', 'v33'],
    }),
    control_table_keys=['column_label']),
    blocks_out=data_algebra.cdata.RecordSpecification(
    record_keys=['record_id'],
    control_table=pandas.DataFrame({
    'row': ['row1', 'row2', 'row3'],
    'col1': ['v11', 'v21', 'v31'],
    'col2': ['v12', 'v22', 'v32'],
    'col3': ['v13', 'v23', 'v33'],
    }),
    control_table_keys=['row']))

In [7]:
# Round trip: mapping `res` back through the inverse gives the same table as
# the original `data` displayed in the first cell.
inv.transform(res)

Unnamed: 0,record_id,row,col1,col2,col3
0,1,row1,1,2,3
1,1,row2,4,5,6
2,1,row3,7,8,9
3,2,row1,11,12,13
4,2,row2,14,15,16
5,2,row3,17,18,19


In [8]:
# Ask the map for a symbolic example of its expected input. The output below
# is the incoming control table plus a `record_id` column filled with the
# placeholder string 'record_id'.
inp = record_map.example_input()

inp

Unnamed: 0,row,col1,col2,col3,record_id
0,row1,v11,v12,v13,record_id
1,row2,v21,v22,v23,record_id
2,row3,v31,v32,v33,record_id


In [9]:
# Transform the symbolic example: the result shows where each named value
# (v11 .. v33) ends up in the outgoing layout.
out = record_map.transform(inp)

out

Unnamed: 0,record_id,column_label,c_row1,c_row2,c_row3
0,record_id,rec_col1,v11,v21,v31
1,record_id,rec_col2,v12,v22,v32
2,record_id,rec_col3,v13,v23,v33


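Compose steps: the same conversion can also be built from two separate record maps (one that only specifies the incoming block shape, one that only specifies the outgoing block shape), which are either applied one after the other or combined into a single map with `compose`.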

In [10]:

# First half of the conversion: a map that only declares the incoming block
# shape (no blocks_out).
# NOTE(review): the intermediate table produced by step1 is not displayed in
# this notebook; presumably it holds one row per record -- confirm.
# RecordMap is written with the same fully qualified path as
# RecordSpecification so the cell does not mix star-imported and qualified
# names.
step1 = data_algebra.cdata.RecordMap(
    blocks_in=data_algebra.cdata.RecordSpecification(
        control_table=incoming_shape,
        record_keys=record_keys,
    ),
)

# Second half: a map that only declares the outgoing block shape
# (no blocks_in).
step2 = data_algebra.cdata.RecordMap(
    blocks_out=data_algebra.cdata.RecordSpecification(
        control_table=outgoing_shape,
        record_keys=record_keys,
    ),
)

# Applying the two steps in sequence gives the same table as the one-shot
# `record_map.transform(data)` result shown earlier.
step2.transform(step1.transform(data))

Unnamed: 0,record_id,column_label,c_row1,c_row2,c_row3
0,1,rec_col1,1,4,7
1,1,rec_col2,2,5,8
2,1,rec_col3,3,6,9
3,2,rec_col1,11,14,17
4,2,rec_col2,12,15,18
5,2,rec_col3,13,16,19


In [11]:
# Combine the two steps into one map. Per the repr below, the composed
# RecordMap has step1's blocks_in and step2's blocks_out, i.e. the same
# specification as `record_map`.
both = step2.compose(step1)

both

data_algebra.cdata.RecordMap(
    blocks_in=data_algebra.cdata.RecordSpecification(
    record_keys=['record_id'],
    control_table=pandas.DataFrame({
    'row': ['row1', 'row2', 'row3'],
    'col1': ['v11', 'v21', 'v31'],
    'col2': ['v12', 'v22', 'v32'],
    'col3': ['v13', 'v23', 'v33'],
    }),
    control_table_keys=['row']),
    blocks_out=data_algebra.cdata.RecordSpecification(
    record_keys=['record_id'],
    control_table=pandas.DataFrame({
    'column_label': ['rec_col1', 'rec_col2', 'rec_col3'],
    'c_row1': ['v11', 'v12', 'v13'],
    'c_row2': ['v21', 'v22', 'v23'],
    'c_row3': ['v31', 'v32', 'v33'],
    }),
    control_table_keys=['column_label']))

In [12]:
# The composed map applied to `data` gives the same table as running step1
# and then step2 separately.
both.transform(data)

Unnamed: 0,record_id,column_label,c_row1,c_row2,c_row3
0,1,rec_col1,1,4,7
1,1,rec_col2,2,5,8
2,1,rec_col3,3,6,9
3,2,rec_col1,11,14,17
4,2,rec_col2,12,15,18
5,2,rec_col3,13,16,19
