The most common data conversions are converting into and out of row-records. This is commonly called pivoting/un-pivoting or melt/cast. In cdata terms, these are exactly the cases where the control table has two columns.

In these cases we supply convenience functions for specifying the transforms.

In [1]:
import io
import pandas

from data_algebra.cdata import *

In [2]:
# Example row-records: one row per value of `n`, one column per measurement.
# Written row by row so the literal lines up with the table displayed below.
d = pandas.DataFrame(
    data=[
        (10000, 9000.0, 1000.0, 0.005455, 0.002913, 0.008368),
        (23778, 21400.2, 2377.8, 0.003538, 0.001889, 0.005427),
        (37556, 33800.4, 3755.6, 0.002815, 0.001503, 0.004318),
        (51335, 46201.5, 5133.5, 0.002408, 0.001286, 0.003693),
    ],
    columns=['n', 'n_A', 'n_B', 'T', 'E', 'T+E'],
)

d

Unnamed: 0,n,n_A,n_B,T,E,T+E
0,10000,9000.0,1000.0,0.005455,0.002913,0.008368
1,23778,21400.2,2377.8,0.003538,0.001889,0.005427
2,37556,33800.4,3755.6,0.002815,0.001503,0.004318
3,51335,46201.5,5133.5,0.002408,0.001286,0.003693


In [3]:
# Build a map from row-records (one row per `n`) to blocks
# (one row per `n` and selected measurement column).
# Columns of the input that are not listed here (n_A, n_B, E) do not
# appear in the transformed result shown below.
mp = pivot_rowrecs_to_blocks(
    attribute_key_column="curve",  # new column holding the name of the source column each value came from
    attribute_value_column="effect size", # new column holding the measurement values themselves
    record_keys=["n"], # columns identifying each incoming record; carried through unchanged
    record_value_columns=["T", "T+E"],  # source columns whose values are moved into the value column
)

mp

data_algebra.cdata.RecordMap(
    blocks_in=None,
    blocks_out=data_algebra.cdata.RecordSpecification(
    record_keys=['n'],
    control_table=pandas.DataFrame({
    'curve': ['T', 'T+E'],
    'effect size': ['T', 'T+E'],
    }),
    control_table_keys=['curve']))

In [4]:
# Apply the map: each row of d becomes two rows, one per curve ("T" and "T+E").
d2 = mp.transform(d)
d2

Unnamed: 0,n,curve,effect size
0,10000,T,0.005455
1,10000,T+E,0.008368
2,23778,T,0.003538
3,23778,T+E,0.005427
4,37556,T,0.002815
5,37556,T+E,0.004318
6,51335,T,0.002408
7,51335,T+E,0.003693


We could invert the transform as follows.

In [5]:
# The inverse map takes the block form back to row-records
# (note blocks_in is now set and blocks_out is None).
mp.inverse()

data_algebra.cdata.RecordMap(
    blocks_in=data_algebra.cdata.RecordSpecification(
    record_keys=['n'],
    control_table=pandas.DataFrame({
    'curve': ['T', 'T+E'],
    'effect size': ['T', 'T+E'],
    }),
    control_table_keys=['curve']),
    blocks_out=None)

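Or we can define the inverse transform by hand. It is an inverse only with respect to the columns that were preserved: columns dropped by the forward transform (n_A, n_B, and E) are not recovered.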

In [6]:
# Same column arguments as the pivot_rowrecs_to_blocks call above, but the map
# is built in the opposite direction: blocks in, row-records out.
inv = pivot_blocks_to_rowrecs(
    attribute_key_column="curve",  # block column naming which measurement a row holds
    attribute_value_column="effect size",  # block column holding the measurement value
    record_keys=["n"],  # columns identifying which rows belong to the same record
    record_value_columns=["T", "T+E"],  # key values that each become a column of the row-record
)

inv

data_algebra.cdata.RecordMap(
    blocks_in=data_algebra.cdata.RecordSpecification(
    record_keys=['n'],
    control_table=pandas.DataFrame({
    'curve': ['T', 'T+E'],
    'effect size': ['T', 'T+E'],
    }),
    control_table_keys=['curve']),
    blocks_out=None)

In [7]:
# Convert the blocks in d2 back to one row per `n`, with columns T and T+E.
inv.transform(d2)

Unnamed: 0,n,T,T+E
0,10000,0.005455,0.008368
1,23778,0.003538,0.005427
2,37556,0.002815,0.004318
3,51335,0.002408,0.003693
