In [1]:
import pandas

# Example input: one row per (ID, OP, DATE) observation.
# DATE values are kept as plain timestamp strings; nothing here parses them.
d = pandas.DataFrame(
    dict(
        ID=[1, 1, 2, 3, 4, 4, 4, 4, 5, 5, 6],
        OP=['A', 'B', 'A', 'D', 'C', 'A', 'D', 'B', 'A', 'B', 'B'],
        DATE=[
            '2001-01-02 00:00:00',
            '2015-04-25 00:00:00',
            '2000-04-01 00:00:00',
            '2014-04-07 00:00:00',
            '2012-12-01 00:00:00',
            '2005-06-16 00:00:00',
            '2009-01-20 00:00:00',
            '2009-01-20 00:00:00',
            '2010-10-10 00:00:00',
            '2003-11-09 00:00:00',
            '2004-01-09 00:00:00',
        ],
    )
)

d

Unnamed: 0,ID,OP,DATE
0,1,A,2001-01-02 00:00:00
1,1,B,2015-04-25 00:00:00
2,2,A,2000-04-01 00:00:00
3,3,D,2014-04-07 00:00:00
4,4,C,2012-12-01 00:00:00
5,4,A,2005-06-16 00:00:00
6,4,D,2009-01-20 00:00:00
7,4,B,2009-01-20 00:00:00
8,5,A,2010-10-10 00:00:00
9,5,B,2003-11-09 00:00:00
10,6,B,2004-01-09 00:00:00


In [2]:
# Control table handed to RecordSpecification below: one row per 'rank'
# value ('1'..'3', stored as strings); the DATE and OP cells hold the
# names DATE1..DATE3 / OP1..OP3.
diagram = pandas.DataFrame(
    dict(
        rank=['1', '2', '3'],
        DATE=['DATE1', 'DATE2', 'DATE3'],
        OP=['OP1', 'OP2', 'OP3'],
    )
)

diagram

Unnamed: 0,rank,DATE,OP
0,1,DATE1,OP1
1,2,DATE2,OP2
2,3,DATE3,OP3


In [3]:
from data_algebra.cdata import *

# Build the record transform: incoming block records are described by the
# `diagram` control table and are keyed by 'ID'.
record_map = RecordMap(
    blocks_in=RecordSpecification(record_keys=['ID'], control_table=diagram)
)

# print() applies str() to its argument, giving the textual description.
print(record_map)

Transform block records of structure:
RecordSpecification
   record_keys: ['ID']
   control_table_keys: ['rank']
   control_table:
     rank   DATE   OP
   0    1  DATE1  OP1
   1    2  DATE2  OP2
   2    3  DATE3  OP3
to row records of the form:
  record_keys: ['ID']
 ['DATE1', 'DATE2', 'DATE3', 'OP1', 'OP2', 'OP3']



In [4]:
from data_algebra.data_ops import *

# Pipeline over table 'd':
#   1. add a 'rank' column from _row_number(), partitioned by ID and
#      ordered by DATE then OP (OP breaks ties between equal dates),
#   2. convert the records using record_map,
#   3. order the resulting rows by ID.
ops = (
    describe_table(d, table_name='d')
    .extend(
        {'rank': '_row_number()'},
        partition_by=['ID'],
        order_by=['DATE', 'OP'],
    )
    .convert_records(record_map)
    .order_rows(['ID'])
)

# Apply the pipeline to the in-memory frame.
ops.transform(d)

Unnamed: 0,ID,DATE1,OP1,DATE2,OP2,DATE3,OP3
0,1,2001-01-02 00:00:00,A,2015-04-25 00:00:00,B,,
1,2,2000-04-01 00:00:00,A,,,,
2,3,2014-04-07 00:00:00,D,,,,
3,4,2005-06-16 00:00:00,A,2009-01-20 00:00:00,B,2009-01-20 00:00:00,D
4,5,2003-11-09 00:00:00,B,2010-10-10 00:00:00,A,,
5,6,2004-01-09 00:00:00,B,,,,


In [5]:
import data_algebra.SQLite

# SQLite model object; passed to to_sql() here and reused later to prepare
# the database connection.
db_model = data_algebra.SQLite.SQLiteModel()

# Render the `ops` pipeline as a pretty-printed SQL string and show it.
sql = ops.to_sql(
    db_model,
    pretty=True,
)
print(sql)

SELECT "OP2",
       "OP3",
       "OP1",
       "DATE3",
       "DATE2",
       "ID",
       "DATE1"
FROM
  (SELECT "OP2",
          "OP3",
          "OP1",
          "DATE3",
          "DATE2",
          "ID",
          "DATE1"
   FROM
     (SELECT "ID" AS "ID",
             MAX(CASE
                     WHEN (CAST("rank" AS VARCHAR) = '1') THEN "DATE"
                     ELSE NULL
                 END) AS "DATE1",
             MAX(CASE
                     WHEN (CAST("rank" AS VARCHAR) = '1') THEN "OP"
                     ELSE NULL
                 END) AS "OP1",
             MAX(CASE
                     WHEN (CAST("rank" AS VARCHAR) = '2') THEN "DATE"
                     ELSE NULL
                 END) AS "DATE2",
             MAX(CASE
                     WHEN (CAST("rank" AS VARCHAR) = '2') THEN "OP"
                     ELSE NULL
                 END) AS "OP2",
             MAX(CASE
                     WHEN (CAST("rank" AS VARCHAR) = '3') THEN "DATE"
                     EL

In [6]:
import sqlite3

# Run the generated SQL against a throw-away in-memory SQLite database and
# collect the result as a pandas DataFrame.
con = sqlite3.connect(":memory:")
try:
    # Let the SQLite model set up the connection before use
    # (NOTE(review): exact effect is defined by data_algebra - confirm).
    db_model.prepare_connection(con)
    # Load the example frame as table 'd', overwriting any existing table.
    d.to_sql('d', con, if_exists='replace')
    res_db = pandas.read_sql_query(sql, con)
finally:
    # Close the connection even if preparing, loading or querying raised,
    # so a failing cell does not leak an open database handle.
    con.close()

res_db

Unnamed: 0,OP2,OP3,OP1,DATE3,DATE2,ID,DATE1
0,B,,A,,2015-04-25 00:00:00,1,2001-01-02 00:00:00
1,,,A,,,2,2000-04-01 00:00:00
2,,,D,,,3,2014-04-07 00:00:00
3,B,D,A,2009-01-20 00:00:00,2009-01-20 00:00:00,4,2005-06-16 00:00:00
4,A,,B,,2010-10-10 00:00:00,5,2003-11-09 00:00:00
5,,,B,,,6,2004-01-09 00:00:00


In [7]:
# Variant pipeline that removes DATE ties before ranking:
#   1. collapse rows sharing (ID, DATE) to one row, keeping OP.max(),
#   2. add 'rank' from _row_number(), partitioned by ID and ordered by DATE,
#   3. convert the records using record_map,
#   4. order the resulting rows by ID.
# The table is named 'd' explicitly, matching the `ops` pipeline, so SQL
# rendered from ops2 refers to the table 'd' that is loaded into SQLite.
ops2 = (
    describe_table(d, table_name='d')
    .project(
        {'OP': 'OP.max()'},
        group_by=['ID', 'DATE'],
    )
    .extend(
        {'rank': '_row_number()'},
        partition_by=['ID'],
        order_by=['DATE'],
    )
    .convert_records(record_map)
    .order_rows(['ID'])
)

# Apply the pipeline to the in-memory frame.
ops2.transform(d)

Unnamed: 0,ID,DATE1,OP1,DATE2,OP2,DATE3,OP3
0,1,2001-01-02 00:00:00,A,2015-04-25 00:00:00,B,,
1,2,2000-04-01 00:00:00,A,,,,
2,3,2014-04-07 00:00:00,D,,,,
3,4,2005-06-16 00:00:00,A,2009-01-20 00:00:00,D,2012-12-01 00:00:00,C
4,5,2003-11-09 00:00:00,B,2010-10-10 00:00:00,A,,
5,6,2004-01-09 00:00:00,B,,,,
