In [1]:
import sqlite3
import pandas
import data_algebra.cdata
import data_algebra.SQLite
from data_algebra.data_ops import *
from data_algebra.data_pipe import *

In [2]:
# Load the iris measurements and keep only the first three rows so every
# intermediate table in this example is small enough to display in full.
iris = (
    pandas.read_csv('iris.csv')
    .iloc[range(3), :]
    .reset_index(drop=True)
)
# Give each row an explicit 0-based identifier; 'id' is used as a record key
# by the record specification defined later in the notebook.
iris['id'] = list(range(len(iris)))
iris

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species,id
0,5.1,3.5,1.4,0.2,setosa,0
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,2


In [3]:
# Control table adapted from the cdata vignette:
#   https://github.com/WinVector/cdata/blob/master/vignettes/control_table_keys.Rmd
#
# One row per measurement: ('Part', 'Measure') identify the cell inside a
# block record, and 'Value' names the row-record column holding that cell.
control_table = pandas.DataFrame(
    [
        ("Sepal", "Length", "Sepal.Length"),
        ("Sepal", "Width", "Sepal.Width"),
        ("Petal", "Length", "Petal.Length"),
        ("Petal", "Width", "Petal.Width"),
    ],
    columns=['Part', 'Measure', 'Value'],
)

# 'id' and 'Species' identify a record; 'Part' and 'Measure' identify a row
# within that record's block.
record_spec = data_algebra.cdata.RecordSpecification(
    control_table,
    record_keys=['id', 'Species'],
    control_table_keys=['Part', 'Measure'],
)
record_spec

RecordSpecification
   record_keys: ['id', 'Species']
   control_table_keys: ['Part', 'Measure']
   control_table:
       Part Measure         Value
   0  Sepal  Length  Sepal.Length
   1  Sepal   Width   Sepal.Width
   2  Petal  Length  Petal.Length
   3  Petal   Width   Petal.Width

In [4]:
# Build (but do not yet execute) the SQL that converts row records into
# block records.
db_model = data_algebra.SQLite.SQLiteModel()

# Description of the table to be transformed. 'id' is listed explicitly:
# record_spec uses it as a record key and the generated query selects
# a."id", so the description must declare it alongside the other columns.
source_table = data_algebra.data_ops.TableDescription(
    'iris',
    ['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width', 'Species', 'id']
    )

# Description of the table that will hold the control table; the generated
# query cross-joins the source table against it.
temp_table = data_algebra.data_ops.TableDescription(
    'control_table',
    record_spec.control_table.columns
    )

# In-memory SQLite database used by the remaining cells.
conn = sqlite3.connect(':memory:')
cur = conn.cursor()

sql = db_model.row_recs_to_blocks_query(source_table, record_spec, temp_table)
print(sql)

SELECT
 a."id" AS "id",
 a."Species" AS "Species",
 b."Part" AS "Part",
 b."Measure" AS "Measure",
 CASE
  WHEN b."Value" = 'Sepal.Length' THEN a."Sepal.Length"
  WHEN b."Value" = 'Sepal.Width' THEN a."Sepal.Width"
  WHEN b."Value" = 'Petal.Length' THEN a."Petal.Length"
  WHEN b."Value" = 'Petal.Width' THEN a."Petal.Width"
  ELSE NULL END AS "Value"
FROM "iris" a
CROSS JOIN "control_table" b
 ORDER BY a."id", a."Species", b."Part", b."Measure"


In [5]:
# Load both inputs of the generated query into the database. Table names are
# taken from the same TableDescription objects that were used to build `sql`,
# so the inserted tables cannot drift out of sync with the names in the query.
db_model.insert_table(conn, iris, source_table.table_name)
db_model.insert_table(conn, record_spec.control_table, temp_table.table_name)

# Execute the row-records-to-blocks query and show the block-form result.
res_blocks = db_model.read_query(conn, sql)
res_blocks

Unnamed: 0,id,Species,Part,Measure,Value
0,0,setosa,Petal,Length,1.4
1,0,setosa,Petal,Width,0.2
2,0,setosa,Sepal,Length,5.1
3,0,setosa,Sepal,Width,3.5
4,1,setosa,Petal,Length,1.4
5,1,setosa,Petal,Width,0.2
6,1,setosa,Sepal,Length,4.9
7,1,setosa,Sepal,Width,3.0
8,2,setosa,Petal,Length,1.3
9,2,setosa,Petal,Width,0.2
10,2,setosa,Sepal,Length,4.7
11,2,setosa,Sepal,Width,3.2


In [6]:
# Description of the block-form table. 'id' is listed explicitly: it is a
# column of res_blocks and a record key, and the generated query both selects
# it and groups by it.
source_table2 = data_algebra.data_ops.TableDescription(
    'res_blocks',
    ['id', 'Species', 'Part', 'Measure', 'Value']
    )

# Store the block-form result under the name the description declares, so the
# table name is written in exactly one place.
db_model.insert_table(conn, res_blocks, source_table2.table_name)

# Build the inverse query: collapse each block back into a single row record.
sql_back = db_model.blocks_to_row_recs_query(source_table2, record_spec)
print(sql_back)

SELECT
 "id" AS "id",
 "Species" AS "Species",
 MAX(CASE WHEN  ( "Part" = 'Sepal' )  AND  ( "Measure" = 'Length' )  THEN "Value" ELSE NULL END) AS "Sepal.Length",
 MAX(CASE WHEN  ( "Part" = 'Sepal' )  AND  ( "Measure" = 'Width' )  THEN "Value" ELSE NULL END) AS "Sepal.Width",
 MAX(CASE WHEN  ( "Part" = 'Petal' )  AND  ( "Measure" = 'Length' )  THEN "Value" ELSE NULL END) AS "Petal.Length",
 MAX(CASE WHEN  ( "Part" = 'Petal' )  AND  ( "Measure" = 'Width' )  THEN "Value" ELSE NULL END) AS "Petal.Width"
FROM "res_blocks"
 GROUP BY "id", "Species"
 ORDER BY "id", "Species"


In [7]:
# Execute the blocks-to-row-records query built in the previous cell. The
# query groups by ("id", "Species"), so the result has one row per record with
# the four measurement columns restored.
res_rows = db_model.read_query(conn, sql_back)
res_rows

Unnamed: 0,id,Species,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
0,0,setosa,5.1,3.5,1.4,0.2
1,1,setosa,4.9,3.0,1.4,0.2
2,2,setosa,4.7,3.2,1.3,0.2


In [8]:
# Close the SQLite connection opened earlier in the notebook.
conn.close()