In [1]:
import sqlite3

import pandas

from data_algebra.data_ops import *
from data_algebra.cdata import *
import data_algebra.SQLite
from data_algebra.util import pandas_to_example_str


# Load the example data; the relative path is resolved against the kernel's
# current working directory.
iris = pandas.read_csv('iris_small.csv')
# A bare name as the cell's last expression makes the notebook display the frame.
iris

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species,id
0,5.1,3.5,1.4,0.2,setosa,0
1,4.9,3.0,1.4,0.2,setosa,1
2,4.7,3.2,1.3,0.2,setosa,2


In [2]:
# Print Python source text that rebuilds `iris` as a pandas.DataFrame(...) literal
# (see the output below).
print(pandas_to_example_str(iris))

pandas.DataFrame({
    'Sepal.Length': [5.1, 4.9, 4.7],
    'Sepal.Width': [3.5, 3.0, 3.2],
    'Petal.Length': [1.4, 1.4, 1.3],
    'Petal.Width': [0.2, 0.2, 0.2],
    'Species': ['setosa', 'setosa', 'setosa'],
    'id': [0, 1, 2],
    })


In [3]:
# Describe `iris` as a table named 'iris'; the repr below shows the description
# holds the table name and the column names. Later cells build operations on `td`.
td = describe_table(iris, 'iris')
td

TableDescription(
 table_name='iris',
 column_names=[
   'Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',  
 'Species', 'id'])

In [4]:
# Control table, written one row per measurement cell: "Part" and "Measure"
# label the cell, and "Value" names the iris column that supplies its value.
control_table = pandas.DataFrame(
    [
        ("Sepal", "Length", "Sepal.Length"),
        ("Sepal", "Width", "Sepal.Width"),
        ("Petal", "Length", "Petal.Length"),
        ("Petal", "Width", "Petal.Width"),
    ],
    columns=["Part", "Measure", "Value"],
)

# Record layout: 'id' and 'Species' are the record keys, while 'Part' and
# 'Measure' are the key columns of the control table.
record_spec = data_algebra.cdata.RecordSpecification(
    control_table,
    record_keys=['id', 'Species'],
    control_table_keys=['Part', 'Measure'],
)
record_spec

data_algebra.cdata.RecordSpecification(
    record_keys=['id', 'Species'],
    control_table=pandas.DataFrame({
    'Part': ['Sepal', 'Sepal', 'Petal', 'Petal'],
    'Measure': ['Length', 'Width', 'Length', 'Width'],
    'Value': ['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'],
    }),
    control_table_keys=['Part', 'Measure'])

In [5]:
# Attach a record conversion to the table description. Only `blocks_out` is
# supplied (the repr below shows blocks_in=None), so each iris row is expanded
# into one output row per control-table row, with columns
# id, Species, Part, Measure, Value.
ops = td.convert_records(record_map=RecordMap(blocks_out=record_spec))
ops

TableDescription(
 table_name='iris',
 column_names=[
   'Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width',  
 'Species', 'id']) .\
   convert_record(data_algebra.cdata.RecordMap(
       blocks_in=None,
       blocks_out=data_algebra.cdata.RecordSpecification(
       record_keys=['id', 'Species'],
       control_table=pandas.DataFrame({
       'Part': ['Sepal', 'Sepal', 'Petal', 'Petal'],
       'Measure': ['Length', 'Width', 'Length', 'Width'],
       'Value': ['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'],
       }),
       control_table_keys=['Part', 'Measure']))
,   blocks_out_table=TableDescription(
    table_name='cdata_temp_record',
    column_names=[
      'id', 'Species', 'Part', 'Measure', 'Value']))

In [6]:
# Show which operator-node class convert_records() produced.
type(ops)

data_algebra.data_ops.ConvertRecordsNode

In [7]:
# Translate the operator pipeline into SQL text using the SQLite model; no
# database connection is involved at this point.
db_model = data_algebra.SQLite.SQLiteModel()
sql_str = ops.to_sql(db_model, pretty=True)
# print() keeps the multi-line SQL readable; a bare expression would show the
# string repr with escaped newlines.
print(sql_str)

SELECT a."id" AS "id",
       a."Species" AS "Species",
       b."Part" AS "Part",
       b."Measure" AS "Measure",
       CASE
           WHEN b."Value" = 'Sepal.Length' THEN a."Sepal.Length"
           WHEN b."Value" = 'Sepal.Width' THEN a."Sepal.Width"
           WHEN b."Value" = 'Petal.Length' THEN a."Petal.Length"
           WHEN b."Value" = 'Petal.Width' THEN a."Petal.Width"
           ELSE NULL
       END AS "Value"
FROM ("iris") a
CROSS JOIN ("cdata_temp_record") b
ORDER BY a."id",
         a."Species",
         b."Part",
         b."Measure"


In [8]:
# Run the generated SQL against a throwaway in-memory SQLite database.
conn = sqlite3.connect(":memory:")
try:
    db_model.prepare_connection(conn)

    # The query reads two tables: the data itself ("iris") and the control
    # table, which must be stored under the name the generated SQL refers to.
    db_model.insert_table(conn, iris, 'iris')
    db_model.insert_table(
        conn,
        ops.record_map.blocks_out.control_table,
        ops.blocks_out_table.table_name,
    )

    blocks_res = db_model.read_query(conn, sql_str)
except Exception:
    # If setup or the query fails, execution stops before the cell that closes
    # the connection, so release it here. On success it is intentionally left
    # open for that later cell.
    conn.close()
    raise

# Last expression of the cell: display the query result.
blocks_res


Unnamed: 0,id,Species,Part,Measure,Value
0,0,setosa,Petal,Length,1.4
1,0,setosa,Petal,Width,0.2
2,0,setosa,Sepal,Length,5.1
3,0,setosa,Sepal,Width,3.5
4,1,setosa,Petal,Length,1.4
5,1,setosa,Petal,Width,0.2
6,1,setosa,Sepal,Length,4.9
7,1,setosa,Sepal,Width,3.0
8,2,setosa,Petal,Length,1.3
9,2,setosa,Petal,Width,0.2


In [9]:
# Release the in-memory database connection opened in the previous cell.
conn.close()