Note: some special functions, such as `is_in`, cannot be used directly in
`select_rows`; instead, their results must first be landed as column values (for example with `extend`).

In [1]:
import sqlite3

import numpy
import pandas

from data_algebra.data_ops import *
import data_algebra.util
import data_algebra.test_util
import data_algebra.SQLite

import pytest

# Example input: one (ID, OP) pair per row; IDs repeat across rows.
d = pandas.DataFrame(
    [
        (1, 'A'), (1, 'B'),
        (2, 'A'),
        (3, 'D'),
        (4, 'C'), (4, 'A'), (4, 'D'), (4, 'B'),
        (5, 'A'), (5, 'B'),
        (6, 'B'),
    ],
    columns=['ID', 'OP'],
)

d

Unnamed: 0,ID,OP
0,1,A
1,1,B
2,2,A
3,3,D
4,4,C
5,4,A
6,4,D
7,4,B
8,5,A
9,5,B


In [2]:
# Describe table d, then land the is_in membership test as a new column v.
ops = (
    describe_table(d, table_name='d')
    .extend({'v': 'ID.is_in([3, 4])'})
)
ops_str = str(ops)  # rendering the pipeline as text must not raise
d2 = ops.transform(d)  # apply the pipeline directly to the data frame

d2

Unnamed: 0,ID,OP,v
0,1,A,False
1,1,B,False
2,2,A,False
3,3,D,True
4,4,C,True
5,4,A,True
6,4,D,True
7,4,B,True
8,5,A,False
9,5,B,False


In [3]:
# Expected result: the input columns plus flag v, which is True exactly
# on the rows whose ID is 3 or 4.
expect = pandas.DataFrame(
    {
        'ID': [1, 1, 2, 3, 4, 4, 4, 4, 5, 5, 6],
        'OP': ['A', 'B', 'A', 'D', 'C', 'A', 'D', 'B', 'A', 'B', 'B'],
        'v': [
            False, False, False,
            True, True, True, True, True,
            False, False, False,
        ],
    }
)

# The result of ops.transform(d) must match the hand-built expected frame.
assert data_algebra.test_util.equivalent_frames(expect, d2)

In [7]:
# Create a SQLite model and render the same ops pipeline as pretty-printed SQL.
# NOTE(review): the printed IN list shows the characters of "[3, 4]" as
# separate string literals rather than the values 3 and 4 -- confirm that
# this rendering is intended.
db_model = data_algebra.SQLite.SQLiteModel()
sql = ops.to_sql(db_model, pretty=True)

print(sql)

SELECT "ID" IN ('[',
                '3',
                ',',
                ' ',
                '4',
                ']') AS "v",
       "OP",
       "ID"
FROM "d"


In [5]:
# Run the generated SQL against a throwaway in-memory SQLite database.
# A sqlite3 connection used as a context manager only commits or rolls back
# the transaction; it does not close the connection. Close it explicitly so
# the connection is released even if one of the steps raises.
con = sqlite3.connect(':memory:')
try:
    db_model.prepare_connection(con)
    d.to_sql(name='d', con=con)  # load the example frame as table "d"
    res_db = pandas.read_sql(sql, con=con)
finally:
    con.close()

res_db


Unnamed: 0,v,OP,ID
0,0,A,1
1,0,B,1
2,0,A,2
3,1,D,3
4,1,C,4
5,1,A,4
6,1,D,4
7,1,B,4
8,0,A,5
9,0,B,5


In [6]:
# The result read back from SQLite must match the same expected frame.
assert data_algebra.test_util.equivalent_frames(expect, res_db)
