In [1]:
import pandas as pd
import numpy as np
from data_algebra.data_ops import descr
from data_algebra.solutions import def_multi_column_map
import data_algebra.test_util
import data_algebra.BigQuery

In [2]:
# Example data: one row per id, with two string-valued columns (va, vb) to re-code.
d = pd.DataFrame(
    [
        (1, 'a', 'e'),
        (2, 'b', 'e'),
        (3, 'a', 'g'),
        (4, 'c', 'f'),
    ],
    columns=['id', 'va', 'vb'],
)

# Mapping table: each row reads "in column `column_name`, the value
# `column_value` maps to `mapped_value`".
# Note there is no entry for ('va', 'c') or for ('vb', 'g').
m = pd.DataFrame(
    [
        ('va', 'a', 1.),
        ('va', 'b', 2.),
        ('vb', 'e', 3.),
        ('vb', 'f', 4.),
    ],
    columns=['column_name', 'column_value', 'mapped_value'],
)

row_keys = ['id']
cols_to_map = ['va', 'vb']

# Build the operator pipeline from descriptions of the two tables,
# then evaluate it against the in-memory frames (keyed by table name).
ops = def_multi_column_map(
    descr(d=d),
    mapping_table=descr(m=m),
    row_keys=row_keys,
    cols_to_map=cols_to_map,
)
res = ops.eval(dict(d=d, m=m))

# Values without a mapping row ('c' in va, 'g' in vb) appear as missing in the result.
res

Unnamed: 0,id,va,vb
0,1,1.0,3.0
1,2,2.0,3.0
2,3,1.0,
3,4,,4.0


In [3]:
# Expected mapped frame, written row by row; NaN marks the two values
# (id 3 / vb and id 4 / va) that have no row in the mapping table.
expect = pd.DataFrame(
    [
        (1, 1.0, 3.0),
        (2, 2.0, 3.0),
        (3, 1.0, np.nan),
        (4, np.nan, 4.0),
    ],
    columns=['id', 'va', 'vb'],
)

# Fail the notebook run if the computed result drifts from the expectation.
assert data_algebra.test_util.equivalent_frames(res, expect)

In [4]:
# Translate the same operator pipeline into SQL text using the BigQuery
# model and print it, so the generated query can be inspected.
db_model = data_algebra.BigQuery.BigQueryModel()
print(db_model.to_sql(ops))

-- data_algebra SQL https://github.com/WinVector/data_algebra
--  dialect: BigQueryModel
--       string quote: "
--   identifier quote: `
WITH
 `convert_records_blocks_out_0` AS (
  -- convert records blocks out
  SELECT
     a.`id` AS `id`,
     b.`column_name` AS `column_name`,
     CASE   WHEN CAST(b.`column_value` AS STRING) = "va" THEN a.`va`   WHEN CAST(b.`column_value` AS STRING) = "vb" THEN a.`vb`  ELSE NULL END AS `column_value`
   FROM ( SELECT * FROM
   `d`
    ) a
   CROSS JOIN (
    SELECT
     *
    FROM (
        (SELECT "va" AS `column_name`, "va" AS `column_value`)
        UNION ALL (SELECT "vb" AS `column_name`, "vb" AS `column_value`)
    ) `table_values`
    ) b
    ORDER BY
    a.`id`,
    b.`column_name`
 ) ,
 `natural_join_1` AS (
  SELECT  -- _0..natural_join(b= _1, by=['column_name', 'column_value'], jointype='LEFT')
   COALESCE(`convert_records_blocks_out_0`.`column_name`, `m`.`column_name`) AS `column_name` ,
   COALESCE(`convert_records_blocks_out_0`.`colum