From be919d86df45cc41b58626f5a70181f1e5616133 Mon Sep 17 00:00:00 2001 From: Ajinkya Ghonge Date: Wed, 5 Apr 2023 13:11:09 -0700 Subject: [PATCH] Integrate OramEncoder library with pc_translator. (#2269) Summary: Pull Request resolved: https://github.com/facebookresearch/fbpcs/pull/2269 # Context As per the PC Translator design, we need a runtime library that will be called during a PC run. This library will be called at the beginning of the PC run to encode specified fields in the publisher-side input into encoded breakdown (aggregation) IDs based on the active PC instruction sets for the run. The library will filter the active PC instruction sets for the run by parsing the pcs_features, i.e. the gatekeepers for the particular run. # Product decisions In this stack we focus solely on the functionality required for private lift runs. We focus on the MVP implementation of the library and its integration with the fbpcf ORAM encoder library. # Stack 1. Create runtime pc_translator library. 2. Add logic to retrieve and parse the PC instruction set, filtered based on the active gatekeepers for the run. 3. Integrate pc_translator library with fbpcf ORAM encoder. 4. Add logic to generate transformed publisher output with the encoded breakdown ID and write the output. # In this diff Integrate pc_translator library with fbpcf ORAM encoder. 
Differential Revision: D44634384 Privacy Context Container: L416713 fbshipit-source-id: efc8dc41918004f626a91aedd7ac6211cb1939d3 --- fbpcs/pc_translator/PCTranslator.cpp | 65 ++++++++++++++++--- fbpcs/pc_translator/PCTranslator.h | 22 +++---- .../pc_translator/tests/TestPCTranslator.cpp | 36 ++++++++++ .../input_processing/TestPCInstructionSet.cpp | 5 +- ...son => pc_instr_test_instruction_set.json} | 4 -- .../tests/publisher_unittest.csv | 13 ++++ 6 files changed, 118 insertions(+), 27 deletions(-) create mode 100644 fbpcs/pc_translator/tests/TestPCTranslator.cpp rename fbpcs/pc_translator/tests/input_processing/{test_instruction_set.json => pc_instr_test_instruction_set.json} (89%) create mode 100644 fbpcs/pc_translator/tests/publisher_unittest.csv diff --git a/fbpcs/pc_translator/PCTranslator.cpp b/fbpcs/pc_translator/PCTranslator.cpp index 6c6ee7e07..53381d5c0 100644 --- a/fbpcs/pc_translator/PCTranslator.cpp +++ b/fbpcs/pc_translator/PCTranslator.cpp @@ -10,7 +10,12 @@ #include #include +#include +#include +#include +#include #include +#include "fbpcs/emp_games/common/Csv.h" #include "folly/String.h" namespace pc_translator { @@ -20,8 +25,8 @@ std::string PCTranslator::encode(const std::string& inputDataset) { PCTranslator::retrieveInstructionSetNamesForRun(pcsFeatures_); auto pcInstructionSets = PCTranslator::retrieveInstructionSets(validInstructionSetNames); - PCTranslator::transformDataset(inputDataset, pcInstructionSets); - return ""; + return PCTranslator::transformDataset( + inputDataset, pcInstructionSets.front()); } std::string PCTranslator::decode( @@ -34,7 +39,13 @@ PCTranslator::retrieveInstructionSets( std::vector& instructionSetNames) { std::vector> pcInstructionSets; for (auto instructionSetName : instructionSetNames) { - auto file_path = instructionSetBasePath + instructionSetName + ".json"; + instructionSetName.erase( + remove(instructionSetName.begin(), instructionSetName.end(), '\''), + instructionSetName.end()); + instructionSetName.erase( + 
remove(instructionSetName.begin(), instructionSetName.end(), ' '), + instructionSetName.end()); + auto file_path = instructionSetBasePath_ + instructionSetName + ".json"; auto contents = fbpcf::io::FileIOWrappers::readFile(file_path); pcInstructionSets.push_back(PCTranslator::parseInstructionSet(contents)); } @@ -55,16 +66,52 @@ std::vector PCTranslator::retrieveInstructionSetNamesForRun( enabledFeatureFlags.begin(), enabledFeatureFlags.end(), std::back_inserter(validPCInstructionSets), - [](const std::string& feature) { return feature.find("pc_instr") == 0; }); + [](const std::string& feature) { + return feature.find("pc_instr") != std::string::npos; + }); return validPCInstructionSets; } -void PCTranslator::transformDataset( - const std::string& /* inputData */, - const std::vector>& - pcInstructionSets) { - throw std::runtime_error("Unimplemented"); +std::string PCTranslator::transformDataset( + const std::string& inputData, + std::shared_ptr pcInstructionSet) { + // Parse the input CSV + auto lineNo = 0; + std::vector> inputColums; + private_measurement::csv::readCsv( + inputData, + [&](const std::vector& header, + const std::vector& parts) { + std::vector inputColumnPerRow; + for (std::vector::size_type i = 0; i < header.size(); + ++i) { + auto column = header[i]; + auto value = std::atoi(parts[i].c_str()); + auto iter = std::find( + pcInstructionSet->getGroupByIds().begin(), + pcInstructionSet->getGroupByIds().end(), + column); + if (iter != pcInstructionSet->getGroupByIds().end()) { + inputColumnPerRow.push_back(value); + } + } + + inputColums.push_back(inputColumnPerRow); + lineNo++; + }); + + auto filters = std::make_unique< + std::vector>>(0); + std::unique_ptr encoder = + std::make_unique( + std::move(filters)); + + auto encodedIndexes = encoder->generateORAMIndexes(inputColums); + + // TODO : Append the enodedIndexes at the end of publisher output and return + // output path. 
+ return ""; } std::shared_ptr PCTranslator::parseInstructionSet( diff --git a/fbpcs/pc_translator/PCTranslator.h b/fbpcs/pc_translator/PCTranslator.h index ed59d6915..cebf5adb7 100644 --- a/fbpcs/pc_translator/PCTranslator.h +++ b/fbpcs/pc_translator/PCTranslator.h @@ -26,12 +26,13 @@ class PCTranslator { explicit PCTranslator(const std::string& pcsFeatures) : pcsFeatures_(pcsFeatures) {} - /* - * Method to encode the configurable fields in input dataset as per the active - * pc instruction sets for the run. This method will output the path of - * transformed input dataset, which can be used in further PC run. - */ - std::string encode(const std::string& inputDataset); + explicit PCTranslator( + const std::string& pcs_features, + const std::string& instruction_set_base_path) + : pcs_features_(pcs_features), + instruction_set_base_path_(instruction_set_base_path) {} + + std::string encode(const std::string& input_dataset); /* * Method to decode final aggregated output with the encoded breakdown Ids as @@ -42,8 +43,8 @@ class PCTranslator { std::string decode(const std::string& aggregatedOutputDataset); private: - std::string pcsFeatures_; - const std::string instructionSetBasePath = + std::string pcsfeatures_; + std::string instructionSetBasePath = "https://pc-translator.s3.us-west-2.amazonaws.com/"; std::vector> retrieveInstructionSets( std::vector& instructionSetNames); @@ -51,10 +52,9 @@ class PCTranslator { const std::string& pcsFeatures); std::shared_ptr parseInstructionSet( const std::string& instructionSet); - void transformDataset( + std::string transformDataset( const std::string& input_data, - const std::vector>& - pcInstructionSets); + std::shared_ptr pcInstructionSet); }; } // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/TestPCTranslator.cpp b/fbpcs/pc_translator/tests/TestPCTranslator.cpp new file mode 100644 index 000000000..f182efa9a --- /dev/null +++ b/fbpcs/pc_translator/tests/TestPCTranslator.cpp @@ -0,0 +1,36 @@ +/* + * 
Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include "../../emp_games/common/TestUtil.h" +#include "fbpcs/pc_translator/PCTranslator.h" + +namespace pc_translator { +class TestPCTranslator : public ::testing::Test { + public: + protected: + std::string pcs_features_; + std::string test_instruction_set_base_path_; + std::string test_publisher_input_path_; + + void SetUp() override { + pcs_features_ = + "'num_mpc_container_mutation', 'private_lift_unified_data_process', 'pc_instr_test_instruction_set'"; + std::string baseDir = + private_measurement::test_util::getBaseDirFromPath(__FILE__); + test_instruction_set_base_path_ = baseDir + "input_processing/"; + test_publisher_input_path_ = baseDir + "publisher_unittest.csv"; + } +}; + +TEST_F(TestPCTranslator, TestEncode) { + auto pcTranslator = std::make_shared( + pcs_features_, test_instruction_set_base_path_); + auto outputPath = pcTranslator->encode(test_publisher_input_path_); + EXPECT_EQ(outputPath, ""); +} +} // namespace pc_translator diff --git a/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp index 32a38ff3a..d4ea3fd57 100644 --- a/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp +++ b/fbpcs/pc_translator/tests/input_processing/TestPCInstructionSet.cpp @@ -13,7 +13,6 @@ #include #include "../../../emp_games/common/TestUtil.h" #include "fbpcs/pc_translator/input_processing/PCInstructionSet.h" -#include "folly/Random.h" namespace pc_translator { class TestPCInstructionSet : public ::testing::Test { @@ -24,7 +23,7 @@ class TestPCInstructionSet : public ::testing::Test { void SetUp() override { std::string baseDir = private_measurement::test_util::getBaseDirFromPath(__FILE__); - testInstructionSetPath_ = baseDir + "test_instruction_set.json"; + 
testInstructionSetPath_ = baseDir + "pc_instr_test_instruction_set.json"; } }; @@ -35,7 +34,7 @@ TEST_F(TestPCInstructionSet, TestStandardWorkflowTest) { auto groupByIds = pcInstructionSet->getGroupByIds(); auto filterConstraints = pcInstructionSet->getFilterConstraints(); EXPECT_EQ(groupByIds.size(), 2); - EXPECT_EQ(filterConstraints.size(), 4); + EXPECT_EQ(filterConstraints.size(), 3); EXPECT_EQ(filterConstraints[0].getName(), "gender"); EXPECT_EQ(filterConstraints[0].getType(), "EQ"); EXPECT_EQ(filterConstraints[0].getValue(), 0); diff --git a/fbpcs/pc_translator/tests/input_processing/test_instruction_set.json b/fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json similarity index 89% rename from fbpcs/pc_translator/tests/input_processing/test_instruction_set.json rename to fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json index 7a0008b19..7fa183d2f 100644 --- a/fbpcs/pc_translator/tests/input_processing/test_instruction_set.json +++ b/fbpcs/pc_translator/tests/input_processing/pc_instr_test_instruction_set.json @@ -30,10 +30,6 @@ { "constraint_type": "EQ", "value": "0" - }, - { - "constraint_type": "EQ", - "value": "1" } ] }, diff --git a/fbpcs/pc_translator/tests/publisher_unittest.csv b/fbpcs/pc_translator/tests/publisher_unittest.csv new file mode 100644 index 000000000..691ecff30 --- /dev/null +++ b/fbpcs/pc_translator/tests/publisher_unittest.csv @@ -0,0 +1,13 @@ +id_,opportunity,test_flag,opportunity_timestamp, age, gender +cfcd208495d565ef66e7dff9f98764da,1,0,1600000430, 25, 0 +c4ca4238a0b923820dcc509a6f75849b,1,1,1600000401, 26, 1 +c81e728d9d4c2f636f067f89cc14862c,0,0,0, 44, 0 +eccbc87e4b5ce2fe28308fd9f2a7baf3,0,0,0, 23, 0 +a87ff679a2f3e71d9181a67b7542122c,0,0,0, 25, 0 +e4da3b7fbbce2345d7772b0674a318d5,1,1,1600000461, 24, 1 +1679091c5a880faf6fb5e6087eb1b2dc,1,0,1600000052, 25, 1 +8f14e45fceea167a5a36dedd4bea2543,1,0,1600000831, 26, 0 +c9f0f895fb98ab9159f51fd0297e236d,1,0,1600000530, 50, 0 
+45c48cce2e2d7fbdea1afc51c7c6ad26,1,0,1600000972, 25, 1 +d3d9446802a44259755d38e6d163e820,0,0,0, 25, 0 +6512bd43d9caa6e02c990b0a82652dca,0,0,0, 25, 0