From 137a18e4bd23c52fd26dfe8def1a4300b8e204c7 Mon Sep 17 00:00:00 2001 From: eawilliams Date: Mon, 25 Jul 2016 09:41:43 -0400 Subject: [PATCH 01/10] intermediate efforts --- bin/run-querier.sh | 158 +++++++++++++++++++++++++++++++++++++++++++ bin/run-responder.sh | 0 2 files changed, 158 insertions(+) create mode 100644 bin/run-querier.sh create mode 100644 bin/run-responder.sh diff --git a/bin/run-querier.sh b/bin/run-querier.sh new file mode 100644 index 00000000..debbd8f3 --- /dev/null +++ b/bin/run-querier.sh @@ -0,0 +1,158 @@ +#!/bin/bash +# This script runs the QuerierDriver + +#Jar file - full path to jar +JAR="" +echo ${JAR} + +#QuerierDriver class +QUERIER_DRIVER="org.apache.pirk.querier.wideskies.QuerierDriver" +ile" +echo ${QUERIER_DRIVER} + + +#CLI Options + +#action -- required - 'encrypt' or 'decrypt' -- The action performed by the QuerierDriver +ACTION="" +echo ${ACTION} + +#bitset -- required for encryption -- Ensure that this bit position is set in the Paillier +#modulus (will generate Paillier moduli until finding one in which this bit is set) +BITSET="" +echo ${BITSET} + +#certainty -- required for encryption -- Certainty of prime generation for Paillier +#must be greater than or equal to 128 +CERTAINTY="" +echo ${CERTAINTY} + + + + + -c,--certainty required for encryption -- Certainty of prime generation for Paillier -- must + be greater than or equal to 128 + -dps,--dataPartitionBitSize required for encryption -- Partition bit size in data partitioning + -ds,--dataSchemas required -- Comma separated list of data schema file names + -embed,--embedSelector required for encryption -- 'true' or 'false' - Whether or not to embed the + selector in the results to reduce false positives + -embedQS,--embedQuerySchema optional (defaults to false) -- Whether or not to embed the QuerySchema in the + Query (via QueryInfo) + -h,--help Print out the help documentation for this command line execution + -hb,--hashBitSize required -- Bit size of keyed hash + -hk,--hashKey required for encryption -- String key for the keyed hash functionality + -i,--inputFile required - Fully qualified file containing input -- + The input is either: + (1) For Encryption: A query file - Contains the query selectors, one per line; + the first line must be the query number + OR + (2) For Decryption: A response file - Contains the serialized Response object + -lu,--useHDFSLookupTable required for encryption -- 'true' or 'false' -- Whether or not to generate and + use a hdfs modular exponentation lookup table + -mlu,--memLookupTable required for encryption -- 'true' or 'false' - Whether or not to generate and + use an in memory modular exponentation lookup table - only for + standalone/testing right now... + -nt,--numThreads required -- Number of threads to use for encryption/decryption + -o,--outputFile required - Fully qualified file for the result output. 
+ The output file specifies either: + (1) For encryption: + (a) A file to contain the serialized Querier object named: -querier + AND + (b) A file to contain the serialized Query object named: -query + OR + (2) A file to contain the decryption results where each line is where each line + corresponds to one hit and is a JSON object with the schema QuerySchema + -pbs,--paillierBitSize required for encryption -- Paillier modulus size N + -qf,--querierFile required for decryption -- Fully qualified file containing the serialized + Querier object + -qn,--queryName required for encryption -- Name of the query + -qs,--querySchemas required -- Comma separated list of query schema file names + -qt,--queryType required for encryption -- Type of the query as defined in the 'schemaName' tag + of the corresponding query schema file + -srAlg,--secureRandomAlg optional - specify the SecureRandom algorithm, defaults to NativePRNG + -srProvider,--secureRandomProvider optional - specify the SecureRandom provider, defaults to SUN + +#Define the command +QUERIER_DRIVER_CMD="java -cp ${JAR} ${QUERIER_DRIVER} -a ${ACTION} -b ${BITSET} -c ${CERTAINTY} " +echo ${QUERIER_DRIVER_CMD} + +# Define the log file +LOG_FILE="LOG_${DATE}.txt" +echo ${LOG_FILE} + +#This command will execute each job and determine status. +#Results will be displayed in the log file. +{ +echo ${JOB_DISTRIBUTION_CMD} +${JOB_DISTRIBUTION_CMD} +if [ $? -ne 0 ] +then +echo "ERROR distribution JOB. SEE LOG." +${CLEANUP_CMD_DIST} +exit $? +fi +${CLEANUP_CMD_DIST} + + +#echo ${JOB_DOMAINALERT_CMD} +#${JOB_DOMAINALERT_CMD} +#if [ $? -ne 0 ] +#then +#echo "ERROR bfDomainAlert JOB. SEE LOG." +#${CLEANUP_CMD_DOMAINALERT} +#exit $? +#fi + +#echo ${JOB_DOMAINALERT_DIFF_CMD} +#${JOB_DOMAINALERT_DIFF_CMD} +#if [ $? -ne 0 ] +#then +#echo "ERROR bfDomainAlert diff JOB. SEE LOG." +#exit $? +#fi + + +echo ${JOB_MXALERT_CMD} +${JOB_MXALERT_CMD} +if [ $? -ne 0 ] +then +echo "ERROR mxAlert JOB. SEE LOG." +${CLEANUP_CMD_MXALERT} +fi + +echo ${JOB_MXALERT_DIFF_CMD} +${JOB_MXALERT_DIFF_CMD} +if [ $? -ne 0 ] +then +echo "ERROR mxDomainAlert diff JOB. SEE LOG." +fi + +echo ${JOB_DOMAINDORKING_CMD} +${JOB_DOMAINDORKING_CMD} +${CLEANUP_CMD_DOMAINDORKING} +if [ $? -ne 0 ] +then +echo "ERROR domainDorking JOB. SEE LOG." +${CLEANUP_CMD_DOMAINDORKING} +fi + +echo ${JOB_VPS_CMD} +${JOB_VPS_CMD} +${CLEANUP_CMD_VPS} +if [ $? -ne 0 ] +then +echo "ERROR VPS detection JOB. SEE LOG." +${CLEANUP_CMD_VPS} +fi + +echo ${JOB_MMG_CMD} +${JOB_MMG_CMD} +${CLEANUP_CMD_MMG} +if [ $? -ne 0 ] +then +echo "ERROR MMG profiling JOB. SEE LOG." 
+${CLEANUP_CMD_MMG} +fi + +exit 0 +} &> ${LOG_FILE} diff --git a/bin/run-responder.sh b/bin/run-responder.sh new file mode 100644 index 00000000..e69de29b From dbdc9694363f2b0a58628e136bb70bdb194c3b71 Mon Sep 17 00:00:00 2001 From: eawilliams Date: Mon, 25 Jul 2016 19:13:44 -0400 Subject: [PATCH 02/10] adding scripts to aid in running the QuerierDriver and ResponderDriver --- bin/run-querier.sh | 310 +++++++++++------- bin/run-responder.sh | 268 +++++++++++++++ .../pirk/querier/wideskies/QuerierDriver.java | 1 + .../querier/wideskies/QuerierDriverCLI.java | 1 + .../responder/wideskies/ResponderDriver.java | 10 +- src/main/resources/pirk.properties | 2 +- 6 files changed, 472 insertions(+), 120 deletions(-) mode change 100644 => 100755 bin/run-querier.sh mode change 100644 => 100755 bin/run-responder.sh diff --git a/bin/run-querier.sh b/bin/run-querier.sh old mode 100644 new mode 100755 index debbd8f3..57cc5b51 --- a/bin/run-querier.sh +++ b/bin/run-querier.sh @@ -1,4 +1,23 @@ #!/bin/bash +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +############################################################################### + # This script runs the QuerierDriver #Jar file - full path to jar @@ -7,16 +26,60 @@ echo ${JAR} #QuerierDriver class QUERIER_DRIVER="org.apache.pirk.querier.wideskies.QuerierDriver" -ile" echo ${QUERIER_DRIVER} -#CLI Options +## +## CLI Options +## + +## +## Required Args +## #action -- required - 'encrypt' or 'decrypt' -- The action performed by the QuerierDriver ACTION="" echo ${ACTION} +#dataSchemas -- required -- Comma separated list of data schema file names +DATASCHEMAS="" +echo ${DATASCHEMAS} + +#inputFile - required - Fully qualified file containing input +#The input is either: +#(1) For Encryption: A query file - Contains the query selectors, one per line; +#the first line must be the query number +#OR +#(2) For Decryption: A response file - Contains the serialized Response object +INPUTFILE="" +echo ${INPUTFILE} + +#numThreads -- required -- Number of threads to use for encryption/decryption +NUMTHREADS="" +echo ${NUMTHREADS} + +#outputFile -- required - Fully qualified file for the result output. 
+#The output file specifies either: +#(1) For encryption: +#(a) A file to contain the serialized Querier object named: -querier +#AND +#(b) A file to contain the serialized Query object named: -query +#OR +#(2) A file to contain the decryption results where each line is where each line +#corresponds to one hit and is a JSON object with the schema QuerySchema +OUTPUTFILE="" +echo ${OUTPUTFILE} + +#querySchemas -- required -- Comma separated list of query schema file names +QUERYSCHEMAS="" +echo ${QUERYSCHEMAS} + +## +## Optional Args - Leave empty if not using/not changing default values +## + +## Optional, but required for Encryption (ignored if not encrypting) + #bitset -- required for encryption -- Ensure that this bit position is set in the Paillier #modulus (will generate Paillier moduli until finding one in which this bit is set) BITSET="" @@ -27,132 +90,147 @@ echo ${BITSET} CERTAINTY="" echo ${CERTAINTY} - - +#dataPartitionBitSize -- required for encryption -- Partition bit size in data partitioning +DATAPARTITIONBITSIZE="" +echo ${DATAPARTITIONBITSIZE} + +#embedSelector - required for encryption -- 'true' or 'false' +#Whether or not to embed the selector in the results to reduce false positives +#Defaults to 'true' +EMBEDSELECTOR="" +echo ${EMBEDSELECTOR} + +#embedQuerySchema - true or false +#Whether or not to embed the QuerySchema in the Query (via QueryInfo) +#Defaults to 'false' +EMBEDQUERYSCHEMA="" +echo ${EMBEDQUERYSCHEMA} - -c,--certainty required for encryption -- Certainty of prime generation for Paillier -- must - be greater than or equal to 128 - -dps,--dataPartitionBitSize required for encryption -- Partition bit size in data partitioning - -ds,--dataSchemas required -- Comma separated list of data schema file names - -embed,--embedSelector required for encryption -- 'true' or 'false' - Whether or not to embed the - selector in the results to reduce false positives - -embedQS,--embedQuerySchema optional (defaults to false) -- Whether or not to embed the QuerySchema in the - Query (via QueryInfo) - -h,--help Print out the help documentation for this command line execution - -hb,--hashBitSize required -- Bit size of keyed hash - -hk,--hashKey required for encryption -- String key for the keyed hash functionality - -i,--inputFile required - Fully qualified file containing input -- - The input is either: - (1) For Encryption: A query file - Contains the query selectors, one per line; - the first line must be the query number - OR - (2) For Decryption: A response file - Contains the serialized Response object - -lu,--useHDFSLookupTable required for encryption -- 'true' or 'false' -- Whether or not to generate and - use a hdfs modular exponentation lookup table - -mlu,--memLookupTable required for encryption -- 'true' or 'false' - Whether or not to generate and - use an in memory modular exponentation lookup table - only for - standalone/testing right now... - -nt,--numThreads required -- Number of threads to use for encryption/decryption - -o,--outputFile required - Fully qualified file for the result output. 
- The output file specifies either: - (1) For encryption: - (a) A file to contain the serialized Querier object named: -querier - AND - (b) A file to contain the serialized Query object named: -query - OR - (2) A file to contain the decryption results where each line is where each line - corresponds to one hit and is a JSON object with the schema QuerySchema - -pbs,--paillierBitSize required for encryption -- Paillier modulus size N - -qf,--querierFile required for decryption -- Fully qualified file containing the serialized - Querier object - -qn,--queryName required for encryption -- Name of the query - -qs,--querySchemas required -- Comma separated list of query schema file names - -qt,--queryType required for encryption -- Type of the query as defined in the 'schemaName' tag - of the corresponding query schema file - -srAlg,--secureRandomAlg optional - specify the SecureRandom algorithm, defaults to NativePRNG - -srProvider,--secureRandomProvider optional - specify the SecureRandom provider, defaults to SUN - -#Define the command -QUERIER_DRIVER_CMD="java -cp ${JAR} ${QUERIER_DRIVER} -a ${ACTION} -b ${BITSET} -c ${CERTAINTY} " -echo ${QUERIER_DRIVER_CMD} - -# Define the log file -LOG_FILE="LOG_${DATE}.txt" -echo ${LOG_FILE} +#hashBitSize - required for encryption-- Bit size of keyed hash +HASHBITSIZE="" +echo ${HASHBITSIZE} + +#hashKey -- required for encryption -- String key for the keyed hash functionality +HASHKEY="" +echo ${HASHKEY} + +#useHDFSLookupTable -- required for encryption -- 'true' or 'false' +#Whether or not to generate and use a hdfs modular exponentation lookup table +#Defaults to 'false' +USEHDFSLOOKUP="" +echo ${USEHDFSLOOKUP} + +#memLookupTable -- required for encryption -- 'true' or 'false' +#Whether or not to generate and use an in memory modular exponentation lookup table - only for +#standalone/testing right now... +#Defaults to 'false' +MEMLOOKUPTABLE="" +echo ${MEMLOOKUPTABLE} + +#paillierBitSize -- required for encryption -- Paillier modulus size N +PAILLIERBITSIZE="" +echo ${PAILLIERBITSIZE} + +#queryName -- required for encryption -- Name of the query +QUERYNAME="" +echo ${QUERYNAME} + +#queryType -- required for encryption +#Type of the query as defined in the 'schemaName' tag of the corresponding query schema file +QUERYTYPE="" +echo ${QUERYTYPE} + +#secureRandomAlg -- specify the SecureRandom algorithm +#Defaults to NativePRNG +SECURERANDOMALG="" +echo ${SECURERANDOMALG} + +#secureRandomProvider -- specify the SecureRandom provider +#Defaults to SUN +SECURERANDOMPROVIDER="" +echo ${SECURERANDOMPROVIDER} + +## Optional, but required for Decryption (ignored if not decrypting) + +#querierFile -- required for decryption +#Fully qualified file containing the serialized Querier object +QUERIERFILE="" +echo ${QUERIERFILE} -#This command will execute each job and determine status. -#Results will be displayed in the log file. -{ -echo ${JOB_DISTRIBUTION_CMD} -${JOB_DISTRIBUTION_CMD} -if [ $? -ne 0 ] -then -echo "ERROR distribution JOB. SEE LOG." -${CLEANUP_CMD_DIST} -exit $? -fi -${CLEANUP_CMD_DIST} + +## +## Define the command +## +QUERIER_DRIVER_CMD="java -cp ${JAR} ${QUERIER_DRIVER} -a ${ACTION} -ds ${DATASCHEMAS} \ +-i ${INPUTFILE} -nt ${NUMTHREADS} -o ${OUTPUTFILE} -qs ${QUERYSCHEMAS}" -#echo ${JOB_DOMAINALERT_CMD} -#${JOB_DOMAINALERT_CMD} -#if [ $? -ne 0 ] -#then -#echo "ERROR bfDomainAlert JOB. SEE LOG." -#${CLEANUP_CMD_DOMAINALERT} -#exit $? -#fi +# Add the optional args -#echo ${JOB_DOMAINALERT_DIFF_CMD} -#${JOB_DOMAINALERT_DIFF_CMD} -#if [ $? 
-ne 0 ] -#then -#echo "ERROR bfDomainAlert diff JOB. SEE LOG." -#exit $? -#fi +if [ -n "${BITSET}" ]; then + QUERIER_DRIVER_CMD+=" -b ${BITSET}" +fi +if [ -n "${CERTAINTY}" ]; then + QUERIER_DRIVER_CMD+=" -c ${CERTAINTY}" +fi +if [ -n "${DATAPARTITIONBITSIZE}" ]; then + QUERIER_DRIVER_CMD+=" -dps ${DATAPARTITIONBITSIZE}" +fi +if [ -n "${EMBEDSELECTOR}" ]; then + QUERIER_DRIVER_CMD+=" -embed ${EMBEDSELECTOR}" +fi +if [ -n "${EMBEDQUERYSCHEMA}" ]; then + QUERIER_DRIVER_CMD+=" -embedQS ${EMBEDQUERYSCHEMA}" +fi +if [ -n "${HASHBITSIZE}" ]; then + QUERIER_DRIVER_CMD+=" -hb ${HASHBITSIZE}" +fi +if [ -n "${HASHKEY}" ]; then + QUERIER_DRIVER_CMD+=" -hk ${HASHKEY}" +fi +if [ -n "${USEHDFSLOOKUP}" ]; then + QUERIER_DRIVER_CMD+=" -lu ${USEHDFSLOOKUP}" +fi +if [ -n "${MEMLOOKUPTABLE}" ]; then + QUERIER_DRIVER_CMD+=" -mlu ${MEMLOOKUPTABLE}" +fi +if [ -n "${PAILLIERBITSIZE}" ]; then + QUERIER_DRIVER_CMD+=" -pbs ${PAILLIERBITSIZE}" +fi +if [ -n "${QUERYNAME}" ]; then + QUERIER_DRIVER_CMD+=" -qn ${QUERYNAME}" +fi +if [ -n "${QUERYTYPE}" ]; then + QUERIER_DRIVER_CMD+=" -qt ${QUERYTYPE}" +fi +if [ -n "${SECURERANDOMALG}" ]; then + QUERIER_DRIVER_CMD+=" -srAlg ${SECURERANDOMALG}" +fi +if [ -n "${SECURERANDOMPROVIDER}" ]; then + QUERIER_DRIVER_CMD+=" -srProvider ${SECURERANDOMPROVIDER}" +fi +if [ -n "${QUERIERFILE}" ]; then + QUERIER_DRIVER_CMD+=" -qf ${QUERIERFILE}" +fi +echo ${QUERIER_DRIVER_CMD} -echo ${JOB_MXALERT_CMD} -${JOB_MXALERT_CMD} -if [ $? -ne 0 ] -then -echo "ERROR mxAlert JOB. SEE LOG." -${CLEANUP_CMD_MXALERT} -fi -echo ${JOB_MXALERT_DIFF_CMD} -${JOB_MXALERT_DIFF_CMD} -if [ $? -ne 0 ] -then -echo "ERROR mxDomainAlert diff JOB. SEE LOG." -fi -echo ${JOB_DOMAINDORKING_CMD} -${JOB_DOMAINDORKING_CMD} -${CLEANUP_CMD_DOMAINDORKING} -if [ $? -ne 0 ] -then -echo "ERROR domainDorking JOB. SEE LOG." -${CLEANUP_CMD_DOMAINDORKING} -fi +## +## Execute the QuerierDriver +## Results will be displayed in the log file. +## -echo ${JOB_VPS_CMD} -${JOB_VPS_CMD} -${CLEANUP_CMD_VPS} -if [ $? -ne 0 ] -then -echo "ERROR VPS detection JOB. SEE LOG." -${CLEANUP_CMD_VPS} -fi +LOG_FILE="LOG_QUERIER.txt" +echo ${LOG_FILE} -echo ${JOB_MMG_CMD} -${JOB_MMG_CMD} -${CLEANUP_CMD_MMG} +{ +echo ${QUERIER_DRIVER_CMD} +${QUERIER_DRIVER_CMD} if [ $? -ne 0 ] then -echo "ERROR MMG profiling JOB. SEE LOG." -${CLEANUP_CMD_MMG} -fi - +echo "ERROR QuerierDriver. SEE LOG." exit 0 +fi } &> ${LOG_FILE} diff --git a/bin/run-responder.sh b/bin/run-responder.sh old mode 100644 new mode 100755 index e69de29b..da16125b --- a/bin/run-responder.sh +++ b/bin/run-responder.sh @@ -0,0 +1,268 @@ +#!/bin/bash +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+############################################################################### + +#This script runs the ResponderDriver + +#Jar file - full path to jar +JAR="" +echo ${JAR} + +#ResponderDriver class +RESPONDER_DRIVER="org.apache.pirk.responder.wideskies.ResponderDriver" +echo ${RESPONDER_DRIVER} + +## +## CLI Options +## + +## +## Required Args +## + +#dataInputFormat -- required -- 'base', 'elasticsearch', or 'standalone' -- Specify the input format +DATAINPUTFORMAT="" +echo ${DATAINPUTFORMAT} + +#dataSchemas -- required -- Comma separated list of data schema file names +DATASCHEMAS="" +echo ${DATASCHEMAS} + +#inputData -- required +#Fully qualified name of input file/directory in hdfs; used if inputFormat = 'base' +INPUTDATA="" +echo ${INPUTDATA} + +#outputFile -- required -- Fully qualified name of output file in hdfs +OUTPUTFILE="" +echo ${OUTPUTFILE} + +#platform -- required -- 'mapreduce', 'spark', or 'standalone' +#Processing platform technology for the responder +PLATFORM="" +echo ${PLATFORM} + +#queryInput -- required -- Fully qualified dir in hdfs of Query files +QUERYINPUT="" +echo ${QUERYINPUT} + +#querySchemas -- required -- Comma separated list of query schema file names +QUERYSCHEMAS="" +echo ${QUERYSCHEMAS} + +## +## Optional Args - Leave empty if not using/not changing default values +## + +#allowAdHocQuerySchemas -- 'true' or 'false' +#If true, allows embedded QuerySchemas for a query. +#Defaults to 'false' +ALLOWADHOCQUERYSCHEMAS="" +echo ${ALLOWADHOCQUERYSCHEMAS} + +#colMultReduceByKey -- 'true' or 'false' -- Spark only +#If true, uses reduceByKey in performing column multiplication; if false, uses groupByKey -> reduce +#Defaults to 'false' +COLMULTREDUCEBYKEY="" +echo ${COLMULTREDUCEBYKEY} + +#baseInputFormat -- required if baseInputFormat = 'base' +#Full class name of the InputFormat to use when reading in the data - must extend BaseInputFormat +BASEINPUTFORMAT="" +echo ${BASEINPUTFORMAT} + +#esQuery -- required if baseInputFormat = 'elasticsearch' -- ElasticSearch query +#if using 'elasticsearch' input format +ESQUERY="" +echo ${ESQUERY} + +#esResource -- required if baseInputFormat = 'elasticsearch' +#Requires the format / : Elasticsearch resource where data is read and written to +ESRESOURCE="" +echo ${ESRESOURCE} + +#useHDFSLookupTable -- 'true' or 'false' - Whether or not to generate and use the +#hdfs lookup table for modular exponentiation +#Defaults to 'false' +HDFSEXP="" +echo ${HDFSEXP} + +#baseQuery -- ElasticSearch-like query if using 'base' input format - +#used to filter records in the RecordReader +#Defaults to ?q=* +BASEQUERY="" +echo ${BASEQUERY} + +#limitHitsPerSelector -- 'true' or 'false' +#Whether or not to limit the number of hits per selector +#Defaults to 'true' +LIMITHITSPERSELECTOR="" +echo ${LIMITHITSPERSELECTOR} + +#mapreduceMapJavaOpts -- Amount of heap (in MB) to allocate per map task +#Defaults to -Xmx2800m +MRMAPJAVAOPTS="" +echo ${MRMAPJAVAOPTS} + +#mapreduceMapMemoryMb -- Amount of memory (in MB) to allocate per map task +#Defaults to 3000 +MRMAPMEMORYMB="" +echo ${MRMAPMEMORYMB} + +#mapreduceReduceJavaOpts +#Amount of heap (in MB) to allocate per reduce task +#Defaults to -Xmx2800m +MRREDUCEJAVAOPTS="" +echo ${MRREDUCEJAVAOPTS} + +#mapreduceReduceMemoryMb +#Amount of memory (in MB) to allocate per reduce task +#Defaults to 3000 +MRREDUCEMEMORYMB="" +echo ${MRREDUCEMEMORYMB} + +#stopListFile -- optional (unless using StopListFilter) -- Fully qualified file in hdfs +#containing stoplist terms; used by the StopListFilter 
+STOPLISTFILE="" +echo ${STOPLISTFILE} + +#useLocalCache -- 'true' or 'false' +#Whether or not to use the local cache for modular exponentiation +#Defaults to 'true' +USELOCALCACHE="" +echo ${USELOCALCACHE} + +#useModExpJoin -- 'true' or 'false' -- Spark only +#Whether or not to pre-compute the modular exponentiation table and join it to the data +#partitions when performing the encrypted row calculations +#Defaults to 'false' +USEMODEXPJOIN="" +echo ${USEMODEXPJOIN} + +#numReduceTasks -- optional -- Number of reduce tasks +NUMREDUCETASKS="" +echo ${NUMREDUCETASKS} + +#numColMultPartitions -- optional, Spark only +#Number of partitions to use when performing column multiplication +NUMCOLMULTPARTS="" +echo ${NUMCOLMULTPARTS} + +#maxHitsPerSelector -- optional -- Max number of hits encrypted per selector +MAXHITSPERSELECTOR="" +echo ${MAXHITSPERSELECTOR} + +#dataParts -- optional -- Number of partitions for the input data +DATAPARTS="" +echo ${DATAPARTS} + +#numExpLookupPartitions -- optional -- Number of partitions for the exp lookup table +EXPPARTS="" +echo ${EXPPARTS} + +## +## Define the command +## + +RESPONDER_DRIVER_CMD="java -cp ${JAR} ${RESPONDER_DRIVER} -d ${DATAINPUTFORMAT} \ + -ds ${DATASCHEMAS} -i ${INPUTDATA} -o ${OUTPUTFILE} -p ${PLATFORM} \ + -q ${QUERYINPUT} -qs ${QUERYSCHEMAS}" + +# Add the optional args + +if [ -n "${NUMREDUCETASKS}" ]; then + RESPONDER_DRIVER_CMD+=" -nr ${NUMREDUCETASKS}" +fi +if [ -n "${NUMCOLMULTPARTS}" ]; then + RESPONDER_DRIVER_CMD+=" -numColMultParts ${NUMCOLMULTPARTS}" +fi +if [ -n "${MAXHITSPERSELECTOR}" ]; then + RESPONDER_DRIVER_CMD+=" -mh ${MAXHITSPERSELECTOR}" +fi +if [ -n "${DATAPARTS}" ]; then + RESPONDER_DRIVER_CMD+=" -dataParts ${DATAPARTS}" +fi +if [ -n "${EXPPARTS}" ]; then + RESPONDER_DRIVER_CMD+=" -expParts ${EXPPARTS}" +fi +if [ -n "${ESQUERY}" ]; then + RESPONDER_DRIVER_CMD+=" -eq ${ESQUERY}" +fi +if [ -n "${ESRESOURCE}" ]; then + RESPONDER_DRIVER_CMD+=" -er ${ESRESOURCE}" +fi +if [ -n "${HDFSEXP}" ]; then + RESPONDER_DRIVER_CMD+=" -hdfsExp ${HDFSEXP}" +fi +if [ -n "${BASEQUERY}" ]; then + RESPONDER_DRIVER_CMD+=" -j ${BASEQUERY}" +fi +if [ -n "${LIMITHITSPERSELECTOR}" ]; then + RESPONDER_DRIVER_CMD+=" -lh ${LIMITHITSPERSELECTOR}" +fi +if [ -n "${MRMAPJAVAOPTS}" ]; then + RESPONDER_DRIVER_CMD+=" -mjo ${MRMAPJAVAOPTS}" +fi +if [ -n "${MRMAPMEMORYMB}" ]; then + RESPONDER_DRIVER_CMD+=" -mm ${MRMAPMEMORYMB}" +fi +if [ -n "${MRREDUCEJAVAOPTS}" ]; then + RESPONDER_DRIVER_CMD+=" -rjo ${MRREDUCEJAVAOPTS}" +fi +if [ -n "${MRREDUCEMEMORYMB}" ]; then + RESPONDER_DRIVER_CMD+=" -rm ${MRREDUCEMEMORYMB}" +fi +if [ -n "${STOPLISTFILE}" ]; then + RESPONDER_DRIVER_CMD+=" -sf ${STOPLISTFILE}" +fi +if [ -n "${USELOCALCACHE}" ]; then + RESPONDER_DRIVER_CMD+=" -ulc ${USELOCALCACHE}" +fi +if [ -n "${USEMODEXPJOIN}" ]; then + RESPONDER_DRIVER_CMD+=" -useModExpJoin ${USEMODEXPJOIN}" +fi +if [ -n "${ALLOWADHOCQUERYSCHEMAS}" ]; then + RESPONDER_DRIVER_CMD+=" -allowEmbeddedQS ${ALLOWADHOCQUERYSCHEMAS}" +fi +if [ -n "${COLMULTREDUCEBYKEY}" ]; then + RESPONDER_DRIVER_CMD+=" -colMultRBK ${COLMULTREDUCEBYKEY}" +fi +if [ -n "${BASEINPUTFORMAT}" ]; then + RESPONDER_DRIVER_CMD+=" -bif ${BASEINPUTFORMAT}" +fi +echo ${RESPONDER_DRIVER_CMD} + + +## +## Execute the ResponderDriver +## Results will be displayed in the log file. +## + +LOG_FILE="LOG_RESPONDER.txt" +echo ${LOG_FILE} + +{ +echo ${RESPONDER_DRIVER_CMD} +${RESPONDER_DRIVER_CMD} +if [ $? -ne 0 ] +then +echo "ERROR ResponderDriver. SEE LOG." 
+exit 0 +fi +} &> ${LOG_FILE} diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java index efa05332..9efced07 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java @@ -73,6 +73,7 @@ public class QuerierDriver implements Serializable { private static final long serialVersionUID = 1L; + private static final Logger logger = LoggerFactory.getLogger(QuerierDriver.class); public static void main(String... args) throws IOException, InterruptedException, PIRException diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java index 193617ae..371f68ca 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java @@ -313,6 +313,7 @@ private boolean parseOptions() // Load the new local query and data schemas logger.info("loading schemas: dataSchemas = " + SystemConfiguration.getProperty("data.schemas") + " querySchemas = " + SystemConfiguration.getProperty("query.schemas")); + try { LoadDataSchemas.initialize(); diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderDriver.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderDriver.java index 61dbb230..d10f2e7f 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/ResponderDriver.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/ResponderDriver.java @@ -27,6 +27,8 @@ import org.apache.pirk.responder.wideskies.standalone.Responder; import org.apache.pirk.serialization.LocalFileSystemStore; import org.apache.pirk.utils.SystemConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Driver class for the responder @@ -42,20 +44,22 @@ */ public class ResponderDriver { + private static final Logger logger = LoggerFactory.getLogger(ResponderDriver.class); + public static void main(String[] args) throws Exception { ResponderCLI responderCLI = new ResponderCLI(args); if (responderCLI.getOptionValue(ResponderCLI.PLATFORM).equals("mapreduce")) { - System.out.println("Launching MapReduce ResponderTool:"); + logger.info("Launching MapReduce ResponderTool:"); ComputeResponseTool pirWLTool = new ComputeResponseTool(); ToolRunner.run(pirWLTool, new String[] {}); } else if (responderCLI.getOptionValue(ResponderCLI.PLATFORM).equals("spark")) { - System.out.println("Launching Spark ComputeResponse:"); + logger.info("Launching Spark ComputeResponse:"); FileSystem fs = FileSystem.get(new Configuration()); ComputeResponse computeResponse = new ComputeResponse(fs); @@ -63,7 +67,7 @@ else if (responderCLI.getOptionValue(ResponderCLI.PLATFORM).equals("spark")) } else if (responderCLI.getOptionValue(ResponderCLI.PLATFORM).equals("standalone")) { - System.out.println("Launching Standalone Responder:"); + logger.info("Launching Standalone Responder:"); String queryInput = SystemConfiguration.getProperty("pir.queryInput"); Query query = new LocalFileSystemStore().recall(queryInput, Query.class); diff --git a/src/main/resources/pirk.properties b/src/main/resources/pirk.properties index d6ea68d2..d42f16f1 100755 --- a/src/main/resources/pirk.properties +++ b/src/main/resources/pirk.properties @@ -22,7 +22,7 @@ ## # Name of log4j properties file (relative to current folder) -log4jPropertiesFile=logging/log4j2.properties 
+log4jPropertiesFile=log4j2.properties #Name of the local properties file - used when running with the #hadoop jar command From 97f859ec074a53357386e2da5bb20e41e229f3dd Mon Sep 17 00:00:00 2001 From: eawilliams Date: Tue, 26 Jul 2016 15:47:28 -0400 Subject: [PATCH 03/10] intermediate changes to Querier properties --- bin/run-querier.sh | 2 - .../pirk/querier/wideskies/QuerierDriver.java | 36 +- .../querier/wideskies/QuerierDriverCLI.java | 320 ++++-------------- .../pirk/querier/wideskies/QuerierProps.java | 182 ++++++++++ .../pirk/utils/SystemConfiguration.java | 133 +++++--- src/main/resources/pirk.properties | 6 +- src/main/resources/querier.properties | 120 +++++++ src/main/resources/responder.properties | 0 8 files changed, 491 insertions(+), 308 deletions(-) create mode 100644 src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java create mode 100644 src/main/resources/querier.properties create mode 100644 src/main/resources/responder.properties diff --git a/bin/run-querier.sh b/bin/run-querier.sh index 57cc5b51..f2d4c708 100755 --- a/bin/run-querier.sh +++ b/bin/run-querier.sh @@ -215,8 +215,6 @@ fi echo ${QUERIER_DRIVER_CMD} - - ## ## Execute the QuerierDriver ## Results will be displayed in the log file. diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java index 9efced07..a6d259d5 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java @@ -105,26 +105,26 @@ public static void main(String... args) throws IOException, InterruptedException QuerierDriverCLI qdriverCLI = new QuerierDriverCLI(args); // Set the variables - action = qdriverCLI.getOptionValue(QuerierDriverCLI.ACTION); - inputFile = qdriverCLI.getOptionValue(QuerierDriverCLI.INPUTFILE); - outputFile = qdriverCLI.getOptionValue(QuerierDriverCLI.OUTPUTFILE); - numThreads = Integer.parseInt(qdriverCLI.getOptionValue(QuerierDriverCLI.NUMTHREADS)); + action = SystemConfiguration.getProperty(QuerierProps.ACTION); + inputFile = SystemConfiguration.getProperty(QuerierProps.INPUTFILE); + outputFile = SystemConfiguration.getProperty(QuerierProps.OUTPUTFILE); + numThreads = Integer.parseInt(SystemConfiguration.getProperty(QuerierProps.NUMTHREADS)); if (action.equals("encrypt")) { - queryType = qdriverCLI.getOptionValue(QuerierDriverCLI.TYPE); - queryName = qdriverCLI.getOptionValue(QuerierDriverCLI.QUERYNAME); - hashBitSize = Integer.parseInt(qdriverCLI.getOptionValue(QuerierDriverCLI.HASHBITSIZE)); - hashKey = qdriverCLI.getOptionValue(QuerierDriverCLI.HASHBITSIZE); - dataPartitionBitSize = Integer.parseInt(qdriverCLI.getOptionValue(QuerierDriverCLI.DATAPARTITIONSIZE)); - paillierBitSize = Integer.parseInt(qdriverCLI.getOptionValue(QuerierDriverCLI.PAILLIERBITSIZE)); - certainty = Integer.parseInt(qdriverCLI.getOptionValue(QuerierDriverCLI.CERTAINTY)); - embedSelector = SystemConfiguration.getProperty(QuerierDriverCLI.EMBEDSELECTOR, "true").equals("true"); - useMemLookupTable = SystemConfiguration.getProperty(QuerierDriverCLI.USEMEMLOOKUPTABLE, "false").equals("true"); - useHDFSLookupTable = SystemConfiguration.getProperty(QuerierDriverCLI.USEHDFSLOOKUPTABLE, "false").equals("true"); - - if (qdriverCLI.hasOption(QuerierDriverCLI.BITSET)) + queryType = SystemConfiguration.getProperty(QuerierProps.TYPE); + queryName = SystemConfiguration.getProperty(QuerierProps.QUERYNAME); + hashBitSize = 
Integer.parseInt(SystemConfiguration.getProperty(QuerierProps.HASHBITSIZE)); + hashKey = SystemConfiguration.getProperty(QuerierProps.HASHBITSIZE); + dataPartitionBitSize = Integer.parseInt(SystemConfiguration.getProperty(QuerierProps.DATAPARTITIONSIZE)); + paillierBitSize = Integer.parseInt(SystemConfiguration.getProperty(QuerierProps.PAILLIERBITSIZE)); + certainty = Integer.parseInt(SystemConfiguration.getProperty(QuerierProps.CERTAINTY)); + embedSelector = SystemConfiguration.getProperty(QuerierProps.EMBEDSELECTOR, "true").equals("true"); + useMemLookupTable = SystemConfiguration.getProperty(QuerierProps.USEMEMLOOKUPTABLE, "false").equals("true"); + useHDFSLookupTable = SystemConfiguration.getProperty(QuerierProps.USEHDFSLOOKUPTABLE, "false").equals("true"); + + if (SystemConfiguration.hasProperty(QuerierProps.BITSET)) { - bitSet = Integer.parseInt(qdriverCLI.getOptionValue(QuerierDriverCLI.BITSET)); + bitSet = Integer.parseInt(SystemConfiguration.getProperty(QuerierProps.BITSET)); logger.info("bitSet = " + bitSet); } @@ -144,7 +144,7 @@ public static void main(String... args) throws IOException, InterruptedException } if (action.equals("decrypt")) { - querierFile = qdriverCLI.getOptionValue(QuerierDriverCLI.QUERIERFILE); + querierFile = SystemConfiguration.getProperty(QuerierProps.QUERIERFILE); } // Perform the action diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java index 371f68ca..59bd73f2 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java @@ -40,32 +40,7 @@ public class QuerierDriverCLI private Options cliOptions = null; private CommandLine commandLine = null; - // General variables - public static String ACTION = "action"; - public static String INPUTFILE = "inputFile"; - public static String OUTPUTFILE = "outputFile"; - public static String TYPE = "queryType"; - public static String NUMTHREADS = "numThreads"; - public static String EMBEDQUERYSCHEMA = "embedQuerySchema"; - - // Encryption variables - public static String HASHBITSIZE = "hashBitSize"; - public static String HASHKEY = "hashKey"; - public static String DATAPARTITIONSIZE = "dataPartitionBitSize"; - public static String PAILLIERBITSIZE = "paillierBitSize"; - public static String BITSET = "bitSet"; - public static String CERTAINTY = "certainty"; - public static String QUERYNAME = "queryName"; - public static String QUERYSCHEMAS = "querySchemas"; - public static String DATASCHEMAS = "dataSchemas"; - public static String EMBEDSELECTOR = "embedSelector"; - public static String USEMEMLOOKUPTABLE = "memLookupTable"; - public static String USEHDFSLOOKUPTABLE = "useHDFSLookupTable"; - public static String SR_ALGORITHM = "secureRandomAlg"; - public static String SR_PROVIDER = "secureRandomProvider"; - - // Decryption variables - public static String QUERIERFILE = "querierFile"; + private static final String LOCALPROPFILE = "local.querier.properties"; /** * Create and parse allowable options @@ -134,194 +109,42 @@ public String getOptionValue(String option) */ private boolean parseOptions() { - boolean valid = true; + boolean valid = true; - // Parse general required options - if (!hasOption(ACTION)) + //If we have a local.querier.properties file specified, load it + if(hasOption(LOCALPROPFILE)) { - logger.info("Must have the option " + ACTION); - return false; + 
SystemConfiguration.loadPropsFromFile(getOptionValue(LOCALPROPFILE)); } - String action = getOptionValue(ACTION).toLowerCase(); - if (!action.equals("encrypt") && !action.equals("decrypt")) + else { - logger.info("Unsupported action: " + action); - } - SystemConfiguration.setProperty(ACTION, action); - - if (!hasOption(INPUTFILE)) - { - logger.info("Must have the option " + INPUTFILE); - return false; - } - SystemConfiguration.setProperty(INPUTFILE, getOptionValue(INPUTFILE)); - - if (!hasOption(OUTPUTFILE)) - { - logger.info("Must have the option " + OUTPUTFILE); - return false; - } - SystemConfiguration.setProperty(OUTPUTFILE, getOptionValue(OUTPUTFILE)); - - if (!hasOption(NUMTHREADS)) - { - logger.info("Must have the option " + NUMTHREADS); - return false; - } - SystemConfiguration.setProperty(NUMTHREADS, getOptionValue(NUMTHREADS)); - - if (!hasOption(QUERYSCHEMAS)) - { - logger.info("Must have the option " + QUERYSCHEMAS); - return false; - } - SystemConfiguration.setProperty("query.schemas", getOptionValue(QUERYSCHEMAS)); - - if (!hasOption(DATASCHEMAS)) - { - logger.info("Must have the option " + DATASCHEMAS); - return false; - } - SystemConfiguration.setProperty("data.schemas", getOptionValue(DATASCHEMAS)); - - // Parse general optional args - if (hasOption(EMBEDQUERYSCHEMA)) - { - SystemConfiguration.setProperty("pir.embedQuerySchema", getOptionValue(EMBEDQUERYSCHEMA)); - } - else - { - SystemConfiguration.setProperty("pir.embedQuerySchema", "true"); - } - - // Parse encryption args - if (action.equals("encrypt")) - { - if (!hasOption(TYPE)) - { - logger.info("Must have the option " + TYPE); - return false; - } - SystemConfiguration.setProperty(TYPE, getOptionValue(TYPE)); - - if (!hasOption(HASHBITSIZE)) - { - logger.info("Must have the option " + HASHBITSIZE); - return false; - } - SystemConfiguration.setProperty(HASHBITSIZE, getOptionValue(HASHBITSIZE)); - - if (!hasOption(HASHKEY)) - { - logger.info("Must have the option " + HASHKEY); - return false; - } - SystemConfiguration.setProperty(HASHKEY, getOptionValue(HASHKEY)); - - if (!hasOption(DATAPARTITIONSIZE)) - { - logger.info("Must have the option " + DATAPARTITIONSIZE); - return false; - } - SystemConfiguration.setProperty(DATAPARTITIONSIZE, getOptionValue(DATAPARTITIONSIZE)); - - if (!hasOption(PAILLIERBITSIZE)) - { - logger.info("Must have the option " + PAILLIERBITSIZE); - return false; - } - SystemConfiguration.setProperty(PAILLIERBITSIZE, getOptionValue(PAILLIERBITSIZE)); - - if (!hasOption(CERTAINTY)) - { - logger.info("Must have the option " + CERTAINTY); - return false; - } - SystemConfiguration.setProperty(CERTAINTY, getOptionValue(CERTAINTY)); - - if (!hasOption(QUERYNAME)) + //Pull options, set as properties + for(String prop: QuerierProps.PROPSLIST) { - logger.info("Must have the option " + QUERYNAME); - return false; - } - SystemConfiguration.setProperty(QUERYNAME, getOptionValue(QUERYNAME)); - - if (!hasOption(BITSET)) - { - logger.info("Must have the option " + BITSET); - return false; - } - SystemConfiguration.setProperty(BITSET, getOptionValue(BITSET)); - - if (!hasOption(EMBEDSELECTOR)) - { - SystemConfiguration.setProperty(EMBEDSELECTOR, "true"); - } - else - { - SystemConfiguration.setProperty(EMBEDSELECTOR, getOptionValue(EMBEDSELECTOR)); - } - - if (!hasOption(USEMEMLOOKUPTABLE)) - { - SystemConfiguration.setProperty(USEMEMLOOKUPTABLE, "false"); - } - else - { - SystemConfiguration.setProperty(USEMEMLOOKUPTABLE, getOptionValue(USEMEMLOOKUPTABLE)); + if(hasOption(prop)) + { + 
SystemConfiguration.setProperty(prop, getOptionValue(prop)); + } } + } - if (!hasOption(USEHDFSLOOKUPTABLE)) - { - SystemConfiguration.setProperty(USEHDFSLOOKUPTABLE, "false"); - } - else - { - SystemConfiguration.setProperty(USEHDFSLOOKUPTABLE, getOptionValue(USEHDFSLOOKUPTABLE)); - } + //Validate properties + valid = QuerierProps.validateQuerierProperties(); - if (!hasOption(SR_ALGORITHM)) - { - SystemConfiguration.setProperty("pallier.secureRandom.algorithm", "NativePRNG"); - } - else - { - SystemConfiguration.setProperty("pallier.secureRandom.algorithm", getOptionValue(SR_ALGORITHM)); - } - - if (!hasOption(SR_PROVIDER)) - { - SystemConfiguration.setProperty("pallier.secureRandom.provider", "SUN"); - } - else + // Load the new local query and data schemas + if(valid) + { + logger.info("loading schemas: dataSchemas = " + SystemConfiguration.getProperty("data.schemas") + " querySchemas = " + + SystemConfiguration.getProperty("query.schemas")); + try { - SystemConfiguration.setProperty("pallier.secureRandom.provider", getOptionValue(SR_PROVIDER)); - } - } + LoadDataSchemas.initialize(); + LoadQuerySchemas.initialize(); - // Parse decryption args - if (action.equals("decrypt")) - { - if (!hasOption(QUERIERFILE)) + } catch (Exception e) { - logger.info("Must have the option " + QUERIERFILE); - return false; + e.printStackTrace(); } - SystemConfiguration.setProperty(QUERIERFILE, QUERIERFILE); - } - - // Load the new local query and data schemas - logger.info("loading schemas: dataSchemas = " + SystemConfiguration.getProperty("data.schemas") + " querySchemas = " - + SystemConfiguration.getProperty("query.schemas")); - - try - { - LoadDataSchemas.initialize(); - LoadQuerySchemas.initialize(); - - } catch (Exception e) - { - e.printStackTrace(); } return valid; @@ -341,165 +164,170 @@ private Options createOptions() optionHelp.setRequired(false); options.addOption(optionHelp); + // local.querier.properties + Option optionLocalPropFile = new Option("localPropFile", LOCALPROPFILE, false, "Optional local properties file"); + optionLocalPropFile.setRequired(false); + options.addOption(optionLocalPropFile); + // ACTION - Option optionACTION = new Option("a", ACTION, true, "required - 'encrypt' or 'decrypt' -- The action performed by the QuerierDriver"); + Option optionACTION = new Option("a", QuerierProps.ACTION, true, "required - 'encrypt' or 'decrypt' -- The action performed by the QuerierDriver"); optionACTION.setRequired(false); - optionACTION.setArgName(ACTION); + optionACTION.setArgName(QuerierProps.ACTION); optionACTION.setType(String.class); options.addOption(optionACTION); // INPUTFILE - Option optionINPUTFILE = new Option("i", INPUTFILE, true, "required - Fully qualified file containing input " + Option optionINPUTFILE = new Option("i", QuerierProps.INPUTFILE, true, "required - Fully qualified file containing input " + "-- \n The input is either: \n (1) For Encryption: A query file - Contains the query selectors, one per line; " + "the first line must be the query number \n OR \n (2) For Decryption: A response file - Contains the serialized Response object"); optionINPUTFILE.setRequired(false); - optionINPUTFILE.setArgName(INPUTFILE); + optionINPUTFILE.setArgName(QuerierProps.INPUTFILE); optionINPUTFILE.setType(String.class); options.addOption(optionINPUTFILE); // OUTPUTFILE - Option optionOUTPUTFILE = new Option("o", OUTPUTFILE, true, "required - Fully qualified file for the result output. 
" + Option optionOUTPUTFILE = new Option("o", QuerierProps.OUTPUTFILE, true, "required - Fully qualified file for the result output. " + "\n The output file specifies either: \n (1) For encryption: \n \t (a) A file to contain the serialized Querier object named: " + "-" + QuerierConst.QUERIER_FILETAG + " AND \n \t " + "(b) A file to contain the serialized Query object named: -" + QuerierConst.QUERY_FILETAG + "\n " + "OR \n (2) A file to contain the decryption results where each line is where each line " + "corresponds to one hit and is a JSON object with the schema QuerySchema"); optionOUTPUTFILE.setRequired(false); - optionOUTPUTFILE.setArgName(OUTPUTFILE); + optionOUTPUTFILE.setArgName(QuerierProps.OUTPUTFILE); optionOUTPUTFILE.setType(String.class); options.addOption(optionOUTPUTFILE); // NUMTHREADS - Option optionNUMTHREADS = new Option("nt", NUMTHREADS, true, "required -- Number of threads to use for encryption/decryption"); + Option optionNUMTHREADS = new Option("nt", QuerierProps.NUMTHREADS, true, "required -- Number of threads to use for encryption/decryption"); optionNUMTHREADS.setRequired(false); - optionNUMTHREADS.setArgName(NUMTHREADS); + optionNUMTHREADS.setArgName(QuerierProps.NUMTHREADS); optionNUMTHREADS.setType(String.class); options.addOption(optionNUMTHREADS); - // data.schemas - Option optionDataSchemas = new Option("ds", DATASCHEMAS, true, "required -- Comma separated list of data schema file names"); + // DATASCHEMAS + Option optionDataSchemas = new Option("ds", QuerierProps.DATASCHEMAS, true, "optional -- Comma separated list of data schema file names"); optionDataSchemas.setRequired(false); - optionDataSchemas.setArgName(DATASCHEMAS); + optionDataSchemas.setArgName(QuerierProps.DATASCHEMAS); optionDataSchemas.setType(String.class); options.addOption(optionDataSchemas); - // query.schemas - Option optionQuerySchemas = new Option("qs", QUERYSCHEMAS, true, "required -- Comma separated list of query schema file names"); + // QUERYSCHEMAS + Option optionQuerySchemas = new Option("qs", QuerierProps.QUERYSCHEMAS, true, "optional -- Comma separated list of query schema file names"); optionQuerySchemas.setRequired(false); - optionQuerySchemas.setArgName(QUERYSCHEMAS); + optionQuerySchemas.setArgName(QuerierProps.QUERYSCHEMAS); optionQuerySchemas.setType(String.class); options.addOption(optionQuerySchemas); // TYPE - Option optionTYPE = new Option("qt", TYPE, true, "required for encryption -- Type of the query as defined " + Option optionTYPE = new Option("qt", QuerierProps.TYPE, true, "required for encryption -- Type of the query as defined " + "in the 'schemaName' tag of the corresponding query schema file"); optionTYPE.setRequired(false); - optionTYPE.setArgName(TYPE); + optionTYPE.setArgName(QuerierProps.TYPE); optionTYPE.setType(String.class); options.addOption(optionTYPE); // NAME - Option optionNAME = new Option("qn", QUERYNAME, true, "required for encryption -- Name of the query"); + Option optionNAME = new Option("qn", QuerierProps.QUERYNAME, true, "required for encryption -- Name of the query"); optionNAME.setRequired(false); - optionNAME.setArgName(QUERYNAME); + optionNAME.setArgName(QuerierProps.QUERYNAME); optionNAME.setType(String.class); options.addOption(optionNAME); // HASHBITSIZE - Option optionHASHBITSIZE = new Option("hb", HASHBITSIZE, true, "required -- Bit size of keyed hash"); + Option optionHASHBITSIZE = new Option("hb", QuerierProps.HASHBITSIZE, true, "required -- Bit size of keyed hash"); optionHASHBITSIZE.setRequired(false); - 
optionHASHBITSIZE.setArgName(HASHBITSIZE); + optionHASHBITSIZE.setArgName(QuerierProps.HASHBITSIZE); optionHASHBITSIZE.setType(String.class); options.addOption(optionHASHBITSIZE); // HASHKEY - Option optionHASHKEY = new Option("hk", HASHKEY, true, "required for encryption -- String key for the keyed hash functionality"); + Option optionHASHKEY = new Option("hk", QuerierProps.HASHKEY, true, "required for encryption -- String key for the keyed hash functionality"); optionHASHKEY.setRequired(false); - optionHASHKEY.setArgName(HASHKEY); + optionHASHKEY.setArgName(QuerierProps.HASHKEY); optionHASHKEY.setType(String.class); options.addOption(optionHASHKEY); // DATAPARTITIONSIZE - Option optionDATAPARTITIONSIZE = new Option("dps", DATAPARTITIONSIZE, true, "required for encryption -- Partition bit size in data partitioning"); + Option optionDATAPARTITIONSIZE = new Option("dps", QuerierProps.DATAPARTITIONSIZE, true, "required for encryption -- Partition bit size in data partitioning"); optionDATAPARTITIONSIZE.setRequired(false); - optionDATAPARTITIONSIZE.setArgName(DATAPARTITIONSIZE); + optionDATAPARTITIONSIZE.setArgName(QuerierProps.DATAPARTITIONSIZE); optionDATAPARTITIONSIZE.setType(String.class); options.addOption(optionDATAPARTITIONSIZE); // PAILLIERBITSIZE - Option optionPAILLIERBITSIZE = new Option("pbs", PAILLIERBITSIZE, true, "required for encryption -- Paillier modulus size N"); + Option optionPAILLIERBITSIZE = new Option("pbs", QuerierProps.PAILLIERBITSIZE, true, "required for encryption -- Paillier modulus size N"); optionPAILLIERBITSIZE.setRequired(false); - optionPAILLIERBITSIZE.setArgName(PAILLIERBITSIZE); + optionPAILLIERBITSIZE.setArgName(QuerierProps.PAILLIERBITSIZE); optionPAILLIERBITSIZE.setType(String.class); options.addOption(optionPAILLIERBITSIZE); // CERTAINTY - Option optionCERTAINTY = new Option("c", CERTAINTY, true, + Option optionCERTAINTY = new Option("c", QuerierProps.CERTAINTY, true, "required for encryption -- Certainty of prime generation for Paillier -- must be greater than or " + "equal to " + SystemConfiguration.getProperty("pir.primeCertainty") + ""); optionCERTAINTY.setRequired(false); - optionCERTAINTY.setArgName(CERTAINTY); + optionCERTAINTY.setArgName(QuerierProps.CERTAINTY); optionCERTAINTY.setType(String.class); options.addOption(optionCERTAINTY); // BITSET - Option optionBITSET = new Option("b", BITSET, true, "required for encryption -- Ensure that this bit position is set in the " + Option optionBITSET = new Option("b", QuerierProps.BITSET, true, "required for encryption -- Ensure that this bit position is set in the " + "Paillier modulus (will generate Paillier moduli until finding one in which this bit is set)"); optionBITSET.setRequired(false); - optionBITSET.setArgName(BITSET); + optionBITSET.setArgName(QuerierProps.BITSET); optionBITSET.setType(String.class); options.addOption(optionBITSET); // embedSelector - Option optionEmbedSelector = new Option("embed", EMBEDSELECTOR, true, "required for encryption -- 'true' or 'false' - Whether or not to embed " + Option optionEmbedSelector = new Option("embed", QuerierProps.EMBEDSELECTOR, true, "required for encryption -- 'true' or 'false' - Whether or not to embed " + "the selector in the results to reduce false positives"); optionEmbedSelector.setRequired(false); - optionEmbedSelector.setArgName(EMBEDSELECTOR); + optionEmbedSelector.setArgName(QuerierProps.EMBEDSELECTOR); optionEmbedSelector.setType(String.class); options.addOption(optionEmbedSelector); // useMemLookupTable - Option optionUseMemLookupTable = 
new Option("mlu", USEMEMLOOKUPTABLE, true, + Option optionUseMemLookupTable = new Option("mlu", QuerierProps.USEMEMLOOKUPTABLE, true, "required for encryption -- 'true' or 'false' - Whether or not to generate and use " + "an in memory modular exponentation lookup table - only for standalone/testing right now..."); optionUseMemLookupTable.setRequired(false); - optionUseMemLookupTable.setArgName(USEMEMLOOKUPTABLE); + optionUseMemLookupTable.setArgName(QuerierProps.USEMEMLOOKUPTABLE); optionUseMemLookupTable.setType(String.class); options.addOption(optionUseMemLookupTable); // useHDFSLookupTable - Option optionUseHDFSLookupTable = new Option("lu", USEHDFSLOOKUPTABLE, true, + Option optionUseHDFSLookupTable = new Option("lu", QuerierProps.USEHDFSLOOKUPTABLE, true, "required for encryption -- 'true' or 'false' -- Whether or not to generate and use " + "a hdfs modular exponentation lookup table"); optionUseHDFSLookupTable.setRequired(false); - optionUseHDFSLookupTable.setArgName(USEHDFSLOOKUPTABLE); + optionUseHDFSLookupTable.setArgName(QuerierProps.USEHDFSLOOKUPTABLE); optionUseHDFSLookupTable.setType(String.class); options.addOption(optionUseHDFSLookupTable); // QUERIERFILE - Option optionQUERIERFILE = new Option("qf", QUERIERFILE, true, "required for decryption -- Fully qualified file containing the serialized Querier object"); + Option optionQUERIERFILE = new Option("qf", QuerierProps.QUERIERFILE, true, "required for decryption -- Fully qualified file containing the serialized Querier object"); optionQUERIERFILE.setRequired(false); - optionQUERIERFILE.setArgName(QUERIERFILE); + optionQUERIERFILE.setArgName(QuerierProps.QUERIERFILE); optionQUERIERFILE.setType(String.class); options.addOption(optionQUERIERFILE); // embedQuerySchema - Option optionEMBEDQUERYSCHEMA = new Option("embedQS", EMBEDQUERYSCHEMA, true, + Option optionEMBEDQUERYSCHEMA = new Option("embedQS", QuerierProps.EMBEDQUERYSCHEMA, true, "optional (defaults to false) -- Whether or not to embed the QuerySchema in the Query (via QueryInfo)"); optionEMBEDQUERYSCHEMA.setRequired(false); - optionEMBEDQUERYSCHEMA.setArgName(EMBEDQUERYSCHEMA); + optionEMBEDQUERYSCHEMA.setArgName(QuerierProps.EMBEDQUERYSCHEMA); optionEMBEDQUERYSCHEMA.setType(String.class); options.addOption(optionEMBEDQUERYSCHEMA); // SR_ALGORITHM - Option optionSR_ALGORITHM = new Option("srAlg", SR_ALGORITHM, true, "optional - specify the SecureRandom algorithm, defaults to NativePRNG"); + Option optionSR_ALGORITHM = new Option("srAlg", QuerierProps.SR_ALGORITHM, true, "optional - specify the SecureRandom algorithm, defaults to NativePRNG"); optionSR_ALGORITHM.setRequired(false); - optionSR_ALGORITHM.setArgName(SR_ALGORITHM); + optionSR_ALGORITHM.setArgName(QuerierProps.SR_ALGORITHM); optionSR_ALGORITHM.setType(String.class); options.addOption(optionSR_ALGORITHM); // SR_PROVIDERS - Option optionSR_PROVIDER = new Option("srProvider", SR_PROVIDER, true, "optional - specify the SecureRandom provider, defaults to SUN"); + Option optionSR_PROVIDER = new Option("srProvider", QuerierProps.SR_PROVIDER, true, "optional - specify the SecureRandom provider, defaults to SUN"); optionSR_PROVIDER.setRequired(false); - optionSR_PROVIDER.setArgName(SR_PROVIDER); + optionSR_PROVIDER.setArgName(QuerierProps.SR_PROVIDER); optionSR_PROVIDER.setType(String.class); options.addOption(optionSR_PROVIDER); diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java new file mode 100644 index 00000000..c10da551 
--- /dev/null +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java @@ -0,0 +1,182 @@ +package org.apache.pirk.querier.wideskies; + +import java.util.Arrays; +import java.util.List; + +import org.apache.pirk.utils.SystemConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Properties constants for the Querier + */ +public class QuerierProps +{ + private static final Logger logger = LoggerFactory.getLogger(QuerierProps.class); + + //General properties + public static final String ACTION = "querier.action"; + public static final String INPUTFILE = "querier.inputFile"; + public static final String OUTPUTFILE = "querier.outputFile"; + public static final String TYPE = "querier.queryType"; + public static final String NUMTHREADS = "querier.numThreads"; + public static final String EMBEDQUERYSCHEMA = "querier.embedQuerySchema"; + + // Encryption properties + public static final String HASHBITSIZE = "querier.hashBitSize"; + public static final String HASHKEY = "querier.hashKey"; + public static final String DATAPARTITIONSIZE = "querier.dataPartitionBitSize"; + public static final String PAILLIERBITSIZE = "querier.paillierBitSize"; + public static final String BITSET = "querier.bitSet"; + public static final String CERTAINTY = "querier.certainty"; + public static final String QUERYNAME = "querier.queryName"; + public static final String QUERYSCHEMAS = "querier.querySchemas"; + public static final String DATASCHEMAS = "querier.dataSchemas"; + public static final String EMBEDSELECTOR = "querier.embedSelector"; + public static final String USEMEMLOOKUPTABLE = "querier.memLookupTable"; + public static final String USEHDFSLOOKUPTABLE = "querier.useHDFSLookupTable"; + public static final String SR_ALGORITHM = "querier.secureRandomAlg"; + public static final String SR_PROVIDER = "querier.secureRandomProvider"; + + // Decryption properties + public static final String QUERIERFILE = "querier.querierFile"; + + public static final List PROPSLIST = Arrays.asList(ACTION, INPUTFILE, OUTPUTFILE, TYPE, NUMTHREADS, + EMBEDQUERYSCHEMA, HASHBITSIZE, HASHKEY, DATAPARTITIONSIZE, PAILLIERBITSIZE, BITSET, + CERTAINTY, QUERYNAME, QUERYSCHEMAS, DATASCHEMAS, EMBEDSELECTOR, USEMEMLOOKUPTABLE, + USEHDFSLOOKUPTABLE, SR_ALGORITHM, SR_PROVIDER); + + /** + * Validates the querier properties + * + */ + public static boolean validateQuerierProperties() + { + boolean valid = true; + + // Parse general required options + if (!SystemConfiguration.hasProperty(ACTION)) + { + logger.info("Must have the option " + ACTION); + return false; + } + String action = SystemConfiguration.getProperty(ACTION).toLowerCase(); + if (!action.equals("encrypt") && !action.equals("decrypt")) + { + logger.info("Unsupported action: " + action); + } + + if (!SystemConfiguration.hasProperty(INPUTFILE)) + { + logger.info("Must have the option " + INPUTFILE); + return false; + } + + if (!SystemConfiguration.hasProperty(OUTPUTFILE)) + { + logger.info("Must have the option " + OUTPUTFILE); + return false; + } + + if (!SystemConfiguration.hasProperty(NUMTHREADS)) + { + logger.info("Must have the option " + NUMTHREADS); + return false; + } + + // Parse general optional args + if (!SystemConfiguration.hasProperty(EMBEDQUERYSCHEMA)) + { + SystemConfiguration.setProperty("pir.embedQuerySchema", "true"); + } + + // Parse encryption args + if (action.equals("encrypt")) + { + if (!SystemConfiguration.hasProperty(TYPE)) + { + logger.info("Must have the option " + TYPE); + return false; + } + + if 
(!SystemConfiguration.hasProperty(HASHBITSIZE)) + { + logger.info("Must have the option " + HASHBITSIZE); + return false; + } + + if (!SystemConfiguration.hasProperty(HASHKEY)) + { + logger.info("Must have the option " + HASHKEY); + return false; + } + + if (!SystemConfiguration.hasProperty(DATAPARTITIONSIZE)) + { + logger.info("Must have the option " + DATAPARTITIONSIZE); + return false; + } + + if (!SystemConfiguration.hasProperty(PAILLIERBITSIZE)) + { + logger.info("Must have the option " + PAILLIERBITSIZE); + return false; + } + + if (!SystemConfiguration.hasProperty(CERTAINTY)) + { + logger.info("Must have the option " + CERTAINTY); + return false; + } + + if (!SystemConfiguration.hasProperty(QUERYNAME)) + { + logger.info("Must have the option " + QUERYNAME); + return false; + } + + if (!SystemConfiguration.hasProperty(BITSET)) + { + logger.info("Must have the option " + BITSET); + return false; + } + + if (SystemConfiguration.hasProperty(QUERYSCHEMAS)) + { + SystemConfiguration.appendProperty("query.schemas", SystemConfiguration.getProperty(QUERYSCHEMAS)); + } + + if (SystemConfiguration.hasProperty(DATASCHEMAS)) + { + SystemConfiguration.appendProperty("data.schemas", SystemConfiguration.getProperty(DATASCHEMAS)); + } + + if (!SystemConfiguration.hasProperty(EMBEDSELECTOR)) + { + SystemConfiguration.setProperty(EMBEDSELECTOR, "true"); + } + + if (!SystemConfiguration.hasProperty(USEMEMLOOKUPTABLE)) + { + SystemConfiguration.setProperty(USEMEMLOOKUPTABLE, "false"); + } + + if (!SystemConfiguration.hasProperty(USEHDFSLOOKUPTABLE)) + { + SystemConfiguration.setProperty(USEHDFSLOOKUPTABLE, "false"); + } + } + + // Parse decryption args + if (action.equals("decrypt")) + { + if (!SystemConfiguration.hasProperty(QUERIERFILE)) + { + logger.info("Must have the option " + QUERIERFILE); + return false; + } + } + + return valid; + } +} diff --git a/src/main/java/org/apache/pirk/utils/SystemConfiguration.java b/src/main/java/org/apache/pirk/utils/SystemConfiguration.java index b529e8f2..e04a9242 100755 --- a/src/main/java/org/apache/pirk/utils/SystemConfiguration.java +++ b/src/main/java/org/apache/pirk/utils/SystemConfiguration.java @@ -34,7 +34,9 @@ *

* 1) Load in the DEFAULT_PROPERTY_FILE, if found on the classpath. (Currently 'pirk.properties') *

- * 2) Load in any properties from LOCAL_PROPERTY_FILE + * 2) Load in any properties files in the LOCAL_PROPERTIES_DIR. The filenames must end with '.properties' + *

+ * 3) Load in properties from the QUERIER_PROPERTIES_FILE and RESPONDER_PROPERTIES_FILE * */ public class SystemConfiguration @@ -48,10 +50,11 @@ public class SystemConfiguration */ private static final String DEFAULT_PROPERTY_FILE = "pirk.properties"; - /** - * By default, if the local.pirk.properties file is found on the root of the classpath, it is loaded after pirk.properites. - */ - private static final String LOCAL_PROPERTY_FILE = "local.pirk.properties"; + private static final String LOCAL_PROPERTIES_DIR = "local.pirk.properties.dir"; + + private static final String QUERIER_PROPERTIES_FILE = "querier.properties"; + + private static final String RESPONDER_PROPERTIES_FILE = "responder.properties"; static { @@ -82,40 +85,14 @@ public class SystemConfiguration public static void initialize() { // First try to load the default properties file - try - { - InputStream stream = SystemConfiguration.class.getClassLoader().getResourceAsStream(DEFAULT_PROPERTY_FILE); - if (stream != null) - { - logger.info("Loading default properties file '" + DEFAULT_PROPERTY_FILE + "'"); - props.load(stream); - stream.close(); - } - else - { - logger.error("No default configuration file found '" + DEFAULT_PROPERTY_FILE + "'"); - } - } catch (IOException e) - { - logger.error("Problem loading default properties file '" + DEFAULT_PROPERTY_FILE + "'"); - e.printStackTrace(); - } - - // Try to load the local properties file, if one exists - File localFile = new File(getProperty(LOCAL_PROPERTY_FILE)); - if (localFile.exists()) - { - try (InputStream stream = new FileInputStream(localFile);) - { - logger.info("Loading local properties file '" + localFile.getAbsolutePath() + "'"); - props.load(stream); - stream.close(); - } catch (IOException e) - { - logger.error("Problem loading local properties file '" + localFile.getAbsolutePath() + "'"); - e.printStackTrace(); - } - } + loadPropsFromFile(DEFAULT_PROPERTY_FILE); + + // Try to load props from the querier and responder property files, if they exist + loadPropsFromFile(QUERIER_PROPERTIES_FILE); + loadPropsFromFile(RESPONDER_PROPERTIES_FILE); + + // Try to load the local properties files, if they exists + loadPropsFromDir(LOCAL_PROPERTIES_DIR); } /** @@ -151,7 +128,31 @@ public static void setProperty(String propertyName, String value) { props.setProperty(propertyName, value); } + + public static boolean hasProperty(String propertyName) + { + return props.contains(propertyName); + } + /** + * Append a property via a column separated list + *

+ * If the property does not exist, it adds it + */ + public static void appendProperty(String property, String propToAdd) + { + String value = props.getProperty(property); + if(value != null) + { + value += "," + propToAdd; + } + else + { + value = propToAdd; + } + props.setProperty(property, value); + } + /** * Reset all properties to the default values */ @@ -160,4 +161,58 @@ public static void resetProperties() clearProperties(); initialize(); } + + /** + * Loads the properties from local properties file in the specified directory + *

+ * Only files ending in '.properties' will be loaded + */ + public static void loadPropsFromDir(String dirName) + { + File dir = new File(dirName); + File[] directoryListing = dir.listFiles(); + if (directoryListing != null) + { + for (File file : directoryListing) + { + if(file.getName().endsWith(".properties")) + { + loadPropsFromFile(file); + } + } + } + } + + /** + * Loads the properties from the specified file + */ + public static void loadPropsFromFile(String fileName) + { + File file = new File(getProperty(fileName)); + loadPropsFromFile(file); + } + + /** + * Loads the properties from the specified file + */ + public static void loadPropsFromFile(File file) + { + if (file.exists()) + { + try (InputStream stream = new FileInputStream(file);) + { + logger.info("Loading properties file '" + file.getAbsolutePath() + "'"); + props.load(stream); + stream.close(); + } catch (IOException e) + { + logger.error("Problem loading properties file '" + file.getAbsolutePath() + "'"); + e.printStackTrace(); + } + } + else + { + logger.warn("Properties file does not exist: '" + file.getAbsolutePath() + "'"); + } + } } diff --git a/src/main/resources/pirk.properties b/src/main/resources/pirk.properties index d42f16f1..8f631383 100755 --- a/src/main/resources/pirk.properties +++ b/src/main/resources/pirk.properties @@ -24,9 +24,9 @@ # Name of log4j properties file (relative to current folder) log4jPropertiesFile=log4j2.properties -#Name of the local properties file - used when running with the -#hadoop jar command -local.pirk.properties=/root/local.pirk.properties +#Name of the directory holding the local properties files +#All property files must end in .properties +local.pirk.properties.dir=/root/properties ## ##Spark path for SparkLauncher diff --git a/src/main/resources/querier.properties b/src/main/resources/querier.properties new file mode 100644 index 00000000..351c2ce2 --- /dev/null +++ b/src/main/resources/querier.properties @@ -0,0 +1,120 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +############################################################################### + + +#action -- required - 'encrypt' or 'decrypt' -- The action performed by the QuerierDriver +querier.action= + +#inputFile - required - Fully qualified file containing input +#The input is either: +#(1) For Encryption: A query file - Contains the query selectors, one per line; +#the first line must be the query number +#OR +#(2) For Decryption: A response file - Contains the serialized Response object +querier.inputFile= + +#outputFile -- required - Fully qualified file for the result output. 
+#The output file specifies either: +#(1) For encryption: +#(a) A file to contain the serialized Querier object named: -querier +#AND +#(b) A file to contain the serialized Query object named: -query +#OR +#(2) A file to contain the decryption results where each line is where each line +#corresponds to one hit and is a JSON object with the schema QuerySchema +querier.outputFile= + +#numThreads -- required -- Number of threads to use for encryption/decryption +querier.numThreads= + +## +## Optional Args - Leave empty if not using/not changing default values +## + +#dataSchemas -- optional -- Comma separated list of data schema file names +querier.dataSchemas= + +#querySchemas -- optional -- Comma separated list of query schema file names +querier.dataSchemas= + + +## Optional, but required for Encryption (ignored if not encrypting) + +#bitset -- required for encryption -- Ensure that this bit position is set in the Paillier +#modulus (will generate Paillier moduli until finding one in which this bit is set) +querier.bitSet= + +#certainty -- required for encryption -- Certainty of prime generation for Paillier +#must be greater than or equal to 128 +querier.certainty= + +#dataPartitionBitSize -- required for encryption -- Partition bit size in data partitioning +querier.dataPartitionBitSize= + +#embedSelector - required for encryption -- 'true' or 'false' +#Whether or not to embed the selector in the results to reduce false positives +#Defaults to 'true' +querier.embedSelector= + +#embedQuerySchema - true or false +#Whether or not to embed the QuerySchema in the Query (via QueryInfo) +#Defaults to 'false' +querier.embedQuerySchema= + +#hashBitSize - required for encryption-- Bit size of keyed hash +querier.hashBitSize= + +#hashKey -- required for encryption -- String key for the keyed hash functionality +querier.hashKey= + +#useHDFSLookupTable -- required for encryption -- 'true' or 'false' +#Whether or not to generate and use a hdfs modular exponentation lookup table +#Defaults to 'false' +querier.useHDFSLookupTable= + +#memLookupTable -- required for encryption -- 'true' or 'false' +#Whether or not to generate and use an in memory modular exponentation lookup table - only for +#standalone/testing right now... 
+#Defaults to 'false' +querier.memLookupTable= + +#paillierBitSize -- required for encryption -- Paillier modulus size N +querier.paillierBitSize= + +#queryName -- required for encryption -- Name of the query +querier.queryName= + +#queryType -- required for encryption +#Type of the query as defined in the 'schemaName' tag of the corresponding query schema file +querier.queryType= + +#secureRandomAlg -- specify the SecureRandom algorithm +#Defaults to NativePRNG +querier.secureRandomAlg= + +#secureRandomProvider -- specify the SecureRandom provider +#Defaults to SUN +querier.secureRandomProvider= + +## Optional, but required for Decryption (ignored if not decrypting) + +#querierFile -- required for decryption +#Fully qualified file containing the serialized Querier object +querier.querierFile= + \ No newline at end of file diff --git a/src/main/resources/responder.properties b/src/main/resources/responder.properties new file mode 100644 index 00000000..e69de29b From 1bc5b5113a83bdb6c06df7e7ab36760f62b5cb21 Mon Sep 17 00:00:00 2001 From: eawilliams Date: Tue, 26 Jul 2016 18:19:05 -0400 Subject: [PATCH 04/10] intermediate changes to Querier properties -2 --- .../pirk/querier/wideskies/QuerierDriver.java | 2 +- .../querier/wideskies/QuerierDriverCLI.java | 6 +- .../pirk/querier/wideskies/QuerierProps.java | 53 +-- .../responder/wideskies/ResponderCLI.java | 375 ++++-------------- .../responder/wideskies/ResponderDriver.java | 6 +- .../responder/wideskies/ResponderProps.java | 207 ++++++++++ .../distributed/testsuite/DistTestSuite.java | 40 +- src/main/resources/responder.properties | 138 +++++++ 8 files changed, 479 insertions(+), 348 deletions(-) create mode 100644 src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java index a6d259d5..f28976f1 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriver.java @@ -111,7 +111,7 @@ public static void main(String... 
args) throws IOException, InterruptedException numThreads = Integer.parseInt(SystemConfiguration.getProperty(QuerierProps.NUMTHREADS)); if (action.equals("encrypt")) { - queryType = SystemConfiguration.getProperty(QuerierProps.TYPE); + queryType = SystemConfiguration.getProperty(QuerierProps.QUERYTYPE); queryName = SystemConfiguration.getProperty(QuerierProps.QUERYNAME); hashBitSize = Integer.parseInt(SystemConfiguration.getProperty(QuerierProps.HASHBITSIZE)); hashKey = SystemConfiguration.getProperty(QuerierProps.HASHBITSIZE); diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java index 59bd73f2..4005395d 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java @@ -126,7 +126,7 @@ private boolean parseOptions() SystemConfiguration.setProperty(prop, getOptionValue(prop)); } } - } + } //Validate properties valid = QuerierProps.validateQuerierProperties(); @@ -218,10 +218,10 @@ private Options createOptions() options.addOption(optionQuerySchemas); // TYPE - Option optionTYPE = new Option("qt", QuerierProps.TYPE, true, "required for encryption -- Type of the query as defined " + Option optionTYPE = new Option("qt", QuerierProps.QUERYTYPE, true, "required for encryption -- Type of the query as defined " + "in the 'schemaName' tag of the corresponding query schema file"); optionTYPE.setRequired(false); - optionTYPE.setArgName(QuerierProps.TYPE); + optionTYPE.setArgName(QuerierProps.QUERYTYPE); optionTYPE.setType(String.class); options.addOption(optionTYPE); diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java index c10da551..c350b4d6 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java @@ -8,7 +8,7 @@ import org.slf4j.LoggerFactory; /** - * Properties constants for the Querier + * Properties constants and validation for the Querier */ public class QuerierProps { @@ -18,10 +18,9 @@ public class QuerierProps public static final String ACTION = "querier.action"; public static final String INPUTFILE = "querier.inputFile"; public static final String OUTPUTFILE = "querier.outputFile"; - public static final String TYPE = "querier.queryType"; + public static final String QUERYTYPE = "querier.queryType"; public static final String NUMTHREADS = "querier.numThreads"; - public static final String EMBEDQUERYSCHEMA = "querier.embedQuerySchema"; - + // Encryption properties public static final String HASHBITSIZE = "querier.hashBitSize"; public static final String HASHKEY = "querier.hashKey"; @@ -35,13 +34,14 @@ public class QuerierProps public static final String EMBEDSELECTOR = "querier.embedSelector"; public static final String USEMEMLOOKUPTABLE = "querier.memLookupTable"; public static final String USEHDFSLOOKUPTABLE = "querier.useHDFSLookupTable"; - public static final String SR_ALGORITHM = "querier.secureRandomAlg"; - public static final String SR_PROVIDER = "querier.secureRandomProvider"; + public static final String SR_ALGORITHM = "pallier.secureRandom.algorithm"; + public static final String SR_PROVIDER = "pallier.secureRandom.provider"; + public static final String EMBEDQUERYSCHEMA = "pir.embedQuerySchema"; // Decryption properties public static final String QUERIERFILE = "querier.querierFile"; - public static 
final List PROPSLIST = Arrays.asList(ACTION, INPUTFILE, OUTPUTFILE, TYPE, NUMTHREADS, + public static final List PROPSLIST = Arrays.asList(ACTION, INPUTFILE, OUTPUTFILE, QUERYTYPE, NUMTHREADS, EMBEDQUERYSCHEMA, HASHBITSIZE, HASHKEY, DATAPARTITIONSIZE, PAILLIERBITSIZE, BITSET, CERTAINTY, QUERYNAME, QUERYSCHEMAS, DATASCHEMAS, EMBEDSELECTOR, USEMEMLOOKUPTABLE, USEHDFSLOOKUPTABLE, SR_ALGORITHM, SR_PROVIDER); @@ -54,91 +54,94 @@ public static boolean validateQuerierProperties() { boolean valid = true; - // Parse general required options + // Parse general required properties + if (!SystemConfiguration.hasProperty(ACTION)) { logger.info("Must have the option " + ACTION); - return false; + valid = false; } String action = SystemConfiguration.getProperty(ACTION).toLowerCase(); if (!action.equals("encrypt") && !action.equals("decrypt")) { logger.info("Unsupported action: " + action); + valid = false; } if (!SystemConfiguration.hasProperty(INPUTFILE)) { logger.info("Must have the option " + INPUTFILE); - return false; + valid = false; } if (!SystemConfiguration.hasProperty(OUTPUTFILE)) { logger.info("Must have the option " + OUTPUTFILE); - return false; + valid = false; } if (!SystemConfiguration.hasProperty(NUMTHREADS)) { logger.info("Must have the option " + NUMTHREADS); - return false; + valid = false; } - // Parse general optional args + // Parse general optional properties if (!SystemConfiguration.hasProperty(EMBEDQUERYSCHEMA)) { SystemConfiguration.setProperty("pir.embedQuerySchema", "true"); } - // Parse encryption args + // Parse encryption properties + if (action.equals("encrypt")) { - if (!SystemConfiguration.hasProperty(TYPE)) + if (!SystemConfiguration.hasProperty(QUERYTYPE)) { - logger.info("Must have the option " + TYPE); - return false; + logger.info("Must have the option " + QUERYTYPE); + valid = false; } if (!SystemConfiguration.hasProperty(HASHBITSIZE)) { logger.info("Must have the option " + HASHBITSIZE); - return false; + valid = false; } if (!SystemConfiguration.hasProperty(HASHKEY)) { logger.info("Must have the option " + HASHKEY); - return false; + valid = false; } if (!SystemConfiguration.hasProperty(DATAPARTITIONSIZE)) { logger.info("Must have the option " + DATAPARTITIONSIZE); - return false; + valid = false; } if (!SystemConfiguration.hasProperty(PAILLIERBITSIZE)) { logger.info("Must have the option " + PAILLIERBITSIZE); - return false; + valid = false; } if (!SystemConfiguration.hasProperty(CERTAINTY)) { logger.info("Must have the option " + CERTAINTY); - return false; + valid = false; } if (!SystemConfiguration.hasProperty(QUERYNAME)) { logger.info("Must have the option " + QUERYNAME); - return false; + valid = false; } if (!SystemConfiguration.hasProperty(BITSET)) { logger.info("Must have the option " + BITSET); - return false; + valid = false; } if (SystemConfiguration.hasProperty(QUERYSCHEMAS)) @@ -173,7 +176,7 @@ public static boolean validateQuerierProperties() if (!SystemConfiguration.hasProperty(QUERIERFILE)) { logger.info("Must have the option " + QUERIERFILE); - return false; + valid = false; } } diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java index e60c262c..5355afb0 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java @@ -24,7 +24,6 @@ import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; 
-import org.apache.pirk.inputformat.hadoop.InputFormatConst; import org.apache.pirk.schema.data.LoadDataSchemas; import org.apache.pirk.schema.query.LoadQuerySchemas; import org.apache.pirk.utils.SystemConfiguration; @@ -41,37 +40,8 @@ public class ResponderCLI private Options cliOptions = null; private CommandLine commandLine = null; - // Required args - public static String PLATFORM = "platform"; - public static String QUERYINPUT = "queryInput"; - public static String DATAINPUTFORMAT = "dataInputFormat"; - public static String INPUTDATA = "inputData"; - public static String BASEQUERY = "baseQuery"; - public static String ESRESOURCE = "esResource"; - public static String ESQUERY = "esQuery"; - public static String OUTPUTFILE = "outputFile"; - - // Optional args - public static String BASEINPUTFORMAT = "baseInputFormat"; - public static String STOPLISTFILE = "stopListFile"; - private static String NUMREDUCETASKS = "numReduceTasks"; - public static String USELOCALCACHE = "useLocalCache"; - public static String LIMITHITSPERSELECTOR = "limitHitsPerSelector"; - public static String MAXHITSPERSELECTOR = "maxHitsPerSelector"; - private static String MAPMEMORY = "mapreduceMapMemoryMb"; - private static String REDUCEMEMORY = "mapreduceReduceMemoryMb"; - private static String MAPJAVAOPTS = "mapreduceMapJavaOpts"; - private static String REDUCEJAVAOPTS = "mapreduceReduceJavaOpts"; - public static String QUERYSCHEMAS = "querySchemas"; - public static String DATASCHEMAS = "dataSchemas"; - public static String NUMEXPLOOKUPPARTS = "numExpLookupPartitions"; - private static String USEHDFSLOOKUPTABLE = "useHDFSLookupTable"; - private static String NUMDATAPARTITIONS = "numDataPartitions"; - public static String NUMCOLMULTPARTITIONS = "numColMultPartitions"; - public static String USEMODEXPJOIN = "useModExpJoin"; - public static String COLMULTREDUCEBYKEY = "colMultReduceByKey"; - public static String ALLOWEMBEDDEDQUERYSCHEMAS = "allowAdHocQuerySchemas"; - + private static final String LOCALPROPFILE = "local.responder.properties"; + /** * Create and parse allowable options * @@ -140,228 +110,41 @@ public String getOptionValue(String option) private boolean parseOptions() { boolean valid = true; - - // Parse general required options - if (!hasOption(PLATFORM)) - { - logger.info("Must have the option " + PLATFORM); - return false; - } - String platform = getOptionValue(PLATFORM).toLowerCase(); - if (!platform.equals("mapreduce") && !platform.equals("spark") && !platform.equals("standalone")) - { - logger.info("Unsupported platform: " + platform); - return false; - } - SystemConfiguration.setProperty("platform", getOptionValue(PLATFORM)); - - if (!hasOption(QUERYINPUT)) + + //If we have a local.querier.properties file specified, load it + if(hasOption(LOCALPROPFILE)) { - logger.info("Must have the option " + QUERYINPUT); - return false; + SystemConfiguration.loadPropsFromFile(getOptionValue(LOCALPROPFILE)); } - SystemConfiguration.setProperty("pir.queryInput", getOptionValue(QUERYINPUT)); - - if (!hasOption(OUTPUTFILE)) - { - logger.info("Must have the option " + OUTPUTFILE); - return false; - } - SystemConfiguration.setProperty("pir.outputFile", getOptionValue(OUTPUTFILE)); - - if (!hasOption(QUERYSCHEMAS)) - { - logger.info("Must have the option " + QUERYSCHEMAS); - return false; - } - SystemConfiguration.setProperty("query.schemas", getOptionValue(QUERYSCHEMAS)); - - if (!hasOption(DATASCHEMAS)) + else { - logger.info("Must have the option " + DATASCHEMAS); - return false; - } - 
SystemConfiguration.setProperty("data.schemas", getOptionValue(DATASCHEMAS)); - - if (!hasOption(DATAINPUTFORMAT)) - { - logger.info("Must have the option " + DATAINPUTFORMAT); - return false; - } - String dataInputFormat = getOptionValue(DATAINPUTFORMAT).toLowerCase(); - SystemConfiguration.setProperty("pir.dataInputFormat", dataInputFormat); - - // Parse required options by dataInputFormat - if (dataInputFormat.equals(InputFormatConst.BASE_FORMAT)) - { - if (!hasOption(BASEINPUTFORMAT)) + //Pull options, set as properties + for(String prop: ResponderProps.PROPSLIST) { - logger.info("Must have the option " + BASEINPUTFORMAT + " if using " + InputFormatConst.BASE_FORMAT); - return false; + if(hasOption(prop)) + { + SystemConfiguration.setProperty(prop, getOptionValue(prop)); + } } - SystemConfiguration.setProperty("pir.baseInputFormat", getOptionValue(BASEINPUTFORMAT)); + } - if (!hasOption(INPUTDATA)) - { - logger.info("Must have the option " + INPUTDATA + " if using " + InputFormatConst.BASE_FORMAT); - return false; - } - SystemConfiguration.setProperty("pir.inputData", getOptionValue(INPUTDATA)); + //Validate properties + valid = ResponderProps.validateResponderProperties(); - if (hasOption(BASEQUERY)) - { - SystemConfiguration.setProperty("pir.baseQuery", getOptionValue(BASEQUERY)); - } - else - { - SystemConfiguration.setProperty("pir.baseQuery", "?q=*"); - } - } - else if (dataInputFormat.equals(InputFormatConst.ES)) + // Load the new local query and data schemas + if(valid) { - if (!hasOption(ESRESOURCE)) + logger.info("loading schemas: dataSchemas = " + SystemConfiguration.getProperty("data.schemas") + " querySchemas = " + + SystemConfiguration.getProperty("query.schemas")); + try { - logger.info("Must have the option " + ESRESOURCE); - return false; - } - SystemConfiguration.setProperty("pir.esResource", getOptionValue(ESRESOURCE)); + LoadDataSchemas.initialize(); + LoadQuerySchemas.initialize(); - if (!hasOption(ESQUERY)) - { - logger.info("Must have the option " + ESQUERY); - return false; - } - SystemConfiguration.setProperty("pir.esQuery", getOptionValue(ESQUERY)); - } - else if (dataInputFormat.equalsIgnoreCase("standalone")) - { - if (!hasOption(INPUTDATA)) + } catch (Exception e) { - logger.info("Must have the option " + INPUTDATA + " if using " + InputFormatConst.BASE_FORMAT); - return false; + e.printStackTrace(); } - SystemConfiguration.setProperty("pir.inputData", getOptionValue(INPUTDATA)); - } - else - { - logger.info("Unsupported inputFormat = " + dataInputFormat); - return false; - } - - // Parse optional args - if (hasOption(STOPLISTFILE)) - { - SystemConfiguration.setProperty("pir.stopListFile", getOptionValue(STOPLISTFILE)); - } - - if (hasOption(NUMREDUCETASKS)) - { - SystemConfiguration.setProperty("pir.numReduceTasks", getOptionValue(NUMREDUCETASKS)); - } - - if (hasOption(USELOCALCACHE)) - { - SystemConfiguration.setProperty("pir.useLocalCache", getOptionValue(USELOCALCACHE)); - } - - if (hasOption(LIMITHITSPERSELECTOR)) - { - SystemConfiguration.setProperty("pir.limitHitsPerSelector", getOptionValue(LIMITHITSPERSELECTOR)); - } - - if (hasOption(MAXHITSPERSELECTOR)) - { - SystemConfiguration.setProperty("pir.maxHitsPerSelector", getOptionValue(MAXHITSPERSELECTOR)); - } - - if (hasOption(MAPMEMORY)) - { - SystemConfiguration.setProperty("mapreduce.map.memory.mb", getOptionValue(MAPMEMORY)); - } - - if (hasOption(REDUCEMEMORY)) - { - SystemConfiguration.setProperty("mapreduce.reduce.memory.mb", getOptionValue(REDUCEMEMORY)); - } - - if (hasOption(MAPJAVAOPTS)) 
- { - SystemConfiguration.setProperty("mapreduce.map.java.opts", getOptionValue(MAPJAVAOPTS)); - } - - if (hasOption(REDUCEJAVAOPTS)) - { - SystemConfiguration.setProperty("mapreduce.reduce.java.opts", getOptionValue(REDUCEJAVAOPTS)); - } - - if (hasOption(NUMEXPLOOKUPPARTS)) - { - SystemConfiguration.setProperty("pir.numExpLookupPartitions", getOptionValue(NUMEXPLOOKUPPARTS)); - } - - if (hasOption(USEHDFSLOOKUPTABLE)) - { - SystemConfiguration.setProperty("pir.useHDFSLookupTable", getOptionValue(USEHDFSLOOKUPTABLE)); - } - else - { - SystemConfiguration.setProperty("pir.useHDFSLookupTable", "false"); - } - - if (hasOption(USEMODEXPJOIN)) - { - SystemConfiguration.setProperty("pir.useModExpJoin", getOptionValue(USEMODEXPJOIN)); - } - else - { - SystemConfiguration.setProperty("pir.useModExpJoin", "false"); - } - - if (hasOption(NUMDATAPARTITIONS)) - { - SystemConfiguration.setProperty("pir.numDataPartitions", getOptionValue(NUMDATAPARTITIONS)); - } - else - { - SystemConfiguration.setProperty("pir.numDataPartitions", "1000"); - } - - if (hasOption(NUMCOLMULTPARTITIONS)) - { - SystemConfiguration.setProperty("pir.numColMultPartitions", getOptionValue(NUMCOLMULTPARTITIONS)); - } - else - { - SystemConfiguration.setProperty("pir.numColMultPartitions", "1000"); - } - - if (hasOption(COLMULTREDUCEBYKEY)) - { - SystemConfiguration.setProperty("pir.colMultReduceByKey", getOptionValue(COLMULTREDUCEBYKEY)); - } - else - { - SystemConfiguration.setProperty("pir.colMultReduceByKey", "false"); - } - - if (hasOption(ALLOWEMBEDDEDQUERYSCHEMAS)) - { - SystemConfiguration.setProperty("pir.allowEmbeddedQuerySchemas", getOptionValue(ALLOWEMBEDDEDQUERYSCHEMAS)); - } - else - { - SystemConfiguration.setProperty("pir.allowEmbeddedQuerySchemas", "false"); - } - - // Load the new local query and data schemas - try - { - LoadDataSchemas.initialize(); - LoadQuerySchemas.initialize(); - - } catch (Exception e) - { - e.printStackTrace(); } return valid; @@ -382,204 +165,204 @@ private Options createOptions() options.addOption(optionHelp); // platform - Option optionPlatform = new Option("p", PLATFORM, true, + Option optionPlatform = new Option("p", ResponderProps.PLATFORM, true, "required -- 'mapreduce', 'spark', or 'standalone' : Processing platform technology for the responder"); optionPlatform.setRequired(false); - optionPlatform.setArgName(PLATFORM); + optionPlatform.setArgName(ResponderProps.PLATFORM); optionPlatform.setType(String.class); options.addOption(optionPlatform); // queryInput - Option optionQueryInput = new Option("q", QUERYINPUT, true, "required -- Fully qualified dir in hdfs of Query files"); + Option optionQueryInput = new Option("q", ResponderProps.QUERYINPUT, true, "required -- Fully qualified dir in hdfs of Query files"); optionQueryInput.setRequired(false); - optionQueryInput.setArgName(QUERYINPUT); + optionQueryInput.setArgName(ResponderProps.QUERYINPUT); optionQueryInput.setType(String.class); options.addOption(optionQueryInput); // dataInputFormat - Option optionDataInputFormat = new Option("d", DATAINPUTFORMAT, true, "required -- 'base', 'elasticsearch', or 'standalone' : Specify the input format"); + Option optionDataInputFormat = new Option("d", ResponderProps.DATAINPUTFORMAT, true, "required -- 'base', 'elasticsearch', or 'standalone' : Specify the input format"); optionDataInputFormat.setRequired(false); - optionDataInputFormat.setArgName(DATAINPUTFORMAT); + optionDataInputFormat.setArgName(ResponderProps.DATAINPUTFORMAT); optionDataInputFormat.setType(String.class); 
options.addOption(optionDataInputFormat); // inputData - Option optionInputData = new Option("i", INPUTDATA, true, "required -- Fully qualified name of input file/directory in hdfs; used if inputFormat = 'base'"); + Option optionInputData = new Option("i", ResponderProps.INPUTDATA, true, "required -- Fully qualified name of input file/directory in hdfs; used if inputFormat = 'base'"); optionInputData.setRequired(false); - optionInputData.setArgName(INPUTDATA); + optionInputData.setArgName(ResponderProps.INPUTDATA); optionInputData.setType(String.class); options.addOption(optionInputData); // baseInputFormat - Option optionBaseInputFormat = new Option("bif", BASEINPUTFORMAT, true, + Option optionBaseInputFormat = new Option("bif", ResponderProps.BASEINPUTFORMAT, true, "required if baseInputFormat = 'base' -- Full class name of the InputFormat to use when reading in the data - must extend BaseInputFormat"); optionBaseInputFormat.setRequired(false); - optionBaseInputFormat.setArgName(BASEINPUTFORMAT); + optionBaseInputFormat.setArgName(ResponderProps.BASEINPUTFORMAT); optionBaseInputFormat.setType(String.class); options.addOption(optionBaseInputFormat); // baseQuery - Option optionBaseQuery = new Option("j", BASEQUERY, true, + Option optionBaseQuery = new Option("j", ResponderProps.BASEQUERY, true, "optional -- ElasticSearch-like query if using 'base' input format - used to filter records in the RecordReader"); optionBaseQuery.setRequired(false); - optionBaseQuery.setArgName(BASEQUERY); + optionBaseQuery.setArgName(ResponderProps.BASEQUERY); optionBaseQuery.setType(String.class); options.addOption(optionBaseQuery); // esResource - Option optionEsResource = new Option("er", ESRESOURCE, true, + Option optionEsResource = new Option("er", ResponderProps.ESRESOURCE, true, "required if baseInputFormat = 'elasticsearch' -- Requires the format / : Elasticsearch resource where data is read and written to"); optionEsResource.setRequired(false); - optionEsResource.setArgName(ESRESOURCE); + optionEsResource.setArgName(ResponderProps.ESRESOURCE); optionEsResource.setType(String.class); options.addOption(optionEsResource); // esQuery - Option optionEsQuery = new Option("eq", ESQUERY, true, + Option optionEsQuery = new Option("eq", ResponderProps.ESQUERY, true, "required if baseInputFormat = 'elasticsearch' -- ElasticSearch query if using 'elasticsearch' input format"); optionEsQuery.setRequired(false); - optionEsQuery.setArgName(ESQUERY); + optionEsQuery.setArgName(ResponderProps.ESQUERY); optionEsQuery.setType(String.class); options.addOption(optionEsQuery); // outputFile - Option optionOutputFile = new Option("o", OUTPUTFILE, true, "required -- Fully qualified name of output file in hdfs"); + Option optionOutputFile = new Option("o", ResponderProps.OUTPUTFILE, true, "required -- Fully qualified name of output file in hdfs"); optionOutputFile.setRequired(false); - optionOutputFile.setArgName(OUTPUTFILE); + optionOutputFile.setArgName(ResponderProps.OUTPUTFILE); optionOutputFile.setType(String.class); options.addOption(optionOutputFile); // stopListFile - Option optionStopListFile = new Option("sf", STOPLISTFILE, true, + Option optionStopListFile = new Option("sf", ResponderProps.STOPLISTFILE, true, "optional (unless using StopListFilter) -- Fully qualified file in hdfs containing stoplist terms; used by the StopListFilter"); optionStopListFile.setRequired(false); - optionStopListFile.setArgName(STOPLISTFILE); + optionStopListFile.setArgName(ResponderProps.STOPLISTFILE); 
optionStopListFile.setType(String.class); options.addOption(optionStopListFile); // numReduceTasks - Option optionNumReduceTasks = new Option("nr", NUMREDUCETASKS, true, "optional -- Number of reduce tasks"); + Option optionNumReduceTasks = new Option("nr", ResponderProps.NUMREDUCETASKS, true, "optional -- Number of reduce tasks"); optionNumReduceTasks.setRequired(false); - optionNumReduceTasks.setArgName(NUMREDUCETASKS); + optionNumReduceTasks.setArgName(ResponderProps.NUMREDUCETASKS); optionNumReduceTasks.setType(String.class); options.addOption(optionNumReduceTasks); // useLocalCache - Option optionUseLocalCache = new Option("ulc", USELOCALCACHE, true, + Option optionUseLocalCache = new Option("ulc", ResponderProps.USELOCALCACHE, true, "optional -- 'true' or 'false : Whether or not to use the local cache for modular exponentiation; Default is 'true'"); optionUseLocalCache.setRequired(false); - optionUseLocalCache.setArgName(USELOCALCACHE); + optionUseLocalCache.setArgName(ResponderProps.USELOCALCACHE); optionUseLocalCache.setType(String.class); options.addOption(optionUseLocalCache); // limitHitsPerSelector - Option optionLimitHitsPerSelector = new Option("lh", LIMITHITSPERSELECTOR, true, + Option optionLimitHitsPerSelector = new Option("lh", ResponderProps.LIMITHITSPERSELECTOR, true, "optional -- 'true' or 'false : Whether or not to limit the number of hits per selector; Default is 'true'"); optionLimitHitsPerSelector.setRequired(false); - optionLimitHitsPerSelector.setArgName(LIMITHITSPERSELECTOR); + optionLimitHitsPerSelector.setArgName(ResponderProps.LIMITHITSPERSELECTOR); optionLimitHitsPerSelector.setType(String.class); options.addOption(optionLimitHitsPerSelector); // maxHitsPerSelector - Option optionMaxHitsPerSelector = new Option("mh", MAXHITSPERSELECTOR, true, "optional -- Max number of hits encrypted per selector"); + Option optionMaxHitsPerSelector = new Option("mh", ResponderProps.MAXHITSPERSELECTOR, true, "optional -- Max number of hits encrypted per selector"); optionMaxHitsPerSelector.setRequired(false); - optionMaxHitsPerSelector.setArgName(MAXHITSPERSELECTOR); + optionMaxHitsPerSelector.setArgName(ResponderProps.MAXHITSPERSELECTOR); optionMaxHitsPerSelector.setType(String.class); options.addOption(optionMaxHitsPerSelector); // mapreduce.map.memory.mb - Option optionMapMemory = new Option("mm", MAPMEMORY, true, "optional -- Amount of memory (in MB) to allocate per map task; Default is 3000"); + Option optionMapMemory = new Option("mm", ResponderProps.MAPMEMORY, true, "optional -- Amount of memory (in MB) to allocate per map task; Default is 3000"); optionMapMemory.setRequired(false); - optionMapMemory.setArgName(MAPMEMORY); + optionMapMemory.setArgName(ResponderProps.MAPMEMORY); optionMapMemory.setType(String.class); options.addOption(optionMapMemory); // mapreduce.reduce.memory.mb - Option optionReduceMemory = new Option("rm", REDUCEMEMORY, true, "optional -- Amount of memory (in MB) to allocate per reduce task; Default is 3000"); + Option optionReduceMemory = new Option("rm", ResponderProps.REDUCEMEMORY, true, "optional -- Amount of memory (in MB) to allocate per reduce task; Default is 3000"); optionReduceMemory.setRequired(false); - optionReduceMemory.setArgName(REDUCEMEMORY); + optionReduceMemory.setArgName(ResponderProps.REDUCEMEMORY); optionReduceMemory.setType(String.class); options.addOption(optionReduceMemory); // mapreduce.map.java.opts - Option optionMapOpts = new Option("mjo", MAPJAVAOPTS, true, "optional -- Amount of heap (in MB) to allocate per map task; 
Default is -Xmx2800m"); + Option optionMapOpts = new Option("mjo", ResponderProps.MAPJAVAOPTS, true, "optional -- Amount of heap (in MB) to allocate per map task; Default is -Xmx2800m"); optionMapOpts.setRequired(false); - optionMapOpts.setArgName(MAPJAVAOPTS); + optionMapOpts.setArgName(ResponderProps.MAPJAVAOPTS); optionMapOpts.setType(String.class); options.addOption(optionMapOpts); // mapreduce.reduce.java.opts - Option optionReduceOpts = new Option("rjo", REDUCEJAVAOPTS, true, "optional -- Amount of heap (in MB) to allocate per reduce task; Default is -Xmx2800m"); + Option optionReduceOpts = new Option("rjo", ResponderProps.REDUCEJAVAOPTS, true, "optional -- Amount of heap (in MB) to allocate per reduce task; Default is -Xmx2800m"); optionReduceOpts.setRequired(false); - optionReduceOpts.setArgName(REDUCEJAVAOPTS); + optionReduceOpts.setArgName(ResponderProps.REDUCEJAVAOPTS); optionReduceOpts.setType(String.class); options.addOption(optionReduceOpts); // data.schemas - Option optionDataSchemas = new Option("ds", DATASCHEMAS, true, "required -- Comma separated list of data schema file names"); + Option optionDataSchemas = new Option("ds", ResponderProps.DATASCHEMAS, true, "required -- Comma separated list of data schema file names"); optionDataSchemas.setRequired(false); - optionDataSchemas.setArgName(DATASCHEMAS); + optionDataSchemas.setArgName(ResponderProps.DATASCHEMAS); optionDataSchemas.setType(String.class); options.addOption(optionDataSchemas); // query.schemas - Option optionQuerySchemas = new Option("qs", QUERYSCHEMAS, true, "required -- Comma separated list of query schema file names"); + Option optionQuerySchemas = new Option("qs", ResponderProps.QUERYSCHEMAS, true, "required -- Comma separated list of query schema file names"); optionQuerySchemas.setRequired(false); - optionQuerySchemas.setArgName(QUERYSCHEMAS); + optionQuerySchemas.setArgName(ResponderProps.QUERYSCHEMAS); optionQuerySchemas.setType(String.class); options.addOption(optionQuerySchemas); // pir.numExpLookupPartitions - Option optionExpParts = new Option("expParts", NUMEXPLOOKUPPARTS, true, "optional -- Number of partitions for the exp lookup table"); + Option optionExpParts = new Option("expParts", ResponderProps.NUMEXPLOOKUPPARTS, true, "optional -- Number of partitions for the exp lookup table"); optionExpParts.setRequired(false); - optionExpParts.setArgName(NUMEXPLOOKUPPARTS); + optionExpParts.setArgName(ResponderProps.NUMEXPLOOKUPPARTS); optionExpParts.setType(String.class); options.addOption(optionExpParts); // pir.numExpLookupPartitions - Option optionHdfsExp = new Option("hdfsExp", USEHDFSLOOKUPTABLE, true, + Option optionHdfsExp = new Option("hdfsExp", ResponderProps.USEHDFSLOOKUPTABLE, true, "optional -- 'true' or 'false' - Whether or not to generate and use the hdfs lookup table" + " for modular exponentiation"); optionHdfsExp.setRequired(false); - optionHdfsExp.setArgName(USEHDFSLOOKUPTABLE); + optionHdfsExp.setArgName(ResponderProps.USEHDFSLOOKUPTABLE); optionHdfsExp.setType(String.class); options.addOption(optionHdfsExp); // numDataPartitions - Option optionDataParts = new Option("dataParts", NUMDATAPARTITIONS, true, "optional -- Number of partitions for the input data"); + Option optionDataParts = new Option("dataParts", ResponderProps.NUMDATAPARTITIONS, true, "optional -- Number of partitions for the input data"); optionDataParts.setRequired(false); - optionDataParts.setArgName(NUMDATAPARTITIONS); + optionDataParts.setArgName(ResponderProps.NUMDATAPARTITIONS); 
optionDataParts.setType(String.class); options.addOption(optionDataParts); // useModExpJoin - Option optionModExpJoin = new Option("useModExpJoin", USEMODEXPJOIN, true, "optional -- 'true' or 'false' -- Spark only -- Whether or not to " + Option optionModExpJoin = new Option("useModExpJoin", ResponderProps.USEMODEXPJOIN, true, "optional -- 'true' or 'false' -- Spark only -- Whether or not to " + "pre-compute the modular exponentiation table and join it to the data partitions when performing the encrypted row calculations"); optionModExpJoin.setRequired(false); - optionModExpJoin.setArgName(USEMODEXPJOIN); + optionModExpJoin.setArgName(ResponderProps.USEMODEXPJOIN); optionModExpJoin.setType(String.class); options.addOption(optionModExpJoin); // numColMultPartitions - Option optionNumColMultPartitions = new Option("numColMultParts", NUMCOLMULTPARTITIONS, true, "optional, Spark only -- Number of partitions to " + Option optionNumColMultPartitions = new Option("numColMultParts", ResponderProps.NUMCOLMULTPARTITIONS, true, "optional, Spark only -- Number of partitions to " + "use when performing column multiplication"); optionNumColMultPartitions.setRequired(false); - optionNumColMultPartitions.setArgName(NUMCOLMULTPARTITIONS); + optionNumColMultPartitions.setArgName(ResponderProps.NUMCOLMULTPARTITIONS); optionNumColMultPartitions.setType(String.class); options.addOption(optionNumColMultPartitions); // colMultReduceByKey - Option optionColMultReduceByKey = new Option("colMultRBK", COLMULTREDUCEBYKEY, true, "optional -- 'true' or 'false' -- Spark only -- " + Option optionColMultReduceByKey = new Option("colMultRBK", ResponderProps.COLMULTREDUCEBYKEY, true, "optional -- 'true' or 'false' -- Spark only -- " + "If true, uses reduceByKey in performing column multiplication; if false, uses groupByKey -> reduce"); optionColMultReduceByKey.setRequired(false); - optionColMultReduceByKey.setArgName(COLMULTREDUCEBYKEY); + optionColMultReduceByKey.setArgName(ResponderProps.COLMULTREDUCEBYKEY); optionColMultReduceByKey.setType(String.class); options.addOption(optionColMultReduceByKey); // colMultReduceByKey - Option optionAllowEmbeddedQS = new Option("allowEmbeddedQS", ALLOWEMBEDDEDQUERYSCHEMAS, true, "optional -- 'true' or 'false' (defaults to 'false') -- " + Option optionAllowEmbeddedQS = new Option("allowEmbeddedQS", ResponderProps.ALLOWEMBEDDEDQUERYSCHEMAS, true, "optional -- 'true' or 'false' (defaults to 'false') -- " + "If true, allows embedded QuerySchemas for a query."); optionAllowEmbeddedQS.setRequired(false); - optionAllowEmbeddedQS.setArgName(ALLOWEMBEDDEDQUERYSCHEMAS); + optionAllowEmbeddedQS.setArgName(ResponderProps.ALLOWEMBEDDEDQUERYSCHEMAS); optionAllowEmbeddedQS.setType(String.class); options.addOption(optionAllowEmbeddedQS); diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderDriver.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderDriver.java index d10f2e7f..da24ae4c 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/ResponderDriver.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/ResponderDriver.java @@ -50,14 +50,14 @@ public static void main(String[] args) throws Exception { ResponderCLI responderCLI = new ResponderCLI(args); - if (responderCLI.getOptionValue(ResponderCLI.PLATFORM).equals("mapreduce")) + if (SystemConfiguration.getProperty(ResponderProps.PLATFORM).equals("mapreduce")) { logger.info("Launching MapReduce ResponderTool:"); ComputeResponseTool pirWLTool = new ComputeResponseTool(); ToolRunner.run(pirWLTool, new 
String[] {}); } - else if (responderCLI.getOptionValue(ResponderCLI.PLATFORM).equals("spark")) + else if (SystemConfiguration.getProperty(ResponderProps.PLATFORM).equals("spark")) { logger.info("Launching Spark ComputeResponse:"); @@ -65,7 +65,7 @@ else if (responderCLI.getOptionValue(ResponderCLI.PLATFORM).equals("spark")) ComputeResponse computeResponse = new ComputeResponse(fs); computeResponse.performQuery(); } - else if (responderCLI.getOptionValue(ResponderCLI.PLATFORM).equals("standalone")) + else if (SystemConfiguration.getProperty(ResponderProps.PLATFORM).equals("standalone")) { logger.info("Launching Standalone Responder:"); diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java new file mode 100644 index 00000000..04894daf --- /dev/null +++ b/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java @@ -0,0 +1,207 @@ +package org.apache.pirk.responder.wideskies; + +import java.util.Arrays; +import java.util.List; + +import org.apache.pirk.inputformat.hadoop.InputFormatConst; +import org.apache.pirk.schema.data.LoadDataSchemas; +import org.apache.pirk.schema.query.LoadQuerySchemas; +import org.apache.pirk.utils.SystemConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Properties constants and validation for the Responder + */ +public class ResponderProps +{ + private static final Logger logger = LoggerFactory.getLogger(ResponderDriver.class); + + // Required properties + public static final String PLATFORM = "platform"; + public static final String QUERYINPUT = "pir.queryInput"; + public static final String DATAINPUTFORMAT = "pir.dataInputFormat"; + public static final String INPUTDATA = "pir.inputData"; + public static final String BASEQUERY = "pir.baseQuery"; + public static final String ESRESOURCE = "pir.esResource"; + public static final String ESQUERY = "pir.esQuery"; + public static final String OUTPUTFILE = "pir.outputFile"; + + // Optional properties + public static final String BASEINPUTFORMAT = "pir.baseInputFormat"; + public static final String STOPLISTFILE = "pir.stopListFile"; + public static final String NUMREDUCETASKS = "pir.numReduceTasks"; + public static final String USELOCALCACHE = "pir.useLocalCache"; + public static final String LIMITHITSPERSELECTOR = "pir.limitHitsPerSelector"; + public static final String MAXHITSPERSELECTOR = "pir.maxHitsPerSelector"; + public static final String MAPMEMORY = "mapreduce.map.memory.mb"; + public static final String REDUCEMEMORY = "mapreduce.reduce.memory.mb"; + public static final String MAPJAVAOPTS = "mapreduce.map.java.opts"; + public static final String REDUCEJAVAOPTS = "mapreduce.reduce.java.opts"; + public static final String QUERYSCHEMAS = "responder.querySchemas"; + public static final String DATASCHEMAS = "responder.dataSchemas"; + public static final String NUMEXPLOOKUPPARTS = "pir.numExpLookupPartitions"; + public static final String USEHDFSLOOKUPTABLE = "pir.useHDFSLookupTable"; + public static final String NUMDATAPARTITIONS = "pir.numDataPartitions"; + public static final String NUMCOLMULTPARTITIONS = "pir.numColMultPartitions"; + public static final String USEMODEXPJOIN = "pir.useModExpJoin"; + public static final String COLMULTREDUCEBYKEY = "pir.colMultReduceByKeys"; + public static final String ALLOWEMBEDDEDQUERYSCHEMAS = "pir.allowEmbeddedQuerySchemas"; + + public static final List PROPSLIST = Arrays.asList(PLATFORM,QUERYINPUT,DATAINPUTFORMAT, + 
INPUTDATA,BASEQUERY,ESRESOURCE,ESQUERY,OUTPUTFILE,BASEINPUTFORMAT,STOPLISTFILE,NUMREDUCETASKS, + USELOCALCACHE,LIMITHITSPERSELECTOR,MAXHITSPERSELECTOR,MAPMEMORY,REDUCEMEMORY,MAPJAVAOPTS, + REDUCEJAVAOPTS,QUERYSCHEMAS,DATASCHEMAS,NUMEXPLOOKUPPARTS,USEHDFSLOOKUPTABLE,NUMDATAPARTITIONS, + NUMCOLMULTPARTITIONS,USEMODEXPJOIN,COLMULTREDUCEBYKEY,ALLOWEMBEDDEDQUERYSCHEMAS); + + /** + * Validates the responder properties + * + */ + public static boolean validateResponderProperties() + { + boolean valid = true; + + // Parse general required options + + if (!SystemConfiguration.hasProperty(PLATFORM)) + { + logger.info("Must have the option " + PLATFORM); + valid = false; + } + + String platform = SystemConfiguration.getProperty(PLATFORM).toLowerCase(); + if (!platform.equals("mapreduce") && !platform.equals("spark") && !platform.equals("standalone")) + { + logger.info("Unsupported platform: " + platform); + valid = false; + } + + if (!SystemConfiguration.hasProperty(QUERYINPUT)) + { + logger.info("Must have the option " + QUERYINPUT); + valid = false; + } + + if (!SystemConfiguration.hasProperty(OUTPUTFILE)) + { + logger.info("Must have the option " + OUTPUTFILE); + valid = false; + } + + if (!SystemConfiguration.hasProperty(QUERYSCHEMAS)) + { + SystemConfiguration.appendProperty("query.schemas", SystemConfiguration.getProperty(QUERYSCHEMAS)); + } + + if (!SystemConfiguration.hasProperty(DATASCHEMAS)) + { + SystemConfiguration.appendProperty("data.schemas", SystemConfiguration.getProperty(DATASCHEMAS)); + } + + if (!SystemConfiguration.hasProperty(DATAINPUTFORMAT)) + { + logger.info("Must have the option " + DATAINPUTFORMAT); + valid = false; + } + String dataInputFormat = SystemConfiguration.getProperty(DATAINPUTFORMAT).toLowerCase(); + + // Parse required properties by dataInputFormat + + if (dataInputFormat.equals(InputFormatConst.BASE_FORMAT)) + { + if (!SystemConfiguration.hasProperty(BASEINPUTFORMAT)) + { + logger.info("Must have the option " + BASEINPUTFORMAT + " if using " + InputFormatConst.BASE_FORMAT); + valid = false; + } + + if (!SystemConfiguration.hasProperty(INPUTDATA)) + { + logger.info("Must have the option " + INPUTDATA + " if using " + InputFormatConst.BASE_FORMAT); + valid = false; + } + + if (!SystemConfiguration.hasProperty(BASEQUERY)) + { + SystemConfiguration.setProperty("BASEQUERY", "?q=*"); + } + } + else if (dataInputFormat.equals(InputFormatConst.ES)) + { + if (!SystemConfiguration.hasProperty(ESRESOURCE)) + { + logger.info("Must have the option " + ESRESOURCE); + valid = false; + } + + if (!SystemConfiguration.hasProperty(ESQUERY)) + { + logger.info("Must have the option " + ESQUERY); + valid = false; + } + } + else if (dataInputFormat.equalsIgnoreCase("standalone")) + { + if (!SystemConfiguration.hasProperty(INPUTDATA)) + { + logger.info("Must have the option " + INPUTDATA + " if using " + InputFormatConst.BASE_FORMAT); + valid = false; + } + } + else + { + logger.info("Unsupported inputFormat = " + dataInputFormat); + valid = false; + } + + // Parse optional properties with defaults + + if (!SystemConfiguration.hasProperty(USEHDFSLOOKUPTABLE)) + { + SystemConfiguration.setProperty(USEHDFSLOOKUPTABLE, "false"); + } + + if (!SystemConfiguration.hasProperty(USEMODEXPJOIN)) + { + SystemConfiguration.setProperty(USEMODEXPJOIN, "false"); + } + + if (!SystemConfiguration.hasProperty(NUMDATAPARTITIONS)) + { + SystemConfiguration.setProperty(NUMDATAPARTITIONS, "1000"); + } + + if (!SystemConfiguration.hasProperty(NUMCOLMULTPARTITIONS)) + { + 
SystemConfiguration.setProperty(NUMCOLMULTPARTITIONS, "1000"); + } + + if (!SystemConfiguration.hasProperty(COLMULTREDUCEBYKEY)) + { + SystemConfiguration.setProperty(COLMULTREDUCEBYKEY, "false"); + } + + if (!SystemConfiguration.hasProperty(ALLOWEMBEDDEDQUERYSCHEMAS)) + { + SystemConfiguration.setProperty(ALLOWEMBEDDEDQUERYSCHEMAS, "false"); + } + + // Load the new local query and data schemas + try + { + LoadDataSchemas.initialize(); + LoadQuerySchemas.initialize(); + + } catch (Exception e) + { + e.printStackTrace(); + } + + + return valid; + } + + + +} diff --git a/src/main/java/org/apache/pirk/test/distributed/testsuite/DistTestSuite.java b/src/main/java/org/apache/pirk/test/distributed/testsuite/DistTestSuite.java index 15d76223..c818e310 100644 --- a/src/main/java/org/apache/pirk/test/distributed/testsuite/DistTestSuite.java +++ b/src/main/java/org/apache/pirk/test/distributed/testsuite/DistTestSuite.java @@ -32,7 +32,7 @@ import org.apache.pirk.querier.wideskies.encrypt.EncryptQuery; import org.apache.pirk.query.wideskies.Query; import org.apache.pirk.query.wideskies.QueryInfo; -import org.apache.pirk.responder.wideskies.ResponderCLI; +import org.apache.pirk.responder.wideskies.ResponderProps; import org.apache.pirk.responder.wideskies.mapreduce.ComputeResponseTool; import org.apache.pirk.response.wideskies.Response; import org.apache.pirk.schema.response.QueryResponseJSON; @@ -402,30 +402,30 @@ public static ArrayList performQuery(String queryType, ArrayL String inputFormat = SystemConfiguration.getProperty("pir.dataInputFormat"); logger.info("inputFormat = " + inputFormat); ArrayList args = new ArrayList<>(); - args.add("-" + ResponderCLI.PLATFORM + "=spark"); - args.add("-" + ResponderCLI.DATAINPUTFORMAT + "=" + inputFormat); - args.add("-" + ResponderCLI.QUERYINPUT + "=" + SystemConfiguration.getProperty("pir.queryInput")); - args.add("-" + ResponderCLI.OUTPUTFILE + "=" + SystemConfiguration.getProperty("pir.outputFile")); - args.add("-" + ResponderCLI.STOPLISTFILE + "=" + SystemConfiguration.getProperty("pir.stopListFile")); - args.add("-" + ResponderCLI.USELOCALCACHE + "=" + SystemConfiguration.getProperty("pir.useLocalCache", "true")); - args.add("-" + ResponderCLI.LIMITHITSPERSELECTOR + "=" + SystemConfiguration.getProperty("pir.limitHitsPerSelector", "false")); - args.add("-" + ResponderCLI.MAXHITSPERSELECTOR + "=" + SystemConfiguration.getProperty("pir.maxHitsPerSelector", "1000")); - args.add("-" + ResponderCLI.QUERYSCHEMAS + "=" + Inputs.HDFS_QUERY_FILES); - args.add("-" + ResponderCLI.DATASCHEMAS + "=" + Inputs.DATA_SCHEMA_FILE_HDFS); - args.add("-" + ResponderCLI.NUMEXPLOOKUPPARTS + "=" + SystemConfiguration.getProperty("pir.numExpLookupPartitions", "100")); - args.add("-" + ResponderCLI.USEMODEXPJOIN + "=" + SystemConfiguration.getProperty("pir.useModExpJoin", "false")); - args.add("-" + ResponderCLI.NUMCOLMULTPARTITIONS + "=" + SystemConfiguration.getProperty("pir.numColMultPartitions", "20")); - args.add("-" + ResponderCLI.COLMULTREDUCEBYKEY + "=" + SystemConfiguration.getProperty("pir.colMultReduceByKey", "false")); + args.add("-" + ResponderProps.PLATFORM + "=spark"); + args.add("-" + ResponderProps.DATAINPUTFORMAT + "=" + inputFormat); + args.add("-" + ResponderProps.QUERYINPUT + "=" + SystemConfiguration.getProperty("pir.queryInput")); + args.add("-" + ResponderProps.OUTPUTFILE + "=" + SystemConfiguration.getProperty("pir.outputFile")); + args.add("-" + ResponderProps.STOPLISTFILE + "=" + SystemConfiguration.getProperty("pir.stopListFile")); + args.add("-" + 
ResponderProps.USELOCALCACHE + "=" + SystemConfiguration.getProperty("pir.useLocalCache", "true")); + args.add("-" + ResponderProps.LIMITHITSPERSELECTOR + "=" + SystemConfiguration.getProperty("pir.limitHitsPerSelector", "false")); + args.add("-" + ResponderProps.MAXHITSPERSELECTOR + "=" + SystemConfiguration.getProperty("pir.maxHitsPerSelector", "1000")); + args.add("-" + ResponderProps.QUERYSCHEMAS + "=" + Inputs.HDFS_QUERY_FILES); + args.add("-" + ResponderProps.DATASCHEMAS + "=" + Inputs.DATA_SCHEMA_FILE_HDFS); + args.add("-" + ResponderProps.NUMEXPLOOKUPPARTS + "=" + SystemConfiguration.getProperty("pir.numExpLookupPartitions", "100")); + args.add("-" + ResponderProps.USEMODEXPJOIN + "=" + SystemConfiguration.getProperty("pir.useModExpJoin", "false")); + args.add("-" + ResponderProps.NUMCOLMULTPARTITIONS + "=" + SystemConfiguration.getProperty("pir.numColMultPartitions", "20")); + args.add("-" + ResponderProps.COLMULTREDUCEBYKEY + "=" + SystemConfiguration.getProperty("pir.colMultReduceByKey", "false")); if (inputFormat.equals(InputFormatConst.BASE_FORMAT)) { - args.add("-" + ResponderCLI.INPUTDATA + "=" + SystemConfiguration.getProperty("pir.inputData")); - args.add("-" + ResponderCLI.BASEQUERY + "=" + SystemConfiguration.getProperty("pir.baseQuery")); - args.add("-" + ResponderCLI.BASEINPUTFORMAT + "=" + SystemConfiguration.getProperty("pir.baseInputFormat")); + args.add("-" + ResponderProps.INPUTDATA + "=" + SystemConfiguration.getProperty("pir.inputData")); + args.add("-" + ResponderProps.BASEQUERY + "=" + SystemConfiguration.getProperty("pir.baseQuery")); + args.add("-" + ResponderProps.BASEINPUTFORMAT + "=" + SystemConfiguration.getProperty("pir.baseInputFormat")); } else if (inputFormat.equals(InputFormatConst.ES)) { - args.add("-" + ResponderCLI.ESQUERY + "=" + SystemConfiguration.getProperty("pir.esQuery")); - args.add("-" + ResponderCLI.ESRESOURCE + "=" + SystemConfiguration.getProperty("pir.esResource")); + args.add("-" + ResponderProps.ESQUERY + "=" + SystemConfiguration.getProperty("pir.esQuery")); + args.add("-" + ResponderProps.ESRESOURCE + "=" + SystemConfiguration.getProperty("pir.esResource")); } for (String arg : args) diff --git a/src/main/resources/responder.properties b/src/main/resources/responder.properties index e69de29b..56211b3f 100644 --- a/src/main/resources/responder.properties +++ b/src/main/resources/responder.properties @@ -0,0 +1,138 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+############################################################################### + +## +## Required Properties +## + +#dataInputFormat -- required -- 'base', 'elasticsearch', or 'standalone' -- Specify the input format +pir.dataInputFormat= + +#inputData -- required +#Fully qualified name of input file/directory in hdfs; used if inputFormat = 'base' +pir.inputData= + +#outputFile -- required -- Fully qualified name of output file in hdfs +pir.outputFile= + +#platform -- required -- 'mapreduce', 'spark', or 'standalone' +#Processing platform technology for the responder +platform= + +#queryInput -- required -- Fully qualified dir in hdfs of Query files +pir.queryInput= + + +## +## Optional Args - Leave empty if not using/not changing default values +## + +#dataSchemas -- required -- Comma separated list of data schema file names +responder.dataSchemas= + +#querySchemas -- required -- Comma separated list of query schema file names +responder.querySchemas= + + +#allowAdHocQuerySchemas -- 'true' or 'false' +#If true, allows embedded QuerySchemas for a query. +#Defaults to 'false' +pir.allowEmbeddedQuerySchemas= + +#colMultReduceByKey -- 'true' or 'false' -- Spark only +#If true, uses reduceByKey in performing column multiplication; if false, uses groupByKey -> reduce +#Defaults to 'false' +pir.colMultReduceByKeys= + +#baseInputFormat -- required if baseInputFormat = 'base' +#Full class name of the InputFormat to use when reading in the data - must extend BaseInputFormat +pir.baseInputFormat= + +#esQuery -- required if baseInputFormat = 'elasticsearch' -- ElasticSearch query +#if using 'elasticsearch' input format +pir.esQuery= + +#esResource -- required if baseInputFormat = 'elasticsearch' +#Requires the format / : Elasticsearch resource where data is read and written to +pir.esResource= + +#useHDFSLookupTable -- 'true' or 'false' - Whether or not to generate and use the +#hdfs lookup table for modular exponentiation +#Defaults to 'false' +pir.useHDFSLookupTable= + +#baseQuery -- ElasticSearch-like query if using 'base' input format - +#used to filter records in the RecordReader +#Defaults to ?q=* +pir.baseQuery= + +#limitHitsPerSelector -- 'true' or 'false' +#Whether or not to limit the number of hits per selector +#Defaults to 'true' +pir.limitHitsPerSelector= + +#mapreduceMapJavaOpts -- Amount of heap (in MB) to allocate per map task +#Defaults to -Xmx2800m +mapreduce.map.java.opts= + +#mapreduceMapMemoryMb -- Amount of memory (in MB) to allocate per map task +#Defaults to 3000 +mapreduce.map.memory.mb= + +#mapreduceReduceJavaOpts +#Amount of heap (in MB) to allocate per reduce task +#Defaults to -Xmx2800m +mapreduce.reduce.java.opts= + +#mapreduceReduceMemoryMb +#Amount of memory (in MB) to allocate per reduce task +#Defaults to 3000 +mapreduce.reduce.memory.mb= + +#stopListFile -- optional (unless using StopListFilter) -- Fully qualified file in hdfs +#containing stoplist terms; used by the StopListFilter +pir.stopListFile= + +#useLocalCache -- 'true' or 'false' +#Whether or not to use the local cache for modular exponentiation +#Defaults to 'true' +pir.useLocalCache= + +#useModExpJoin -- 'true' or 'false' -- Spark only +#Whether or not to pre-compute the modular exponentiation table and join it to the data +#partitions when performing the encrypted row calculations +#Defaults to 'false' +pir.useModExpJoin= + +#numReduceTasks -- optional -- Number of reduce tasks +pir.numReduceTasks= + +#numColMultPartitions -- optional, Spark only +#Number of partitions to use when performing column 
multiplication +pir.numColMultPartitions= + +#maxHitsPerSelector -- optional -- Max number of hits encrypted per selector +pir.maxHitsPerSelector= + +#dataParts -- optional -- Number of partitions for the input data +pir.numDataPartitions= + +#numExpLookupPartitions -- optional -- Number of partitions for the exp lookup table +pir.numExpLookupPartitions= + \ No newline at end of file From 9f8f0674bddd33f786b7106e150f5cac997d0e57 Mon Sep 17 00:00:00 2001 From: eawilliams Date: Wed, 27 Jul 2016 21:53:08 -0400 Subject: [PATCH 05/10] refined prop parsing and remove schema loading from SystemConfig --- pom-repo.xml | 381 ++++++++++++++++++ pom.xml | 30 +- .../querier/wideskies/QuerierDriverCLI.java | 48 +-- .../pirk/querier/wideskies/QuerierProps.java | 73 ++-- .../responder/wideskies/ResponderCLI.java | 68 ++-- .../responder/wideskies/ResponderProps.java | 69 ++-- .../wideskies/common/ComputeEncryptedRow.java | 8 +- .../common/HashSelectorAndPartitionData.java | 1 + .../HashSelectorsAndPartitionDataMapper.java | 1 + .../wideskies/mapreduce/RowCalcReducer.java | 1 + .../wideskies/spark/ComputeResponse.java | 3 +- .../spark/EncColMultGroupedMapper.java | 1 + .../responder/wideskies/spark/EncRowCalc.java | 5 +- .../spark/EncRowCalcPrecomputedCache.java | 1 + .../wideskies/spark/ExpKeyFilenameMap.java | 1 + .../wideskies/spark/ExpTableGenerator.java | 1 + .../spark/HashSelectorsAndPartitionData.java | 1 + .../distributed/testsuite/DistTestSuite.java | 6 +- .../org/apache/pirk/test/utils/BaseTests.java | 3 +- .../org/apache/pirk/test/utils/Inputs.java | 2 +- .../pirk/test/utils/StandaloneQuery.java | 3 +- .../apache/pirk/utils/QueryParserUtils.java | 2 - .../org/apache/pirk/utils/StringUtils.java | 2 - .../pirk/utils/SystemConfiguration.java | 113 +++--- src/main/resources/pirk.properties | 13 +- src/main/resources/pirk.properties-repo | 243 +++++++++++ src/main/resources/querier.properties | 37 +- src/main/resources/responder.properties | 44 +- .../test/general/ISO8601DateParserTest.java | 3 +- src/test/java/test/general/KeyedHashTest.java | 3 +- src/test/java/test/general/PaillierTest.java | 5 +- .../java/test/general/PartitionUtilsTest.java | 6 +- .../test/general/QueryParserUtilsTest.java | 6 +- .../test/schema/data/LoadDataSchemaTest.java | 7 +- .../schema/query/LoadQuerySchemaTest.java | 6 +- 35 files changed, 927 insertions(+), 270 deletions(-) create mode 100644 pom-repo.xml create mode 100755 src/main/resources/pirk.properties-repo diff --git a/pom-repo.xml b/pom-repo.xml new file mode 100644 index 00000000..3d4c1b56 --- /dev/null +++ b/pom-repo.xml @@ -0,0 +1,381 @@ + + + + + 4.0.0 + + + org.apache.pirk + pirk + 0.0.1-SNAPSHOT + jar + + Apache Pirk (incubating) Project + Apache Pirk (incubating) is a framework for scalable Private Information Retrieval (PIR). 
+ http://pirk.incubator.apache.org/ + + + 2016 + + + The Apache Software Foundation + https://www.apache.org + + + + + Apache License, Version 2.0 + https://www.apache.org/licenses/LICENSE-2.0 + + + + + + Dev + dev-subscribe@pirk.incubator.apache.org + dev-unsubscribe@pirk.incubator.apache.org + dev@pirk.incubator.apache.org + http://mail-archives.apache.org/mod_mbox/incubator-pirk-dev/ + + + Commits + commits-subscribe@pirk.incubator.apache.org + commits-unsubscribe@pirk.incubator.apache.org + http://mail-archives.apache.org/mod_mbox/incubator-pirk-commits + + + + + + mvn-public + http://mvnrepository.com/artifact + + always + + + + + + + UTF-8 + 1.7 + 1.7 + 2.10.4 + 1.11.3 + benchmarks + 1.7 + 1.7.21 + 2.6.2 + 4.12 + log4j2.properties + + + + + log4j + log4j + 1.2.17 + + + + junit + junit + 4.12 + + + + org.apache.commons + commons-math3 + 3.3 + + + + com.googlecode.json-simple + json-simple + 1.1 + + + + commons-net + commons-net + 3.3 + + + + org.apache.hadoop + hadoop-common + 2.7.2 + + + + org.apache.hadoop + hadoop-mapreduce-client-core + 2.7.2 + + + + org.apache.hadoop + hadoop-client + 2.7.2 + + + + org.apache.spark + spark-core_2.11 + 1.6.1 + + + + org.elasticsearch + elasticsearch-hadoop + 2.1.2 + + + commons-net + commons-net + + + org.apache.hive + hive-service + + + org.slf4j + slf4j-log4j12 + + + org.slf4j + log4j-over-slf4j + + + + + + + com.squareup.jnagmp + jnagmp + 1.1.0 + + + + + org.openjdk.jmh + jmh-core + ${jmh.version} + provided + + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + provided + + + + + org.slf4j + slf4j-api + ${slf4j.version} + + + + org.apache.logging.log4j + log4j-slf4j-impl + 2.6.2 + + + + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.18 + + true + -Xmx1G + -Djava.net.preferIPv4Stack=true + + ${log4j.configuration} + + + + + + org.apache.maven.surefire + surefire-junit4 + 2.18 + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.5.1 + + ${javac.target} + ${javac.target} + ${javac.target} + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.0.1 + + + org/apache/pirk/benchmark/** + org/openjdk/jmh/** + + + + + + org.apache.maven.plugins + maven-shade-plugin + 2.4.3 + + + + package + main + + shade + + + true + exe + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + + + org.apache.rat + apache-rat-plugin + 0.11 + + + nb-configuration.xml + nbactions.xml + DEPENDENCIES + + .travis.yml + appveyor.yml + + + + + + org.apache.maven.doxia + doxia-core + 1.6 + + + xerces + xercesImpl + + + + + + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + + org.scala-tools + + + maven-scala-plugin + + + [2.15.2,) + + + testCompile + + + + + + + + + + org.apache.rat + + + apache-rat-plugin + + + [0.11,) + + + check + + + + + + + + + + + + + + + + diff --git a/pom.xml b/pom.xml index 3d4c1b56..a9b77ad9 100644 --- a/pom.xml +++ b/pom.xml @@ -117,6 +117,7 @@ 3.3 + + + org.apache.hadoop + hadoop-common + 2.7.2 + + + + org.apache.hadoop + hadoop-mapreduce-client-core + 2.7.2 + + + + org.apache.hadoop + hadoop-client + 2.7.2 + + + + org.apache.spark + spark-core_2.11 + 1.6.1 + + + + org.elasticsearch elasticsearch-hadoop @@ -239,7 +268,6 @@ org.apache.maven.plugins maven-jar-plugin - 3.0.1 org/apache/pirk/benchmark/** diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java index 4005395d..d775fb5d 100644 --- 
a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java @@ -18,14 +18,14 @@ */ package org.apache.pirk.querier.wideskies; +import java.io.File; + import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; -import org.apache.pirk.schema.data.LoadDataSchemas; -import org.apache.pirk.schema.query.LoadQuerySchemas; import org.apache.pirk.utils.SystemConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -109,44 +109,28 @@ public String getOptionValue(String option) */ private boolean parseOptions() { - boolean valid = true; + boolean valid = true; - //If we have a local.querier.properties file specified, load it - if(hasOption(LOCALPROPFILE)) + // If we have a local.querier.properties file specified, load it + if (hasOption(LOCALPROPFILE)) { - SystemConfiguration.loadPropsFromFile(getOptionValue(LOCALPROPFILE)); + SystemConfiguration.loadPropsFromFile(new File(getOptionValue(LOCALPROPFILE))); } - else + else { - //Pull options, set as properties - for(String prop: QuerierProps.PROPSLIST) + // Pull options, set as properties + for (String prop : QuerierProps.PROPSLIST) { - if(hasOption(prop)) + if (hasOption(prop)) { SystemConfiguration.setProperty(prop, getOptionValue(prop)); } } } - //Validate properties + // Validate properties valid = QuerierProps.validateQuerierProperties(); - // Load the new local query and data schemas - if(valid) - { - logger.info("loading schemas: dataSchemas = " + SystemConfiguration.getProperty("data.schemas") + " querySchemas = " - + SystemConfiguration.getProperty("query.schemas")); - try - { - LoadDataSchemas.initialize(); - LoadQuerySchemas.initialize(); - - } catch (Exception e) - { - e.printStackTrace(); - } - } - return valid; } @@ -247,7 +231,8 @@ private Options createOptions() options.addOption(optionHASHKEY); // DATAPARTITIONSIZE - Option optionDATAPARTITIONSIZE = new Option("dps", QuerierProps.DATAPARTITIONSIZE, true, "required for encryption -- Partition bit size in data partitioning"); + Option optionDATAPARTITIONSIZE = new Option("dps", QuerierProps.DATAPARTITIONSIZE, true, + "required for encryption -- Partition bit size in data partitioning"); optionDATAPARTITIONSIZE.setRequired(false); optionDATAPARTITIONSIZE.setArgName(QuerierProps.DATAPARTITIONSIZE); optionDATAPARTITIONSIZE.setType(String.class); @@ -278,8 +263,8 @@ private Options createOptions() options.addOption(optionBITSET); // embedSelector - Option optionEmbedSelector = new Option("embed", QuerierProps.EMBEDSELECTOR, true, "required for encryption -- 'true' or 'false' - Whether or not to embed " - + "the selector in the results to reduce false positives"); + Option optionEmbedSelector = new Option("embed", QuerierProps.EMBEDSELECTOR, true, + "required for encryption -- 'true' or 'false' - Whether or not to embed " + "the selector in the results to reduce false positives"); optionEmbedSelector.setRequired(false); optionEmbedSelector.setArgName(QuerierProps.EMBEDSELECTOR); optionEmbedSelector.setType(String.class); @@ -303,7 +288,8 @@ private Options createOptions() options.addOption(optionUseHDFSLookupTable); // QUERIERFILE - Option optionQUERIERFILE = new Option("qf", QuerierProps.QUERIERFILE, true, "required for decryption -- Fully qualified file containing the serialized Querier 
object"); + Option optionQUERIERFILE = new Option("qf", QuerierProps.QUERIERFILE, true, + "required for decryption -- Fully qualified file containing the serialized Querier object"); optionQUERIERFILE.setRequired(false); optionQUERIERFILE.setArgName(QuerierProps.QUERIERFILE); optionQUERIERFILE.setType(String.class); diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java index c350b4d6..d91b14a4 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java @@ -3,6 +3,8 @@ import java.util.Arrays; import java.util.List; +import org.apache.pirk.schema.data.LoadDataSchemas; +import org.apache.pirk.schema.query.LoadQuerySchemas; import org.apache.pirk.utils.SystemConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -14,13 +16,13 @@ public class QuerierProps { private static final Logger logger = LoggerFactory.getLogger(QuerierProps.class); - //General properties + // General properties public static final String ACTION = "querier.action"; public static final String INPUTFILE = "querier.inputFile"; public static final String OUTPUTFILE = "querier.outputFile"; public static final String QUERYTYPE = "querier.queryType"; public static final String NUMTHREADS = "querier.numThreads"; - + // Encryption properties public static final String HASHBITSIZE = "querier.hashBitSize"; public static final String HASHKEY = "querier.hashKey"; @@ -40,12 +42,11 @@ public class QuerierProps // Decryption properties public static final String QUERIERFILE = "querier.querierFile"; - - public static final List PROPSLIST = Arrays.asList(ACTION, INPUTFILE, OUTPUTFILE, QUERYTYPE, NUMTHREADS, - EMBEDQUERYSCHEMA, HASHBITSIZE, HASHKEY, DATAPARTITIONSIZE, PAILLIERBITSIZE, BITSET, - CERTAINTY, QUERYNAME, QUERYSCHEMAS, DATASCHEMAS, EMBEDSELECTOR, USEMEMLOOKUPTABLE, - USEHDFSLOOKUPTABLE, SR_ALGORITHM, SR_PROVIDER); - + + public static final List PROPSLIST = Arrays.asList(ACTION, INPUTFILE, OUTPUTFILE, QUERYTYPE, NUMTHREADS, EMBEDQUERYSCHEMA, HASHBITSIZE, HASHKEY, + DATAPARTITIONSIZE, PAILLIERBITSIZE, BITSET, CERTAINTY, QUERYNAME, QUERYSCHEMAS, DATASCHEMAS, EMBEDSELECTOR, USEMEMLOOKUPTABLE, USEHDFSLOOKUPTABLE, + SR_ALGORITHM, SR_PROVIDER); + /** * Validates the querier properties * @@ -53,9 +54,9 @@ public class QuerierProps public static boolean validateQuerierProperties() { boolean valid = true; - + // Parse general required properties - + if (!SystemConfiguration.hasProperty(ACTION)) { logger.info("Must have the option " + ACTION); @@ -67,25 +68,25 @@ public static boolean validateQuerierProperties() logger.info("Unsupported action: " + action); valid = false; } - + if (!SystemConfiguration.hasProperty(INPUTFILE)) { logger.info("Must have the option " + INPUTFILE); valid = false; } - + if (!SystemConfiguration.hasProperty(OUTPUTFILE)) { logger.info("Must have the option " + OUTPUTFILE); valid = false; } - + if (!SystemConfiguration.hasProperty(NUMTHREADS)) { logger.info("Must have the option " + NUMTHREADS); valid = false; } - + // Parse general optional properties if (!SystemConfiguration.hasProperty(EMBEDQUERYSCHEMA)) { @@ -93,7 +94,7 @@ public static boolean validateQuerierProperties() } // Parse encryption properties - + if (action.equals("encrypt")) { if (!SystemConfiguration.hasProperty(QUERYTYPE)) @@ -101,69 +102,69 @@ public static boolean validateQuerierProperties() logger.info("Must have the option " + QUERYTYPE); 
valid = false; } - + if (!SystemConfiguration.hasProperty(HASHBITSIZE)) { logger.info("Must have the option " + HASHBITSIZE); valid = false; } - + if (!SystemConfiguration.hasProperty(HASHKEY)) { logger.info("Must have the option " + HASHKEY); valid = false; } - + if (!SystemConfiguration.hasProperty(DATAPARTITIONSIZE)) { logger.info("Must have the option " + DATAPARTITIONSIZE); valid = false; } - + if (!SystemConfiguration.hasProperty(PAILLIERBITSIZE)) { logger.info("Must have the option " + PAILLIERBITSIZE); valid = false; } - + if (!SystemConfiguration.hasProperty(CERTAINTY)) { logger.info("Must have the option " + CERTAINTY); valid = false; } - + if (!SystemConfiguration.hasProperty(QUERYNAME)) { logger.info("Must have the option " + QUERYNAME); valid = false; } - + if (!SystemConfiguration.hasProperty(BITSET)) { logger.info("Must have the option " + BITSET); valid = false; } - + if (SystemConfiguration.hasProperty(QUERYSCHEMAS)) { SystemConfiguration.appendProperty("query.schemas", SystemConfiguration.getProperty(QUERYSCHEMAS)); } - + if (SystemConfiguration.hasProperty(DATASCHEMAS)) { SystemConfiguration.appendProperty("data.schemas", SystemConfiguration.getProperty(DATASCHEMAS)); } - + if (!SystemConfiguration.hasProperty(EMBEDSELECTOR)) { SystemConfiguration.setProperty(EMBEDSELECTOR, "true"); } - + if (!SystemConfiguration.hasProperty(USEMEMLOOKUPTABLE)) { SystemConfiguration.setProperty(USEMEMLOOKUPTABLE, "false"); } - + if (!SystemConfiguration.hasProperty(USEHDFSLOOKUPTABLE)) { SystemConfiguration.setProperty(USEHDFSLOOKUPTABLE, "false"); @@ -179,7 +180,23 @@ public static boolean validateQuerierProperties() valid = false; } } - + + // Load the new local query and data schemas + if (valid) + { + logger.info("loading schemas: dataSchemas = " + SystemConfiguration.getProperty("data.schemas") + " querySchemas = " + + SystemConfiguration.getProperty("query.schemas")); + try + { + LoadDataSchemas.initialize(); + LoadQuerySchemas.initialize(); + + } catch (Exception e) + { + e.printStackTrace(); + } + } + return valid; } } diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java index 5355afb0..bcf02ba7 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java @@ -18,14 +18,14 @@ */ package org.apache.pirk.responder.wideskies; +import java.io.File; + import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; -import org.apache.pirk.schema.data.LoadDataSchemas; -import org.apache.pirk.schema.query.LoadQuerySchemas; import org.apache.pirk.utils.SystemConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,7 +41,7 @@ public class ResponderCLI private CommandLine commandLine = null; private static final String LOCALPROPFILE = "local.responder.properties"; - + /** * Create and parse allowable options * @@ -110,43 +110,27 @@ public String getOptionValue(String option) private boolean parseOptions() { boolean valid = true; - - //If we have a local.querier.properties file specified, load it - if(hasOption(LOCALPROPFILE)) + + // If we have a local.querier.properties file specified, load it + if (hasOption(LOCALPROPFILE)) { - 
SystemConfiguration.loadPropsFromFile(getOptionValue(LOCALPROPFILE)); + SystemConfiguration.loadPropsFromFile(new File(getOptionValue(LOCALPROPFILE))); } - else + else { - //Pull options, set as properties - for(String prop: ResponderProps.PROPSLIST) + // Pull options, set as properties + for (String prop : ResponderProps.PROPSLIST) { - if(hasOption(prop)) + if (hasOption(prop)) { SystemConfiguration.setProperty(prop, getOptionValue(prop)); } } } - //Validate properties + // Validate properties valid = ResponderProps.validateResponderProperties(); - // Load the new local query and data schemas - if(valid) - { - logger.info("loading schemas: dataSchemas = " + SystemConfiguration.getProperty("data.schemas") + " querySchemas = " - + SystemConfiguration.getProperty("query.schemas")); - try - { - LoadDataSchemas.initialize(); - LoadQuerySchemas.initialize(); - - } catch (Exception e) - { - e.printStackTrace(); - } - } - return valid; } @@ -180,14 +164,16 @@ private Options createOptions() options.addOption(optionQueryInput); // dataInputFormat - Option optionDataInputFormat = new Option("d", ResponderProps.DATAINPUTFORMAT, true, "required -- 'base', 'elasticsearch', or 'standalone' : Specify the input format"); + Option optionDataInputFormat = new Option("d", ResponderProps.DATAINPUTFORMAT, true, + "required -- 'base', 'elasticsearch', or 'standalone' : Specify the input format"); optionDataInputFormat.setRequired(false); optionDataInputFormat.setArgName(ResponderProps.DATAINPUTFORMAT); optionDataInputFormat.setType(String.class); options.addOption(optionDataInputFormat); // inputData - Option optionInputData = new Option("i", ResponderProps.INPUTDATA, true, "required -- Fully qualified name of input file/directory in hdfs; used if inputFormat = 'base'"); + Option optionInputData = new Option("i", ResponderProps.INPUTDATA, true, + "required -- Fully qualified name of input file/directory in hdfs; used if inputFormat = 'base'"); optionInputData.setRequired(false); optionInputData.setArgName(ResponderProps.INPUTDATA); optionInputData.setType(String.class); @@ -278,21 +264,24 @@ private Options createOptions() options.addOption(optionMapMemory); // mapreduce.reduce.memory.mb - Option optionReduceMemory = new Option("rm", ResponderProps.REDUCEMEMORY, true, "optional -- Amount of memory (in MB) to allocate per reduce task; Default is 3000"); + Option optionReduceMemory = new Option("rm", ResponderProps.REDUCEMEMORY, true, + "optional -- Amount of memory (in MB) to allocate per reduce task; Default is 3000"); optionReduceMemory.setRequired(false); optionReduceMemory.setArgName(ResponderProps.REDUCEMEMORY); optionReduceMemory.setType(String.class); options.addOption(optionReduceMemory); // mapreduce.map.java.opts - Option optionMapOpts = new Option("mjo", ResponderProps.MAPJAVAOPTS, true, "optional -- Amount of heap (in MB) to allocate per map task; Default is -Xmx2800m"); + Option optionMapOpts = new Option("mjo", ResponderProps.MAPJAVAOPTS, true, + "optional -- Amount of heap (in MB) to allocate per map task; Default is -Xmx2800m"); optionMapOpts.setRequired(false); optionMapOpts.setArgName(ResponderProps.MAPJAVAOPTS); optionMapOpts.setType(String.class); options.addOption(optionMapOpts); // mapreduce.reduce.java.opts - Option optionReduceOpts = new Option("rjo", ResponderProps.REDUCEJAVAOPTS, true, "optional -- Amount of heap (in MB) to allocate per reduce task; Default is -Xmx2800m"); + Option optionReduceOpts = new Option("rjo", ResponderProps.REDUCEJAVAOPTS, true, + "optional -- Amount of heap 
(in MB) to allocate per reduce task; Default is -Xmx2800m"); optionReduceOpts.setRequired(false); optionReduceOpts.setArgName(ResponderProps.REDUCEJAVAOPTS); optionReduceOpts.setType(String.class); @@ -335,16 +324,17 @@ private Options createOptions() options.addOption(optionDataParts); // useModExpJoin - Option optionModExpJoin = new Option("useModExpJoin", ResponderProps.USEMODEXPJOIN, true, "optional -- 'true' or 'false' -- Spark only -- Whether or not to " - + "pre-compute the modular exponentiation table and join it to the data partitions when performing the encrypted row calculations"); + Option optionModExpJoin = new Option("useModExpJoin", ResponderProps.USEMODEXPJOIN, true, + "optional -- 'true' or 'false' -- Spark only -- Whether or not to " + + "pre-compute the modular exponentiation table and join it to the data partitions when performing the encrypted row calculations"); optionModExpJoin.setRequired(false); optionModExpJoin.setArgName(ResponderProps.USEMODEXPJOIN); optionModExpJoin.setType(String.class); options.addOption(optionModExpJoin); // numColMultPartitions - Option optionNumColMultPartitions = new Option("numColMultParts", ResponderProps.NUMCOLMULTPARTITIONS, true, "optional, Spark only -- Number of partitions to " - + "use when performing column multiplication"); + Option optionNumColMultPartitions = new Option("numColMultParts", ResponderProps.NUMCOLMULTPARTITIONS, true, + "optional, Spark only -- Number of partitions to " + "use when performing column multiplication"); optionNumColMultPartitions.setRequired(false); optionNumColMultPartitions.setArgName(ResponderProps.NUMCOLMULTPARTITIONS); optionNumColMultPartitions.setType(String.class); @@ -359,8 +349,8 @@ private Options createOptions() options.addOption(optionColMultReduceByKey); // colMultReduceByKey - Option optionAllowEmbeddedQS = new Option("allowEmbeddedQS", ResponderProps.ALLOWEMBEDDEDQUERYSCHEMAS, true, "optional -- 'true' or 'false' (defaults to 'false') -- " - + "If true, allows embedded QuerySchemas for a query."); + Option optionAllowEmbeddedQS = new Option("allowEmbeddedQS", ResponderProps.ALLOWEMBEDDEDQUERYSCHEMAS, true, + "optional -- 'true' or 'false' (defaults to 'false') -- " + "If true, allows embedded QuerySchemas for a query."); optionAllowEmbeddedQS.setRequired(false); optionAllowEmbeddedQS.setArgName(ResponderProps.ALLOWEMBEDDEDQUERYSCHEMAS); optionAllowEmbeddedQS.setType(String.class); diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java index 04894daf..1dd40a9e 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java @@ -45,15 +45,14 @@ public class ResponderProps public static final String NUMDATAPARTITIONS = "pir.numDataPartitions"; public static final String NUMCOLMULTPARTITIONS = "pir.numColMultPartitions"; public static final String USEMODEXPJOIN = "pir.useModExpJoin"; - public static final String COLMULTREDUCEBYKEY = "pir.colMultReduceByKeys"; + public static final String COLMULTREDUCEBYKEY = "pir.colMultReduceByKey"; public static final String ALLOWEMBEDDEDQUERYSCHEMAS = "pir.allowEmbeddedQuerySchemas"; - - public static final List PROPSLIST = Arrays.asList(PLATFORM,QUERYINPUT,DATAINPUTFORMAT, - INPUTDATA,BASEQUERY,ESRESOURCE,ESQUERY,OUTPUTFILE,BASEINPUTFORMAT,STOPLISTFILE,NUMREDUCETASKS, - USELOCALCACHE,LIMITHITSPERSELECTOR,MAXHITSPERSELECTOR,MAPMEMORY,REDUCEMEMORY,MAPJAVAOPTS, - 
REDUCEJAVAOPTS,QUERYSCHEMAS,DATASCHEMAS,NUMEXPLOOKUPPARTS,USEHDFSLOOKUPTABLE,NUMDATAPARTITIONS, - NUMCOLMULTPARTITIONS,USEMODEXPJOIN,COLMULTREDUCEBYKEY,ALLOWEMBEDDEDQUERYSCHEMAS); - + + public static final List PROPSLIST = Arrays.asList(PLATFORM, QUERYINPUT, DATAINPUTFORMAT, INPUTDATA, BASEQUERY, ESRESOURCE, ESQUERY, OUTPUTFILE, + BASEINPUTFORMAT, STOPLISTFILE, NUMREDUCETASKS, USELOCALCACHE, LIMITHITSPERSELECTOR, MAXHITSPERSELECTOR, MAPMEMORY, REDUCEMEMORY, MAPJAVAOPTS, + REDUCEJAVAOPTS, QUERYSCHEMAS, DATASCHEMAS, NUMEXPLOOKUPPARTS, USEHDFSLOOKUPTABLE, NUMDATAPARTITIONS, NUMCOLMULTPARTITIONS, USEMODEXPJOIN, + COLMULTREDUCEBYKEY, ALLOWEMBEDDEDQUERYSCHEMAS); + /** * Validates the responder properties * @@ -63,51 +62,51 @@ public static boolean validateResponderProperties() boolean valid = true; // Parse general required options - + if (!SystemConfiguration.hasProperty(PLATFORM)) { logger.info("Must have the option " + PLATFORM); valid = false; } - + String platform = SystemConfiguration.getProperty(PLATFORM).toLowerCase(); if (!platform.equals("mapreduce") && !platform.equals("spark") && !platform.equals("standalone")) { logger.info("Unsupported platform: " + platform); valid = false; } - + if (!SystemConfiguration.hasProperty(QUERYINPUT)) { logger.info("Must have the option " + QUERYINPUT); valid = false; } - + if (!SystemConfiguration.hasProperty(OUTPUTFILE)) { logger.info("Must have the option " + OUTPUTFILE); valid = false; } - - if (!SystemConfiguration.hasProperty(QUERYSCHEMAS)) + + if (SystemConfiguration.hasProperty(QUERYSCHEMAS)) { SystemConfiguration.appendProperty("query.schemas", SystemConfiguration.getProperty(QUERYSCHEMAS)); } - - if (!SystemConfiguration.hasProperty(DATASCHEMAS)) + + if (SystemConfiguration.hasProperty(DATASCHEMAS)) { SystemConfiguration.appendProperty("data.schemas", SystemConfiguration.getProperty(DATASCHEMAS)); } - + if (!SystemConfiguration.hasProperty(DATAINPUTFORMAT)) { logger.info("Must have the option " + DATAINPUTFORMAT); valid = false; } String dataInputFormat = SystemConfiguration.getProperty(DATAINPUTFORMAT).toLowerCase(); - + // Parse required properties by dataInputFormat - + if (dataInputFormat.equals(InputFormatConst.BASE_FORMAT)) { if (!SystemConfiguration.hasProperty(BASEINPUTFORMAT)) @@ -115,7 +114,7 @@ public static boolean validateResponderProperties() logger.info("Must have the option " + BASEINPUTFORMAT + " if using " + InputFormatConst.BASE_FORMAT); valid = false; } - + if (!SystemConfiguration.hasProperty(INPUTDATA)) { logger.info("Must have the option " + INPUTDATA + " if using " + InputFormatConst.BASE_FORMAT); @@ -134,7 +133,7 @@ else if (dataInputFormat.equals(InputFormatConst.ES)) logger.info("Must have the option " + ESRESOURCE); valid = false; } - + if (!SystemConfiguration.hasProperty(ESQUERY)) { logger.info("Must have the option " + ESQUERY); @@ -156,7 +155,7 @@ else if (dataInputFormat.equalsIgnoreCase("standalone")) } // Parse optional properties with defaults - + if (!SystemConfiguration.hasProperty(USEHDFSLOOKUPTABLE)) { SystemConfiguration.setProperty(USEHDFSLOOKUPTABLE, "false"); @@ -187,21 +186,27 @@ else if (dataInputFormat.equalsIgnoreCase("standalone")) SystemConfiguration.setProperty(ALLOWEMBEDDEDQUERYSCHEMAS, "false"); } - // Load the new local query and data schemas - try + if (!SystemConfiguration.hasProperty(USELOCALCACHE)) { - LoadDataSchemas.initialize(); - LoadQuerySchemas.initialize(); + SystemConfiguration.setProperty(USELOCALCACHE, "true"); + } - } catch (Exception e) + // Load the new local query and data 
schemas + if (valid) { - e.printStackTrace(); + logger.info("loading schemas: dataSchemas = " + SystemConfiguration.getProperty("data.schemas") + " querySchemas = " + + SystemConfiguration.getProperty("query.schemas")); + try + { + LoadDataSchemas.initialize(); + LoadQuerySchemas.initialize(); + + } catch (Exception e) + { + e.printStackTrace(); + } } - return valid; } - - - } diff --git a/src/main/java/org/apache/pirk/responder/wideskies/common/ComputeEncryptedRow.java b/src/main/java/org/apache/pirk/responder/wideskies/common/ComputeEncryptedRow.java index edba66ae..fe8e4aa4 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/common/ComputeEncryptedRow.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/common/ComputeEncryptedRow.java @@ -26,9 +26,6 @@ import java.util.HashMap; import java.util.concurrent.ExecutionException; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.pirk.encryption.ModPowAbstraction; @@ -36,9 +33,14 @@ import org.apache.pirk.query.wideskies.Query; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; import scala.Tuple3; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; + /** * Class to compute the encrypted row elements for a query from extracted data partitions * diff --git a/src/main/java/org/apache/pirk/responder/wideskies/common/HashSelectorAndPartitionData.java b/src/main/java/org/apache/pirk/responder/wideskies/common/HashSelectorAndPartitionData.java index b4293779..7a652df7 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/common/HashSelectorAndPartitionData.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/common/HashSelectorAndPartitionData.java @@ -31,6 +31,7 @@ import org.json.simple.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; /** diff --git a/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/HashSelectorsAndPartitionDataMapper.java b/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/HashSelectorsAndPartitionDataMapper.java index a244a40a..e679ef48 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/HashSelectorsAndPartitionDataMapper.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/HashSelectorsAndPartitionDataMapper.java @@ -40,6 +40,7 @@ import org.apache.pirk.utils.SystemConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; /** diff --git a/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/RowCalcReducer.java b/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/RowCalcReducer.java index 8bbf652a..e6c664b0 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/RowCalcReducer.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/RowCalcReducer.java @@ -39,6 +39,7 @@ import org.apache.pirk.utils.SystemConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; /** diff --git a/src/main/java/org/apache/pirk/responder/wideskies/spark/ComputeResponse.java b/src/main/java/org/apache/pirk/responder/wideskies/spark/ComputeResponse.java index 169493bf..049ce709 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/spark/ComputeResponse.java +++ 
b/src/main/java/org/apache/pirk/responder/wideskies/spark/ComputeResponse.java @@ -181,6 +181,7 @@ private void setup() throws Exception { qSchema = LoadQuerySchemas.getSchema(queryInfo.getQueryType()); } + DataSchema dSchema = LoadDataSchemas.getSchema(qSchema.getDataSchemaName()); bVars.setQuerySchema(qSchema); bVars.setDataSchema(dSchema); @@ -200,7 +201,7 @@ private void setup() throws Exception numColMultPartitions = Integer.parseInt(SystemConfiguration.getProperty("pir.numColMultPartitions", numDataPartsString)); // Whether or not we are performing a reduceByKey or a groupByKey->reduce for column multiplication - colMultReduceByKey = SystemConfiguration.getProperty("pir.colMultReduceByKey").equals("true"); + colMultReduceByKey = SystemConfiguration.getProperty("pir.colMultReduceByKey", "false").equals("true"); // Set the expDir bVars.setExpDir(outputDirExp); diff --git a/src/main/java/org/apache/pirk/responder/wideskies/spark/EncColMultGroupedMapper.java b/src/main/java/org/apache/pirk/responder/wideskies/spark/EncColMultGroupedMapper.java index 72d6b95f..0f82b6d9 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/spark/EncColMultGroupedMapper.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/spark/EncColMultGroupedMapper.java @@ -24,6 +24,7 @@ import org.apache.spark.api.java.function.PairFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; /** diff --git a/src/main/java/org/apache/pirk/responder/wideskies/spark/EncRowCalc.java b/src/main/java/org/apache/pirk/responder/wideskies/spark/EncRowCalc.java index aeab1284..7bf11bd4 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/spark/EncRowCalc.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/spark/EncRowCalc.java @@ -27,13 +27,10 @@ import org.apache.pirk.query.wideskies.Query; import org.apache.pirk.query.wideskies.QueryInfo; import org.apache.pirk.responder.wideskies.common.ComputeEncryptedRow; -import org.apache.pirk.schema.data.DataSchema; -import org.apache.pirk.schema.data.LoadDataSchemas; -import org.apache.pirk.schema.query.LoadQuerySchemas; -import org.apache.pirk.schema.query.QuerySchema; import org.apache.spark.api.java.function.PairFlatMapFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; /** diff --git a/src/main/java/org/apache/pirk/responder/wideskies/spark/EncRowCalcPrecomputedCache.java b/src/main/java/org/apache/pirk/responder/wideskies/spark/EncRowCalcPrecomputedCache.java index 360b8ef6..4d11ef77 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/spark/EncRowCalcPrecomputedCache.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/spark/EncRowCalcPrecomputedCache.java @@ -27,6 +27,7 @@ import org.apache.spark.api.java.function.PairFlatMapFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; /** diff --git a/src/main/java/org/apache/pirk/responder/wideskies/spark/ExpKeyFilenameMap.java b/src/main/java/org/apache/pirk/responder/wideskies/spark/ExpKeyFilenameMap.java index 0642e223..9e505b49 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/spark/ExpKeyFilenameMap.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/spark/ExpKeyFilenameMap.java @@ -31,6 +31,7 @@ import org.apache.spark.api.java.function.PairFlatMapFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; /** diff --git a/src/main/java/org/apache/pirk/responder/wideskies/spark/ExpTableGenerator.java 
b/src/main/java/org/apache/pirk/responder/wideskies/spark/ExpTableGenerator.java index b071f7bc..39d23ce1 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/spark/ExpTableGenerator.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/spark/ExpTableGenerator.java @@ -26,6 +26,7 @@ import org.apache.spark.api.java.function.PairFlatMapFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; /** diff --git a/src/main/java/org/apache/pirk/responder/wideskies/spark/HashSelectorsAndPartitionData.java b/src/main/java/org/apache/pirk/responder/wideskies/spark/HashSelectorsAndPartitionData.java index 90fef672..087031ef 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/spark/HashSelectorsAndPartitionData.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/spark/HashSelectorsAndPartitionData.java @@ -29,6 +29,7 @@ import org.apache.spark.api.java.function.PairFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + import scala.Tuple2; /** diff --git a/src/main/java/org/apache/pirk/test/distributed/testsuite/DistTestSuite.java b/src/main/java/org/apache/pirk/test/distributed/testsuite/DistTestSuite.java index c818e310..2970d43f 100644 --- a/src/main/java/org/apache/pirk/test/distributed/testsuite/DistTestSuite.java +++ b/src/main/java/org/apache/pirk/test/distributed/testsuite/DistTestSuite.java @@ -226,15 +226,15 @@ public static void testJSONInputSpark(FileSystem fs, ArrayList dataE // Test embedded QuerySchema SystemConfiguration.setProperty("pir.allowAdHocQuerySchemas", "true"); SystemConfiguration.setProperty("pir.embedQuerySchema", "false"); - BaseTests.testDNSHostnameQuery(dataElements, fs, false, true, 1); + BaseTests.testDNSHostnameQuery(dataElements, fs, true, true, 1); SystemConfiguration.setProperty("pir.allowAdHocQuerySchemas", "true"); SystemConfiguration.setProperty("pir.embedQuerySchema", "true"); - BaseTests.testDNSHostnameQuery(dataElements, fs, false, true, 1); + BaseTests.testDNSHostnameQuery(dataElements, fs, true, true, 1); SystemConfiguration.setProperty("pir.allowAdHocQuerySchemas", "false"); SystemConfiguration.setProperty("pir.embedQuerySchema", "true"); - BaseTests.testDNSHostnameQuery(dataElements, fs, false, true, 1); + BaseTests.testDNSHostnameQuery(dataElements, fs, true, true, 1); SystemConfiguration.setProperty("pir.embedQuerySchema", "false"); // Test pad columns diff --git a/src/main/java/org/apache/pirk/test/utils/BaseTests.java b/src/main/java/org/apache/pirk/test/utils/BaseTests.java index 1816783f..a01df92b 100644 --- a/src/main/java/org/apache/pirk/test/utils/BaseTests.java +++ b/src/main/java/org/apache/pirk/test/utils/BaseTests.java @@ -18,6 +18,8 @@ */ package org.apache.pirk.test.utils; +import static org.junit.Assert.fail; + import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -35,7 +37,6 @@ import org.json.simple.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.junit.Assert.fail; /** * Class to hold the base functional distributed tests diff --git a/src/main/java/org/apache/pirk/test/utils/Inputs.java b/src/main/java/org/apache/pirk/test/utils/Inputs.java index b36ad70a..94f75bf6 100644 --- a/src/main/java/org/apache/pirk/test/utils/Inputs.java +++ b/src/main/java/org/apache/pirk/test/utils/Inputs.java @@ -408,7 +408,7 @@ public static String createPIRStopList(FileSystem fs, boolean hdfs) throws IOExc logger.info("pirStopListFile file successfully created!"); } - tmpFileName = 
TestUtils.writeToTmpFile(elements, SystemConfiguration.getProperty("pir.stopListFile"), null); + tmpFileName = TestUtils.writeToTmpFile(elements, SystemConfiguration.getProperty(DistributedTestDriver.PIR_STOPLIST_FILE), null); return tmpFileName; } diff --git a/src/main/java/org/apache/pirk/test/utils/StandaloneQuery.java b/src/main/java/org/apache/pirk/test/utils/StandaloneQuery.java index 684d04dd..d88daa97 100644 --- a/src/main/java/org/apache/pirk/test/utils/StandaloneQuery.java +++ b/src/main/java/org/apache/pirk/test/utils/StandaloneQuery.java @@ -18,6 +18,8 @@ */ package org.apache.pirk.test.utils; +import static org.junit.Assert.fail; + import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -42,7 +44,6 @@ import org.json.simple.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.junit.Assert.fail; public class StandaloneQuery { diff --git a/src/main/java/org/apache/pirk/utils/QueryParserUtils.java b/src/main/java/org/apache/pirk/utils/QueryParserUtils.java index 36892a3a..b9bea5b4 100644 --- a/src/main/java/org/apache/pirk/utils/QueryParserUtils.java +++ b/src/main/java/org/apache/pirk/utils/QueryParserUtils.java @@ -26,12 +26,10 @@ import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.io.Text; - import org.apache.pirk.inputformat.hadoop.TextArrayWritable; import org.apache.pirk.schema.data.DataSchema; import org.apache.pirk.schema.data.partitioner.IPDataPartitioner; import org.elasticsearch.hadoop.mr.WritableArrayWritable; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/main/java/org/apache/pirk/utils/StringUtils.java b/src/main/java/org/apache/pirk/utils/StringUtils.java index c06e969c..73ab897d 100755 --- a/src/main/java/org/apache/pirk/utils/StringUtils.java +++ b/src/main/java/org/apache/pirk/utils/StringUtils.java @@ -27,13 +27,11 @@ import org.apache.hadoop.io.MapWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; - import org.apache.pirk.schema.data.DataSchema; import org.elasticsearch.hadoop.mr.WritableArrayWritable; import org.json.simple.JSONObject; import org.json.simple.parser.JSONParser; import org.json.simple.parser.ParseException; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/main/java/org/apache/pirk/utils/SystemConfiguration.java b/src/main/java/org/apache/pirk/utils/SystemConfiguration.java index e04a9242..9f0c0f3a 100755 --- a/src/main/java/org/apache/pirk/utils/SystemConfiguration.java +++ b/src/main/java/org/apache/pirk/utils/SystemConfiguration.java @@ -24,8 +24,6 @@ import java.io.InputStream; import java.util.Properties; -import org.apache.pirk.schema.data.LoadDataSchemas; -import org.apache.pirk.schema.query.LoadQuerySchemas; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,51 +44,29 @@ public class SystemConfiguration private static final Properties props; /** - * By default, if the pirk.properties file is found on the root of the classpath, it is loaded first. 
+ * By default, these files should be found on the root of the classpath */ private static final String DEFAULT_PROPERTY_FILE = "pirk.properties"; - - private static final String LOCAL_PROPERTIES_DIR = "local.pirk.properties.dir"; - private static final String QUERIER_PROPERTIES_FILE = "querier.properties"; - private static final String RESPONDER_PROPERTIES_FILE = "responder.properties"; + private static final String LOCAL_PROPERTIES_DIR = "local.pirk.properties.dir"; + static { props = new Properties(); initialize(); - - // Load any data schema files indicated in the properties - try - { - LoadDataSchemas.class.newInstance(); - } catch (Exception e) - { - logger.error("Issue when invoking LoadDataSchemas"); - e.printStackTrace(); - } - - // Load any query schema files indicated in the properties - try - { - LoadQuerySchemas.class.newInstance(); - } catch (Exception e) - { - logger.error("Issue when invoking LoadDataSchemas"); - e.printStackTrace(); - } } public static void initialize() { // First try to load the default properties file - loadPropsFromFile(DEFAULT_PROPERTY_FILE); - + loadPropsFromStream(DEFAULT_PROPERTY_FILE); + // Try to load props from the querier and responder property files, if they exist - loadPropsFromFile(QUERIER_PROPERTIES_FILE); - loadPropsFromFile(RESPONDER_PROPERTIES_FILE); - + loadPropsFromStream(QUERIER_PROPERTIES_FILE); + loadPropsFromStream(RESPONDER_PROPERTIES_FILE); + // Try to load the local properties files, if they exists loadPropsFromDir(LOCAL_PROPERTIES_DIR); } @@ -128,10 +104,10 @@ public static void setProperty(String propertyName, String value) { props.setProperty(propertyName, value); } - + public static boolean hasProperty(String propertyName) { - return props.contains(propertyName); + return props.containsKey(propertyName); } /** @@ -141,18 +117,19 @@ public static boolean hasProperty(String propertyName) */ public static void appendProperty(String property, String propToAdd) { - String value = props.getProperty(property); - if(value != null) - { - value += "," + propToAdd; - } - else - { - value = propToAdd; - } - props.setProperty(property, value); + String value = props.getProperty(property); + + if (value != null && !value.equals("none")) + { + value += "," + propToAdd; + } + else + { + value = propToAdd; + } + props.setProperty(property, value); } - + /** * Reset all properties to the default values */ @@ -161,7 +138,7 @@ public static void resetProperties() clearProperties(); initialize(); } - + /** * Loads the properties from local properties file in the specified directory *

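The switch from props.contains() to props.containsKey() in hasProperty() above is a genuine fix: java.util.Properties extends Hashtable, and Hashtable.contains(Object) tests values rather than keys, so the old check only matched when some property *value* happened to equal the property name being asked about. A minimal standalone sketch of the difference (the class name below is illustrative only and is not part of the patch):

import java.util.Properties;

public class HasPropertyCheck
{
  public static void main(String[] args)
  {
    Properties props = new Properties();
    props.setProperty("pir.useLocalCache", "true");

    // Hashtable.contains(Object) matches values, not keys
    System.out.println(props.contains("pir.useLocalCache"));    // false
    System.out.println(props.contains("true"));                 // true -- value match

    // containsKey is the check hasProperty() actually wants
    System.out.println(props.containsKey("pir.useLocalCache")); // true
  }
}

The reworked appendProperty() above makes a related adjustment: it avoids appending new schema file names onto the placeholder value "none" that the default pirk.properties uses for data.schemas and query.schemas.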
@@ -171,27 +148,18 @@ public static void loadPropsFromDir(String dirName) { File dir = new File(dirName); File[] directoryListing = dir.listFiles(); - if (directoryListing != null) + if (directoryListing != null) { - for (File file : directoryListing) + for (File file : directoryListing) { - if(file.getName().endsWith(".properties")) + if (file.getName().endsWith(".properties")) { loadPropsFromFile(file); } } - } - } - - /** - * Loads the properties from the specified file - */ - public static void loadPropsFromFile(String fileName) - { - File file = new File(getProperty(fileName)); - loadPropsFromFile(file); + } } - + /** * Loads the properties from the specified file */ @@ -215,4 +183,29 @@ public static void loadPropsFromFile(File file) logger.warn("Properties file does not exist: '" + file.getAbsolutePath() + "'"); } } + + /** + * Loads the properties from the specified file on the classpath + */ + public static void loadPropsFromStream(String name) + { + try + { + InputStream stream = SystemConfiguration.class.getClassLoader().getResourceAsStream(name); + if (stream != null) + { + logger.info("Loading file '" + name + "'"); + props.load(stream); + stream.close(); + } + else + { + logger.error("No file found '" + name + "'"); + } + } catch (IOException e) + { + logger.error("Problem loading file '" + name + "'"); + e.printStackTrace(); + } + } } diff --git a/src/main/resources/pirk.properties b/src/main/resources/pirk.properties index 8f631383..449daf40 100755 --- a/src/main/resources/pirk.properties +++ b/src/main/resources/pirk.properties @@ -22,16 +22,17 @@ ## # Name of log4j properties file (relative to current folder) -log4jPropertiesFile=log4j2.properties +log4jPropertiesFile=logging/log4j2.properties -#Name of the directory holding the local properties files -#All property files must end in .properties -local.pirk.properties.dir=/root/properties +#Name of the local properties file - used when running with the +#hadoop jar command +local.pirk.properties=/root/local.pirk.properties ## ##Spark path for SparkLauncher ## -spark.home = /usr +#spark.home = /usr +spark.home=/opt/cloudera/parcels/CDH/lib/spark ## ## Data schema properties @@ -201,7 +202,7 @@ pir.esQuery = none pir.outputFile = none #Fully qualified dir in hdfs of file containing stoplist terms -pir.stopListFile = none +pir.stopListFile = stopListFile #Number of reduce tasks pir.numReduceTasks = 100 diff --git a/src/main/resources/pirk.properties-repo b/src/main/resources/pirk.properties-repo new file mode 100755 index 00000000..8f631383 --- /dev/null +++ b/src/main/resources/pirk.properties-repo @@ -0,0 +1,243 @@ +############################################################################### +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+############################################################################### + +## +## Default System Configuration Properties +## + +# Name of log4j properties file (relative to current folder) +log4jPropertiesFile=log4j2.properties + +#Name of the directory holding the local properties files +#All property files must end in .properties +local.pirk.properties.dir=/root/properties + +## +##Spark path for SparkLauncher +## +spark.home = /usr + +## +## Data schema properties +## +## Each data schema should be specified in an xml file of the form; +## all items are treated in a case insensitive manner: +## +## +## name of the schema +## +## element name +## class name or type name (if Java primitive type) of the element +## true or false -- whether or not the schema element is an array within the data +## optional - Partitioner class for the element; defaults to primitive java type partitioner +## +## +## +## Primitive Java types must be one of the following: "byte", "short", "int", "long", "float", +## "double", "char", "string", "boolean" +## + +#Comma separated list of local data schema files to load, fully qualified file names +data.schemas = none + +## +## Query schema properties +## +## Each query schema should be specified in an xml file of the form; +## all items are treated in a case insensitive manner: +## +## +## name of the schema +## name of the data schema over which this query is run +## name of the element in the data schema that will be the selector +## +## element name of element in the data schema to include in the query response +## +## (optional) name of the filter class to use to filter the data +## +## (optional) element name of element in the data schema to apply pre-processing filters +## +## +## +## + +#Comma separated list of local query schema files to load, fully qualified file names +query.schemas = none + +## +##Properties for ElasticSearch compatibility +## + +#ES host address - One Elasticsearch node in the cluster - may include port specification +es.nodes= none + +#Default HTTP/REST port used for connecting to Elasticsearch +es.port=9200 + +#Number of results/items returned by each individual scroll +es.scroll.size = 1000 + +#Whether elasticsearch-hadoop will treat empty fields as null +es.field.read.empty.as.null=yes + + +## +##Properties for functional testing +## + +#Test index to create in ES (created and deleted within the tests) +#Need final form to be: :/ +test.es.index = testindex +test.pir.es.index = testindexpir + +#Type of elements to insert in ES +test.es.type = pkt + +#Elasticsearch resource - Elasticsearch resource location where data is read and written to. 
+#Requires the format / +test.es.resource= none +test.pir.es.resource = none + +#Pathname in hdfs to place input JSON file testing +test.inputJSONFile = none +test.pir.inputJSONFile = /tmp/testJSONInput + +#Pathname in hdfs to place output file for testing +test.outputHDFSFile = /tmp/testOutput + +#PIR query input dir in hdfs for testing +test.pir.queryInputDir = none + +#PIR stoplist file +test.pir.stopListFile = /tmp/testStopListFile + +#Whether or not we are running PIR testing (used as a flag to dump intermediate RDDs for checking) +#This should default to false; it is changed to true in the test suite, as applicable +pir.test = false + +#HDFS output dir for PIR intermediate testing +#Should default to none; changed to a real hdfs path in the test suite, as applicable +pir.testOut = none + + +## +## Properties to enable/disable JNA-GMP modPow acceleration for Paillier +## + +paillier.useGMPForModPow = true + +# The JNA-GMP library we use to invoke the much faster, natively compiled GMP +# can be called in a way that tries to make all modPow calls take a constant amount +# of time to execute. This will slow down the modPow performance (still better than +# Java's BigInteger.modPow() ). +# If you are using this package in a multi-tenant computing environment and have +# concerns about other tenants being able to closely inspect the runtime of your +# software, you may want to enable this property. +paillier.GMPConstantTimeMode = false + +# This property controls the more rigorous prime generation checks in PrimeMaker.java +# which are based on FIPS SP 800-56B and FIPS 186-4 (extra Miller-Rabin rounds, limits +# on how close the primes p and q can be, and bounds on the values of the primes) +# These checks slow down prime generation considerably +pallier.FIPSPrimeGenerationChecks = true + +## These properties control the secure random number generator algorithm and provider. +## You can specify just the algorithm, or both algorithm and provider. The system's +## default secure random is used when the algorithm is left unspecified. 
+pallier.secureRandom.algorithm=NativePRNG +#pallier.secureRandom.provider=SUN + +## +## Properties for PIR query and response +## + +#Number of bits to return when encoding/returning string values as part of return data elements +pir.stringBits = 64 + +#Number of array elements to return when encoding/returning array valued elements +pir.numReturnArrayElements = 2 + +#Default prime certainty +pir.primeCertainty = 128 + +#Fully qualified dir in hdfs of Query files +pir.queryInput = none + +#Data input format type -- 'base' or 'elasticsearch' (given in InputFormatsConst class) +pir.dataInputFormat = base + +#Fully qualified name of input file/directory in hdfs; used if pir.dataInputFormat = base +pir.inputData = none + +#Inputformat for 'base' data input format type -- must extend BaseInputFormat +pir.baseInputFormat = none + +#ElasticSearch-like query if using 'base' input format +pir.baseQuery = none + +#ES resource for input data +pir.esResource = null + +#ES query for input data +pir.esQuery = none + +#Fully qualified name of output file in hdfs +pir.outputFile = none + +#Fully qualified dir in hdfs of file containing stoplist terms +pir.stopListFile = none + +#Number of reduce tasks +pir.numReduceTasks = 100 + +#Whether or not to use the local cache during PIR computations +pir.useLocalCache = true + +#Whether or not to limit the hits for each query term +pir.limitHitsPerSelector = true + +#Number of hits to limit for each query term, if pir.limitHitsPerSelector = true +pir.maxHitsPerSelector = 100 + +#Whether or not to embed the selector in the results for false positive reduction +pir.embedSelector = true + +#Whether or not to generate and use the HDFS modular exponentiation lookup table +pir.useHDFSLookupTable = false + +#Number of partitions to coalesce the input data into in Spark +pir.numDataPartitions = 1500 + +#Mapreduce memory options +mapreduce.map.memory.mb = 3000 +mapreduce.reduce.memory.mb = 3000 +mapreduce.map.java.opts = -Xmx2800m +mapreduce.reduce.java.opts = -Xmx2800m + +#HDFS directory for the expLookupTable +pir.expDir = none + +#Parallelism for expLookupTable creation in hdfs +pir.expCreationSplits = 600 + + + + + + diff --git a/src/main/resources/querier.properties b/src/main/resources/querier.properties index 351c2ce2..6a52ebd4 100644 --- a/src/main/resources/querier.properties +++ b/src/main/resources/querier.properties @@ -17,6 +17,9 @@ # under the License. 
############################################################################### +## +## Required Properties +## #action -- required - 'encrypt' or 'decrypt' -- The action performed by the QuerierDriver querier.action= @@ -48,73 +51,73 @@ querier.numThreads= ## #dataSchemas -- optional -- Comma separated list of data schema file names -querier.dataSchemas= +#querier.dataSchemas= #querySchemas -- optional -- Comma separated list of query schema file names -querier.dataSchemas= +#querier.dataSchemas= ## Optional, but required for Encryption (ignored if not encrypting) #bitset -- required for encryption -- Ensure that this bit position is set in the Paillier #modulus (will generate Paillier moduli until finding one in which this bit is set) -querier.bitSet= +#querier.bitSet= #certainty -- required for encryption -- Certainty of prime generation for Paillier #must be greater than or equal to 128 -querier.certainty= +#querier.certainty= #dataPartitionBitSize -- required for encryption -- Partition bit size in data partitioning -querier.dataPartitionBitSize= +#querier.dataPartitionBitSize= #embedSelector - required for encryption -- 'true' or 'false' #Whether or not to embed the selector in the results to reduce false positives #Defaults to 'true' -querier.embedSelector= +#querier.embedSelector= #embedQuerySchema - true or false #Whether or not to embed the QuerySchema in the Query (via QueryInfo) #Defaults to 'false' -querier.embedQuerySchema= +#querier.embedQuerySchema= #hashBitSize - required for encryption-- Bit size of keyed hash -querier.hashBitSize= +#querier.hashBitSize= #hashKey -- required for encryption -- String key for the keyed hash functionality -querier.hashKey= +#querier.hashKey= #useHDFSLookupTable -- required for encryption -- 'true' or 'false' #Whether or not to generate and use a hdfs modular exponentation lookup table #Defaults to 'false' -querier.useHDFSLookupTable= +#querier.useHDFSLookupTable= #memLookupTable -- required for encryption -- 'true' or 'false' #Whether or not to generate and use an in memory modular exponentation lookup table - only for #standalone/testing right now... 
#Defaults to 'false' -querier.memLookupTable= +#querier.memLookupTable= #paillierBitSize -- required for encryption -- Paillier modulus size N -querier.paillierBitSize= +#querier.paillierBitSize= #queryName -- required for encryption -- Name of the query -querier.queryName= +#querier.queryName= #queryType -- required for encryption #Type of the query as defined in the 'schemaName' tag of the corresponding query schema file -querier.queryType= +#querier.queryType= #secureRandomAlg -- specify the SecureRandom algorithm #Defaults to NativePRNG -querier.secureRandomAlg= +#querier.secureRandomAlg= #secureRandomProvider -- specify the SecureRandom provider #Defaults to SUN -querier.secureRandomProvider= +#querier.secureRandomProvider= ## Optional, but required for Decryption (ignored if not decrypting) #querierFile -- required for decryption #Fully qualified file containing the serialized Querier object -querier.querierFile= +#querier.querierFile= \ No newline at end of file diff --git a/src/main/resources/responder.properties b/src/main/resources/responder.properties index 56211b3f..7556d001 100644 --- a/src/main/resources/responder.properties +++ b/src/main/resources/responder.properties @@ -44,95 +44,95 @@ pir.queryInput= ## #dataSchemas -- required -- Comma separated list of data schema file names -responder.dataSchemas= +#responder.dataSchemas= #querySchemas -- required -- Comma separated list of query schema file names -responder.querySchemas= +#responder.querySchemas= #allowAdHocQuerySchemas -- 'true' or 'false' #If true, allows embedded QuerySchemas for a query. #Defaults to 'false' -pir.allowEmbeddedQuerySchemas= +#pir.allowEmbeddedQuerySchemas= #colMultReduceByKey -- 'true' or 'false' -- Spark only #If true, uses reduceByKey in performing column multiplication; if false, uses groupByKey -> reduce #Defaults to 'false' -pir.colMultReduceByKeys= +#pir.colMultReduceByKey= #baseInputFormat -- required if baseInputFormat = 'base' #Full class name of the InputFormat to use when reading in the data - must extend BaseInputFormat -pir.baseInputFormat= +#pir.baseInputFormat= #esQuery -- required if baseInputFormat = 'elasticsearch' -- ElasticSearch query #if using 'elasticsearch' input format -pir.esQuery= +#pir.esQuery= #esResource -- required if baseInputFormat = 'elasticsearch' #Requires the format / : Elasticsearch resource where data is read and written to -pir.esResource= +#pir.esResource= #useHDFSLookupTable -- 'true' or 'false' - Whether or not to generate and use the #hdfs lookup table for modular exponentiation #Defaults to 'false' -pir.useHDFSLookupTable= +#pir.useHDFSLookupTable= #baseQuery -- ElasticSearch-like query if using 'base' input format - #used to filter records in the RecordReader #Defaults to ?q=* -pir.baseQuery= +#pir.baseQuery= #limitHitsPerSelector -- 'true' or 'false' #Whether or not to limit the number of hits per selector #Defaults to 'true' -pir.limitHitsPerSelector= +#pir.limitHitsPerSelector= #mapreduceMapJavaOpts -- Amount of heap (in MB) to allocate per map task #Defaults to -Xmx2800m -mapreduce.map.java.opts= +#mapreduce.map.java.opts= #mapreduceMapMemoryMb -- Amount of memory (in MB) to allocate per map task #Defaults to 3000 -mapreduce.map.memory.mb= +#mapreduce.map.memory.mb= #mapreduceReduceJavaOpts #Amount of heap (in MB) to allocate per reduce task #Defaults to -Xmx2800m -mapreduce.reduce.java.opts= +#mapreduce.reduce.java.opts= #mapreduceReduceMemoryMb #Amount of memory (in MB) to allocate per reduce task #Defaults to 3000 -mapreduce.reduce.memory.mb= 
+#mapreduce.reduce.memory.mb= #stopListFile -- optional (unless using StopListFilter) -- Fully qualified file in hdfs #containing stoplist terms; used by the StopListFilter -pir.stopListFile= +#pir.stopListFile= #useLocalCache -- 'true' or 'false' #Whether or not to use the local cache for modular exponentiation #Defaults to 'true' -pir.useLocalCache= +#pir.useLocalCache= #useModExpJoin -- 'true' or 'false' -- Spark only #Whether or not to pre-compute the modular exponentiation table and join it to the data #partitions when performing the encrypted row calculations #Defaults to 'false' -pir.useModExpJoin= +#pir.useModExpJoin= #numReduceTasks -- optional -- Number of reduce tasks -pir.numReduceTasks= +#pir.numReduceTasks= #numColMultPartitions -- optional, Spark only #Number of partitions to use when performing column multiplication -pir.numColMultPartitions= +#pir.numColMultPartitions= #maxHitsPerSelector -- optional -- Max number of hits encrypted per selector -pir.maxHitsPerSelector= +#pir.maxHitsPerSelector= #dataParts -- optional -- Number of partitions for the input data -pir.numDataPartitions= +#pir.numDataPartitions= #numExpLookupPartitions -- optional -- Number of partitions for the exp lookup table -pir.numExpLookupPartitions= +#pir.numExpLookupPartitions= \ No newline at end of file diff --git a/src/test/java/test/general/ISO8601DateParserTest.java b/src/test/java/test/general/ISO8601DateParserTest.java index b3d41aad..02391d4c 100644 --- a/src/test/java/test/general/ISO8601DateParserTest.java +++ b/src/test/java/test/general/ISO8601DateParserTest.java @@ -18,13 +18,14 @@ */ package test.general; +import static org.junit.Assert.assertEquals; + import java.text.ParseException; import org.apache.pirk.utils.ISO8601DateParser; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.junit.Assert.assertEquals; /** * Class to test basic functionality of ISO8601DateParser class diff --git a/src/test/java/test/general/KeyedHashTest.java b/src/test/java/test/general/KeyedHashTest.java index 131e18f4..6f69b389 100644 --- a/src/test/java/test/general/KeyedHashTest.java +++ b/src/test/java/test/general/KeyedHashTest.java @@ -18,11 +18,12 @@ */ package test.general; +import static org.junit.Assert.assertEquals; + import org.apache.pirk.utils.KeyedHash; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.junit.Assert.assertEquals; /** * Basic functional tests for KeyedHash diff --git a/src/test/java/test/general/PaillierTest.java b/src/test/java/test/general/PaillierTest.java index ce862856..abb50fa5 100644 --- a/src/test/java/test/general/PaillierTest.java +++ b/src/test/java/test/general/PaillierTest.java @@ -18,6 +18,9 @@ */ package test.general; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + import java.math.BigInteger; import java.util.Random; @@ -27,8 +30,6 @@ import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; /** * Basic test functionality for Paillier library diff --git a/src/test/java/test/general/PartitionUtilsTest.java b/src/test/java/test/general/PartitionUtilsTest.java index 7dc97e25..f226cb5c 100644 --- a/src/test/java/test/general/PartitionUtilsTest.java +++ b/src/test/java/test/general/PartitionUtilsTest.java @@ -18,6 +18,9 @@ */ package test.general; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + 
import java.math.BigInteger; import java.util.ArrayList; @@ -29,9 +32,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; - /** * Class to functionally test the bit conversion utils */ diff --git a/src/test/java/test/general/QueryParserUtilsTest.java b/src/test/java/test/general/QueryParserUtilsTest.java index c57676e3..02d28ede 100644 --- a/src/test/java/test/general/QueryParserUtilsTest.java +++ b/src/test/java/test/general/QueryParserUtilsTest.java @@ -18,6 +18,9 @@ */ package test.general; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import java.util.ArrayList; import java.util.Map; @@ -32,9 +35,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - /** * Class for testing the QueryParser methods */ diff --git a/src/test/java/test/schema/data/LoadDataSchemaTest.java b/src/test/java/test/schema/data/LoadDataSchemaTest.java index 361442a9..b41d52fa 100644 --- a/src/test/java/test/schema/data/LoadDataSchemaTest.java +++ b/src/test/java/test/schema/data/LoadDataSchemaTest.java @@ -18,6 +18,10 @@ */ package test.schema.data; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + import java.io.File; import java.io.IOException; @@ -39,9 +43,6 @@ import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; /** * Test suite for LoadDataSchema and DataSchema diff --git a/src/test/java/test/schema/query/LoadQuerySchemaTest.java b/src/test/java/test/schema/query/LoadQuerySchemaTest.java index dee83369..91aedc9b 100644 --- a/src/test/java/test/schema/query/LoadQuerySchemaTest.java +++ b/src/test/java/test/schema/query/LoadQuerySchemaTest.java @@ -18,6 +18,9 @@ */ package test.schema.query; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; + import java.io.File; import java.io.IOException; import java.util.Arrays; @@ -45,9 +48,6 @@ import org.slf4j.LoggerFactory; import org.w3c.dom.Document; import org.w3c.dom.Element; -import test.schema.data.LoadDataSchemaTest; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; import test.schema.data.LoadDataSchemaTest; From 5ae65147991f098eda9d766318eafd90dcf2a869 Mon Sep 17 00:00:00 2001 From: eawilliams Date: Thu, 28 Jul 2016 09:37:39 -0400 Subject: [PATCH 06/10] updates to CLI parsing and clean up --- pom-repo.xml | 381 ------------------ pom.xml | 30 +- .../querier/wideskies/QuerierDriverCLI.java | 5 +- .../pirk/querier/wideskies/QuerierProps.java | 18 +- .../responder/wideskies/ResponderCLI.java | 9 +- .../responder/wideskies/ResponderProps.java | 34 +- .../pirk/utils/SystemConfiguration.java | 19 +- src/main/resources/pirk.properties | 5 +- src/main/resources/querier.properties | 6 +- src/main/resources/responder.properties | 13 +- 10 files changed, 62 insertions(+), 458 deletions(-) delete mode 100644 pom-repo.xml diff --git a/pom-repo.xml b/pom-repo.xml deleted file mode 100644 index 3d4c1b56..00000000 --- a/pom-repo.xml +++ /dev/null @@ -1,381 +0,0 @@ - - - - - 4.0.0 - - - org.apache.pirk - pirk - 0.0.1-SNAPSHOT - jar - - Apache Pirk (incubating) Project - Apache Pirk (incubating) is a framework for scalable Private 
Information Retrieval (PIR). - http://pirk.incubator.apache.org/ - - - 2016 - - - The Apache Software Foundation - https://www.apache.org - - - - - Apache License, Version 2.0 - https://www.apache.org/licenses/LICENSE-2.0 - - - - - - Dev - dev-subscribe@pirk.incubator.apache.org - dev-unsubscribe@pirk.incubator.apache.org - dev@pirk.incubator.apache.org - http://mail-archives.apache.org/mod_mbox/incubator-pirk-dev/ - - - Commits - commits-subscribe@pirk.incubator.apache.org - commits-unsubscribe@pirk.incubator.apache.org - http://mail-archives.apache.org/mod_mbox/incubator-pirk-commits - - - - - - mvn-public - http://mvnrepository.com/artifact - - always - - - - - - - UTF-8 - 1.7 - 1.7 - 2.10.4 - 1.11.3 - benchmarks - 1.7 - 1.7.21 - 2.6.2 - 4.12 - log4j2.properties - - - - - log4j - log4j - 1.2.17 - - - - junit - junit - 4.12 - - - - org.apache.commons - commons-math3 - 3.3 - - - - com.googlecode.json-simple - json-simple - 1.1 - - - - commons-net - commons-net - 3.3 - - - - org.apache.hadoop - hadoop-common - 2.7.2 - - - - org.apache.hadoop - hadoop-mapreduce-client-core - 2.7.2 - - - - org.apache.hadoop - hadoop-client - 2.7.2 - - - - org.apache.spark - spark-core_2.11 - 1.6.1 - - - - org.elasticsearch - elasticsearch-hadoop - 2.1.2 - - - commons-net - commons-net - - - org.apache.hive - hive-service - - - org.slf4j - slf4j-log4j12 - - - org.slf4j - log4j-over-slf4j - - - - - - - com.squareup.jnagmp - jnagmp - 1.1.0 - - - - - org.openjdk.jmh - jmh-core - ${jmh.version} - provided - - - - org.openjdk.jmh - jmh-generator-annprocess - ${jmh.version} - provided - - - - - org.slf4j - slf4j-api - ${slf4j.version} - - - - org.apache.logging.log4j - log4j-slf4j-impl - 2.6.2 - - - - - - - - org.apache.maven.plugins - maven-surefire-plugin - 2.18 - - true - -Xmx1G - -Djava.net.preferIPv4Stack=true - - ${log4j.configuration} - - - - - - org.apache.maven.surefire - surefire-junit4 - 2.18 - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.5.1 - - ${javac.target} - ${javac.target} - ${javac.target} - - - - - org.apache.maven.plugins - maven-jar-plugin - 3.0.1 - - - org/apache/pirk/benchmark/** - org/openjdk/jmh/** - - - - - - org.apache.maven.plugins - maven-shade-plugin - 2.4.3 - - - - package - main - - shade - - - true - exe - - - - - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - - - - - - - - org.apache.rat - apache-rat-plugin - 0.11 - - - nb-configuration.xml - nbactions.xml - DEPENDENCIES - - .travis.yml - appveyor.yml - - - - - - org.apache.maven.doxia - doxia-core - 1.6 - - - xerces - xercesImpl - - - - - - - - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - - org.scala-tools - - - maven-scala-plugin - - - [2.15.2,) - - - testCompile - - - - - - - - - - org.apache.rat - - - apache-rat-plugin - - - [0.11,) - - - check - - - - - - - - - - - - - - - - diff --git a/pom.xml b/pom.xml index a9b77ad9..3d4c1b56 100644 --- a/pom.xml +++ b/pom.xml @@ -117,7 +117,6 @@ 3.3 - - - org.apache.hadoop - hadoop-common - 2.7.2 - - - - org.apache.hadoop - hadoop-mapreduce-client-core - 2.7.2 - - - - org.apache.hadoop - hadoop-client - 2.7.2 - - - - org.apache.spark - spark-core_2.11 - 1.6.1 - - - - org.elasticsearch elasticsearch-hadoop @@ -268,6 +239,7 @@ org.apache.maven.plugins maven-jar-plugin + 3.0.1 org/apache/pirk/benchmark/** diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java index d775fb5d..d9a3c956 100644 --- 
a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java @@ -114,6 +114,7 @@ private boolean parseOptions() // If we have a local.querier.properties file specified, load it if (hasOption(LOCALPROPFILE)) { + System.out.println("getOptionValue(LOCALPROPFILE) = " + getOptionValue(LOCALPROPFILE)); SystemConfiguration.loadPropsFromFile(new File(getOptionValue(LOCALPROPFILE))); } else @@ -149,8 +150,10 @@ private Options createOptions() options.addOption(optionHelp); // local.querier.properties - Option optionLocalPropFile = new Option("localPropFile", LOCALPROPFILE, false, "Optional local properties file"); + Option optionLocalPropFile = new Option("localPropFile", LOCALPROPFILE, true, "Optional local properties file"); optionLocalPropFile.setRequired(false); + optionLocalPropFile.setArgName(LOCALPROPFILE); + optionLocalPropFile.setType(String.class); options.addOption(optionLocalPropFile); // ACTION diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java index d91b14a4..d420b45a 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java @@ -99,49 +99,49 @@ public static boolean validateQuerierProperties() { if (!SystemConfiguration.hasProperty(QUERYTYPE)) { - logger.info("Must have the option " + QUERYTYPE); + logger.info("For action='encrypt': Must have the option " + QUERYTYPE); valid = false; } if (!SystemConfiguration.hasProperty(HASHBITSIZE)) { - logger.info("Must have the option " + HASHBITSIZE); + logger.info("For action='encrypt': Must have the option " + HASHBITSIZE); valid = false; } if (!SystemConfiguration.hasProperty(HASHKEY)) { - logger.info("Must have the option " + HASHKEY); + logger.info("For action='encrypt': Must have the option " + HASHKEY); valid = false; } if (!SystemConfiguration.hasProperty(DATAPARTITIONSIZE)) { - logger.info("Must have the option " + DATAPARTITIONSIZE); + logger.info("For action='encrypt': Must have the option " + DATAPARTITIONSIZE); valid = false; } if (!SystemConfiguration.hasProperty(PAILLIERBITSIZE)) { - logger.info("Must have the option " + PAILLIERBITSIZE); + logger.info("For action='encrypt': Must have the option " + PAILLIERBITSIZE); valid = false; } if (!SystemConfiguration.hasProperty(CERTAINTY)) { - logger.info("Must have the option " + CERTAINTY); + logger.info("For action='encrypt': Must have the option " + CERTAINTY); valid = false; } if (!SystemConfiguration.hasProperty(QUERYNAME)) { - logger.info("Must have the option " + QUERYNAME); + logger.info("For action='encrypt': Must have the option " + QUERYNAME); valid = false; } if (!SystemConfiguration.hasProperty(BITSET)) { - logger.info("Must have the option " + BITSET); + logger.info("For action='encrypt': Must have the option " + BITSET); valid = false; } @@ -176,7 +176,7 @@ public static boolean validateQuerierProperties() { if (!SystemConfiguration.hasProperty(QUERIERFILE)) { - logger.info("Must have the option " + QUERIERFILE); + logger.info("For action='decrypt': Must have the option " + QUERIERFILE); valid = false; } } diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java index bcf02ba7..6a92f63d 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java +++ 
b/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java @@ -148,6 +148,13 @@ private Options createOptions() optionHelp.setRequired(false); options.addOption(optionHelp); + // local.querier.properties + Option optionLocalPropFile = new Option("localPropFile", LOCALPROPFILE, true, "Optional local properties file"); + optionLocalPropFile.setRequired(false); + optionLocalPropFile.setArgName(LOCALPROPFILE); + optionLocalPropFile.setType(String.class); + options.addOption(optionLocalPropFile); + // platform Option optionPlatform = new Option("p", ResponderProps.PLATFORM, true, "required -- 'mapreduce', 'spark', or 'standalone' : Processing platform technology for the responder"); @@ -348,7 +355,7 @@ private Options createOptions() optionColMultReduceByKey.setType(String.class); options.addOption(optionColMultReduceByKey); - // colMultReduceByKey + // allowEmbeddedQS Option optionAllowEmbeddedQS = new Option("allowEmbeddedQS", ResponderProps.ALLOWEMBEDDEDQUERYSCHEMAS, true, "optional -- 'true' or 'false' (defaults to 'false') -- " + "If true, allows embedded QuerySchemas for a query."); optionAllowEmbeddedQS.setRequired(false); diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java index 1dd40a9e..d4395f2c 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java @@ -21,13 +21,13 @@ public class ResponderProps public static final String PLATFORM = "platform"; public static final String QUERYINPUT = "pir.queryInput"; public static final String DATAINPUTFORMAT = "pir.dataInputFormat"; + public static final String OUTPUTFILE = "pir.outputFile"; + + // Optional properties public static final String INPUTDATA = "pir.inputData"; public static final String BASEQUERY = "pir.baseQuery"; public static final String ESRESOURCE = "pir.esResource"; public static final String ESQUERY = "pir.esQuery"; - public static final String OUTPUTFILE = "pir.outputFile"; - - // Optional properties public static final String BASEINPUTFORMAT = "pir.baseInputFormat"; public static final String STOPLISTFILE = "pir.stopListFile"; public static final String NUMREDUCETASKS = "pir.numReduceTasks"; @@ -88,16 +88,6 @@ public static boolean validateResponderProperties() valid = false; } - if (SystemConfiguration.hasProperty(QUERYSCHEMAS)) - { - SystemConfiguration.appendProperty("query.schemas", SystemConfiguration.getProperty(QUERYSCHEMAS)); - } - - if (SystemConfiguration.hasProperty(DATASCHEMAS)) - { - SystemConfiguration.appendProperty("data.schemas", SystemConfiguration.getProperty(DATASCHEMAS)); - } - if (!SystemConfiguration.hasProperty(DATAINPUTFORMAT)) { logger.info("Must have the option " + DATAINPUTFORMAT); @@ -111,13 +101,13 @@ public static boolean validateResponderProperties() { if (!SystemConfiguration.hasProperty(BASEINPUTFORMAT)) { - logger.info("Must have the option " + BASEINPUTFORMAT + " if using " + InputFormatConst.BASE_FORMAT); + logger.info("For base inputformt: Must have the option " + BASEINPUTFORMAT + " if using " + InputFormatConst.BASE_FORMAT); valid = false; } if (!SystemConfiguration.hasProperty(INPUTDATA)) { - logger.info("Must have the option " + INPUTDATA + " if using " + InputFormatConst.BASE_FORMAT); + logger.info("For base inputformt: Must have the option " + INPUTDATA + " if using " + InputFormatConst.BASE_FORMAT); valid = false; } @@ -130,13 +120,13 @@ else if 
(dataInputFormat.equals(InputFormatConst.ES)) { if (!SystemConfiguration.hasProperty(ESRESOURCE)) { - logger.info("Must have the option " + ESRESOURCE); + logger.info("For ElasticSearch inputformt: Must have the option " + ESRESOURCE); valid = false; } if (!SystemConfiguration.hasProperty(ESQUERY)) { - logger.info("Must have the option " + ESQUERY); + logger.info("For ElasticSearch inputformat: Must have the option " + ESQUERY); valid = false; } } @@ -156,6 +146,16 @@ else if (dataInputFormat.equalsIgnoreCase("standalone")) // Parse optional properties with defaults + if (SystemConfiguration.hasProperty(QUERYSCHEMAS)) + { + SystemConfiguration.appendProperty("query.schemas", SystemConfiguration.getProperty(QUERYSCHEMAS)); + } + + if (SystemConfiguration.hasProperty(DATASCHEMAS)) + { + SystemConfiguration.appendProperty("data.schemas", SystemConfiguration.getProperty(DATASCHEMAS)); + } + if (!SystemConfiguration.hasProperty(USEHDFSLOOKUPTABLE)) { SystemConfiguration.setProperty(USEHDFSLOOKUPTABLE, "false"); diff --git a/src/main/java/org/apache/pirk/utils/SystemConfiguration.java b/src/main/java/org/apache/pirk/utils/SystemConfiguration.java index 9f0c0f3a..b5ac98f0 100755 --- a/src/main/java/org/apache/pirk/utils/SystemConfiguration.java +++ b/src/main/java/org/apache/pirk/utils/SystemConfiguration.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.FileInputStream; +import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; import java.util.Properties; @@ -111,7 +112,7 @@ public static boolean hasProperty(String propertyName) } /** - * Append a property via a column separated list + * Append a property via a comma separated list *

* If the property does not exist, it adds it */ @@ -146,16 +147,20 @@ public static void resetProperties() */ public static void loadPropsFromDir(String dirName) { - File dir = new File(dirName); - File[] directoryListing = dir.listFiles(); + File[] directoryListing = new File(dirName).listFiles(new FilenameFilter() + { + @Override + public boolean accept(File dir, String name) + { + return name.endsWith(".properties"); + } + }); + if (directoryListing != null) { for (File file : directoryListing) { - if (file.getName().endsWith(".properties")) - { - loadPropsFromFile(file); - } + loadPropsFromFile(file); } } } diff --git a/src/main/resources/pirk.properties b/src/main/resources/pirk.properties index 449daf40..0bcedb8a 100755 --- a/src/main/resources/pirk.properties +++ b/src/main/resources/pirk.properties @@ -26,13 +26,12 @@ log4jPropertiesFile=logging/log4j2.properties #Name of the local properties file - used when running with the #hadoop jar command -local.pirk.properties=/root/local.pirk.properties +local.pirk.properties=/root/ ## ##Spark path for SparkLauncher ## -#spark.home = /usr -spark.home=/opt/cloudera/parcels/CDH/lib/spark +spark.home = /usr ## ## Data schema properties diff --git a/src/main/resources/querier.properties b/src/main/resources/querier.properties index 6a52ebd4..2051d463 100644 --- a/src/main/resources/querier.properties +++ b/src/main/resources/querier.properties @@ -47,13 +47,13 @@ querier.outputFile= querier.numThreads= ## -## Optional Args - Leave empty if not using/not changing default values +## Optional Args ## -#dataSchemas -- optional -- Comma separated list of data schema file names +#dataSchemas -- optional -- Comma separated list of data schema file names to load #querier.dataSchemas= -#querySchemas -- optional -- Comma separated list of query schema file names +#querySchemas -- optional -- Comma separated list of query schema file names to load #querier.dataSchemas= diff --git a/src/main/resources/responder.properties b/src/main/resources/responder.properties index 7556d001..11ad7f68 100644 --- a/src/main/resources/responder.properties +++ b/src/main/resources/responder.properties @@ -24,10 +24,6 @@ #dataInputFormat -- required -- 'base', 'elasticsearch', or 'standalone' -- Specify the input format pir.dataInputFormat= -#inputData -- required -#Fully qualified name of input file/directory in hdfs; used if inputFormat = 'base' -pir.inputData= - #outputFile -- required -- Fully qualified name of output file in hdfs pir.outputFile= @@ -43,13 +39,16 @@ pir.queryInput= ## Optional Args - Leave empty if not using/not changing default values ## -#dataSchemas -- required -- Comma separated list of data schema file names +#inputData -- required if baseInputFormat = 'base' +#Fully qualified name of input file/directory in hdfs; used if inputFormat = 'base' +#pir.inputData= + +#dataSchemas -- optional -- Comma separated list of data schema file names to load #responder.dataSchemas= -#querySchemas -- required -- Comma separated list of query schema file names +#querySchemas -- optional -- Comma separated list of query schema file names to load #responder.querySchemas= - #allowAdHocQuerySchemas -- 'true' or 'false' #If true, allows embedded QuerySchemas for a query. 
#Defaults to 'false' From 5ea6cbbc267b680ccf97f46ca6f9232cde59f6f4 Mon Sep 17 00:00:00 2001 From: eawilliams Date: Thu, 28 Jul 2016 14:50:23 -0400 Subject: [PATCH 07/10] source formatting and organizing imports --- .../apache/pirk/inputformat/hadoop/json/JSONRecordReader.java | 2 +- .../java/org/apache/pirk/responder/wideskies/ResponderCLI.java | 2 +- .../mapreduce/HashSelectorsAndPartitionDataMapper.java | 2 +- .../apache/pirk/responder/wideskies/spark/ComputeResponse.java | 2 +- .../org/apache/pirk/schema/query/filter/StopListFilter.java | 1 - src/main/java/org/apache/pirk/utils/SystemConfiguration.java | 1 + src/test/java/test/general/QueryParserUtilsTest.java | 1 - src/test/java/test/schema/data/LoadDataSchemaTest.java | 2 +- src/test/java/test/schema/query/LoadQuerySchemaTest.java | 2 +- 9 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/apache/pirk/inputformat/hadoop/json/JSONRecordReader.java b/src/main/java/org/apache/pirk/inputformat/hadoop/json/JSONRecordReader.java index 5adf853f..06e395fa 100644 --- a/src/main/java/org/apache/pirk/inputformat/hadoop/json/JSONRecordReader.java +++ b/src/main/java/org/apache/pirk/inputformat/hadoop/json/JSONRecordReader.java @@ -30,8 +30,8 @@ import org.apache.hadoop.mapreduce.lib.input.LineRecordReader; import org.apache.pirk.inputformat.hadoop.TextArrayWritable; import org.apache.pirk.schema.data.DataSchema; -import org.apache.pirk.schema.data.DataSchemaRegistry; import org.apache.pirk.schema.data.DataSchemaLoader; +import org.apache.pirk.schema.data.DataSchemaRegistry; import org.apache.pirk.utils.QueryParserUtils; import org.apache.pirk.utils.StringUtils; import org.apache.pirk.utils.SystemConfiguration; diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java index 62e6ccea..6a92f63d 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/ResponderCLI.java @@ -41,7 +41,7 @@ public class ResponderCLI private CommandLine commandLine = null; private static final String LOCALPROPFILE = "local.responder.properties"; - + /** * Create and parse allowable options * diff --git a/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/HashSelectorsAndPartitionDataMapper.java b/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/HashSelectorsAndPartitionDataMapper.java index 75186e4c..adfe5b76 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/HashSelectorsAndPartitionDataMapper.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/mapreduce/HashSelectorsAndPartitionDataMapper.java @@ -33,8 +33,8 @@ import org.apache.pirk.schema.data.DataSchema; import org.apache.pirk.schema.data.DataSchemaLoader; import org.apache.pirk.schema.data.DataSchemaRegistry; -import org.apache.pirk.schema.query.QuerySchemaLoader; import org.apache.pirk.schema.query.QuerySchema; +import org.apache.pirk.schema.query.QuerySchemaLoader; import org.apache.pirk.schema.query.QuerySchemaRegistry; import org.apache.pirk.schema.query.filter.DataFilter; import org.apache.pirk.serialization.HadoopFileSystemStore; diff --git a/src/main/java/org/apache/pirk/responder/wideskies/spark/ComputeResponse.java b/src/main/java/org/apache/pirk/responder/wideskies/spark/ComputeResponse.java index 0a0a8bf5..a14664c4 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/spark/ComputeResponse.java +++ 
b/src/main/java/org/apache/pirk/responder/wideskies/spark/ComputeResponse.java @@ -183,7 +183,7 @@ private void setup() throws Exception { qSchema = QuerySchemaRegistry.get(queryInfo.getQueryType()); } - + DataSchema dSchema = DataSchemaRegistry.get(qSchema.getDataSchemaName()); bVars.setQuerySchema(qSchema); bVars.setDataSchema(dSchema); diff --git a/src/main/java/org/apache/pirk/schema/query/filter/StopListFilter.java b/src/main/java/org/apache/pirk/schema/query/filter/StopListFilter.java index 0dcf590d..c68d3004 100644 --- a/src/main/java/org/apache/pirk/schema/query/filter/StopListFilter.java +++ b/src/main/java/org/apache/pirk/schema/query/filter/StopListFilter.java @@ -19,7 +19,6 @@ package org.apache.pirk.schema.query.filter; import java.util.Arrays; -import java.util.HashSet; import java.util.List; import java.util.Set; diff --git a/src/main/java/org/apache/pirk/utils/SystemConfiguration.java b/src/main/java/org/apache/pirk/utils/SystemConfiguration.java index 53034171..b5ac98f0 100755 --- a/src/main/java/org/apache/pirk/utils/SystemConfiguration.java +++ b/src/main/java/org/apache/pirk/utils/SystemConfiguration.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.Properties; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/src/test/java/test/general/QueryParserUtilsTest.java b/src/test/java/test/general/QueryParserUtilsTest.java index 854ed811..16d73bb9 100644 --- a/src/test/java/test/general/QueryParserUtilsTest.java +++ b/src/test/java/test/general/QueryParserUtilsTest.java @@ -27,7 +27,6 @@ import org.apache.hadoop.io.MapWritable; import org.apache.pirk.schema.data.DataSchema; import org.apache.pirk.schema.data.DataSchemaRegistry; -import org.apache.pirk.schema.data.DataSchemaLoader; import org.apache.pirk.test.utils.Inputs; import org.apache.pirk.utils.QueryParserUtils; import org.apache.pirk.utils.StringUtils; diff --git a/src/test/java/test/schema/data/LoadDataSchemaTest.java b/src/test/java/test/schema/data/LoadDataSchemaTest.java index 7edb2890..07167fc9 100644 --- a/src/test/java/test/schema/data/LoadDataSchemaTest.java +++ b/src/test/java/test/schema/data/LoadDataSchemaTest.java @@ -33,8 +33,8 @@ import javax.xml.transform.stream.StreamResult; import org.apache.pirk.schema.data.DataSchema; -import org.apache.pirk.schema.data.DataSchemaRegistry; import org.apache.pirk.schema.data.DataSchemaLoader; +import org.apache.pirk.schema.data.DataSchemaRegistry; import org.apache.pirk.schema.data.partitioner.IPDataPartitioner; import org.apache.pirk.schema.data.partitioner.PrimitiveTypePartitioner; import org.apache.pirk.test.utils.TestUtils; diff --git a/src/test/java/test/schema/query/LoadQuerySchemaTest.java b/src/test/java/test/schema/query/LoadQuerySchemaTest.java index 310580d2..2f1fc371 100644 --- a/src/test/java/test/schema/query/LoadQuerySchemaTest.java +++ b/src/test/java/test/schema/query/LoadQuerySchemaTest.java @@ -37,8 +37,8 @@ import org.apache.pirk.schema.data.DataSchemaLoader; import org.apache.pirk.schema.data.partitioner.IPDataPartitioner; import org.apache.pirk.schema.data.partitioner.PrimitiveTypePartitioner; -import org.apache.pirk.schema.query.QuerySchemaLoader; import org.apache.pirk.schema.query.QuerySchema; +import org.apache.pirk.schema.query.QuerySchemaLoader; import org.apache.pirk.schema.query.QuerySchemaRegistry; import org.apache.pirk.schema.query.filter.StopListFilter; import org.apache.pirk.test.utils.Inputs; From c3df176dc41b7a7e4a8fa7e2ed76a1822d59edec Mon Sep 17 00:00:00 2001 
From: eawilliams Date: Thu, 28 Jul 2016 15:13:00 -0400 Subject: [PATCH 08/10] removing unneeded scripts --- bin/run-querier.sh | 234 ------------------------------------- bin/run-responder.sh | 268 ------------------------------------------- 2 files changed, 502 deletions(-) delete mode 100755 bin/run-querier.sh delete mode 100755 bin/run-responder.sh diff --git a/bin/run-querier.sh b/bin/run-querier.sh deleted file mode 100755 index f2d4c708..00000000 --- a/bin/run-querier.sh +++ /dev/null @@ -1,234 +0,0 @@ -#!/bin/bash -############################################################################### -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -############################################################################### - -# This script runs the QuerierDriver - -#Jar file - full path to jar -JAR="" -echo ${JAR} - -#QuerierDriver class -QUERIER_DRIVER="org.apache.pirk.querier.wideskies.QuerierDriver" -echo ${QUERIER_DRIVER} - - -## -## CLI Options -## - -## -## Required Args -## - -#action -- required - 'encrypt' or 'decrypt' -- The action performed by the QuerierDriver -ACTION="" -echo ${ACTION} - -#dataSchemas -- required -- Comma separated list of data schema file names -DATASCHEMAS="" -echo ${DATASCHEMAS} - -#inputFile - required - Fully qualified file containing input -#The input is either: -#(1) For Encryption: A query file - Contains the query selectors, one per line; -#the first line must be the query number -#OR -#(2) For Decryption: A response file - Contains the serialized Response object -INPUTFILE="" -echo ${INPUTFILE} - -#numThreads -- required -- Number of threads to use for encryption/decryption -NUMTHREADS="" -echo ${NUMTHREADS} - -#outputFile -- required - Fully qualified file for the result output. 
-#The output file specifies either: -#(1) For encryption: -#(a) A file to contain the serialized Querier object named: -querier -#AND -#(b) A file to contain the serialized Query object named: -query -#OR -#(2) A file to contain the decryption results where each line is where each line -#corresponds to one hit and is a JSON object with the schema QuerySchema -OUTPUTFILE="" -echo ${OUTPUTFILE} - -#querySchemas -- required -- Comma separated list of query schema file names -QUERYSCHEMAS="" -echo ${QUERYSCHEMAS} - -## -## Optional Args - Leave empty if not using/not changing default values -## - -## Optional, but required for Encryption (ignored if not encrypting) - -#bitset -- required for encryption -- Ensure that this bit position is set in the Paillier -#modulus (will generate Paillier moduli until finding one in which this bit is set) -BITSET="" -echo ${BITSET} - -#certainty -- required for encryption -- Certainty of prime generation for Paillier -#must be greater than or equal to 128 -CERTAINTY="" -echo ${CERTAINTY} - -#dataPartitionBitSize -- required for encryption -- Partition bit size in data partitioning -DATAPARTITIONBITSIZE="" -echo ${DATAPARTITIONBITSIZE} - -#embedSelector - required for encryption -- 'true' or 'false' -#Whether or not to embed the selector in the results to reduce false positives -#Defaults to 'true' -EMBEDSELECTOR="" -echo ${EMBEDSELECTOR} - -#embedQuerySchema - true or false -#Whether or not to embed the QuerySchema in the Query (via QueryInfo) -#Defaults to 'false' -EMBEDQUERYSCHEMA="" -echo ${EMBEDQUERYSCHEMA} - -#hashBitSize - required for encryption-- Bit size of keyed hash -HASHBITSIZE="" -echo ${HASHBITSIZE} - -#hashKey -- required for encryption -- String key for the keyed hash functionality -HASHKEY="" -echo ${HASHKEY} - -#useHDFSLookupTable -- required for encryption -- 'true' or 'false' -#Whether or not to generate and use a hdfs modular exponentation lookup table -#Defaults to 'false' -USEHDFSLOOKUP="" -echo ${USEHDFSLOOKUP} - -#memLookupTable -- required for encryption -- 'true' or 'false' -#Whether or not to generate and use an in memory modular exponentation lookup table - only for -#standalone/testing right now... 
-#Defaults to 'false' -MEMLOOKUPTABLE="" -echo ${MEMLOOKUPTABLE} - -#paillierBitSize -- required for encryption -- Paillier modulus size N -PAILLIERBITSIZE="" -echo ${PAILLIERBITSIZE} - -#queryName -- required for encryption -- Name of the query -QUERYNAME="" -echo ${QUERYNAME} - -#queryType -- required for encryption -#Type of the query as defined in the 'schemaName' tag of the corresponding query schema file -QUERYTYPE="" -echo ${QUERYTYPE} - -#secureRandomAlg -- specify the SecureRandom algorithm -#Defaults to NativePRNG -SECURERANDOMALG="" -echo ${SECURERANDOMALG} - -#secureRandomProvider -- specify the SecureRandom provider -#Defaults to SUN -SECURERANDOMPROVIDER="" -echo ${SECURERANDOMPROVIDER} - -## Optional, but required for Decryption (ignored if not decrypting) - -#querierFile -- required for decryption -#Fully qualified file containing the serialized Querier object -QUERIERFILE="" -echo ${QUERIERFILE} - - -## -## Define the command -## - -QUERIER_DRIVER_CMD="java -cp ${JAR} ${QUERIER_DRIVER} -a ${ACTION} -ds ${DATASCHEMAS} \ --i ${INPUTFILE} -nt ${NUMTHREADS} -o ${OUTPUTFILE} -qs ${QUERYSCHEMAS}" - -# Add the optional args - -if [ -n "${BITSET}" ]; then - QUERIER_DRIVER_CMD+=" -b ${BITSET}" -fi -if [ -n "${CERTAINTY}" ]; then - QUERIER_DRIVER_CMD+=" -c ${CERTAINTY}" -fi -if [ -n "${DATAPARTITIONBITSIZE}" ]; then - QUERIER_DRIVER_CMD+=" -dps ${DATAPARTITIONBITSIZE}" -fi -if [ -n "${EMBEDSELECTOR}" ]; then - QUERIER_DRIVER_CMD+=" -embed ${EMBEDSELECTOR}" -fi -if [ -n "${EMBEDQUERYSCHEMA}" ]; then - QUERIER_DRIVER_CMD+=" -embedQS ${EMBEDQUERYSCHEMA}" -fi -if [ -n "${HASHBITSIZE}" ]; then - QUERIER_DRIVER_CMD+=" -hb ${HASHBITSIZE}" -fi -if [ -n "${HASHKEY}" ]; then - QUERIER_DRIVER_CMD+=" -hk ${HASHKEY}" -fi -if [ -n "${USEHDFSLOOKUP}" ]; then - QUERIER_DRIVER_CMD+=" -lu ${USEHDFSLOOKUP}" -fi -if [ -n "${MEMLOOKUPTABLE}" ]; then - QUERIER_DRIVER_CMD+=" -mlu ${MEMLOOKUPTABLE}" -fi -if [ -n "${PAILLIERBITSIZE}" ]; then - QUERIER_DRIVER_CMD+=" -pbs ${PAILLIERBITSIZE}" -fi -if [ -n "${QUERYNAME}" ]; then - QUERIER_DRIVER_CMD+=" -qn ${QUERYNAME}" -fi -if [ -n "${QUERYTYPE}" ]; then - QUERIER_DRIVER_CMD+=" -qt ${QUERYTYPE}" -fi -if [ -n "${SECURERANDOMALG}" ]; then - QUERIER_DRIVER_CMD+=" -srAlg ${SECURERANDOMALG}" -fi -if [ -n "${SECURERANDOMPROVIDER}" ]; then - QUERIER_DRIVER_CMD+=" -srProvider ${SECURERANDOMPROVIDER}" -fi -if [ -n "${QUERIERFILE}" ]; then - QUERIER_DRIVER_CMD+=" -qf ${QUERIERFILE}" -fi -echo ${QUERIER_DRIVER_CMD} - - -## -## Execute the QuerierDriver -## Results will be displayed in the log file. -## - -LOG_FILE="LOG_QUERIER.txt" -echo ${LOG_FILE} - -{ -echo ${QUERIER_DRIVER_CMD} -${QUERIER_DRIVER_CMD} -if [ $? -ne 0 ] -then -echo "ERROR QuerierDriver. SEE LOG." -exit 0 -fi -} &> ${LOG_FILE} diff --git a/bin/run-responder.sh b/bin/run-responder.sh deleted file mode 100755 index da16125b..00000000 --- a/bin/run-responder.sh +++ /dev/null @@ -1,268 +0,0 @@ -#!/bin/bash -############################################################################### -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -############################################################################### - -#This script runs the ResponderDriver - -#Jar file - full path to jar -JAR="" -echo ${JAR} - -#ResponderDriver class -RESPONDER_DRIVER="org.apache.pirk.responder.wideskies.ResponderDriver" -echo ${RESPONDER_DRIVER} - -## -## CLI Options -## - -## -## Required Args -## - -#dataInputFormat -- required -- 'base', 'elasticsearch', or 'standalone' -- Specify the input format -DATAINPUTFORMAT="" -echo ${DATAINPUTFORMAT} - -#dataSchemas -- required -- Comma separated list of data schema file names -DATASCHEMAS="" -echo ${DATASCHEMAS} - -#inputData -- required -#Fully qualified name of input file/directory in hdfs; used if inputFormat = 'base' -INPUTDATA="" -echo ${INPUTDATA} - -#outputFile -- required -- Fully qualified name of output file in hdfs -OUTPUTFILE="" -echo ${OUTPUTFILE} - -#platform -- required -- 'mapreduce', 'spark', or 'standalone' -#Processing platform technology for the responder -PLATFORM="" -echo ${PLATFORM} - -#queryInput -- required -- Fully qualified dir in hdfs of Query files -QUERYINPUT="" -echo ${QUERYINPUT} - -#querySchemas -- required -- Comma separated list of query schema file names -QUERYSCHEMAS="" -echo ${QUERYSCHEMAS} - -## -## Optional Args - Leave empty if not using/not changing default values -## - -#allowAdHocQuerySchemas -- 'true' or 'false' -#If true, allows embedded QuerySchemas for a query. 
-#Defaults to 'false' -ALLOWADHOCQUERYSCHEMAS="" -echo ${ALLOWADHOCQUERYSCHEMAS} - -#colMultReduceByKey -- 'true' or 'false' -- Spark only -#If true, uses reduceByKey in performing column multiplication; if false, uses groupByKey -> reduce -#Defaults to 'false' -COLMULTREDUCEBYKEY="" -echo ${COLMULTREDUCEBYKEY} - -#baseInputFormat -- required if baseInputFormat = 'base' -#Full class name of the InputFormat to use when reading in the data - must extend BaseInputFormat -BASEINPUTFORMAT="" -echo ${BASEINPUTFORMAT} - -#esQuery -- required if baseInputFormat = 'elasticsearch' -- ElasticSearch query -#if using 'elasticsearch' input format -ESQUERY="" -echo ${ESQUERY} - -#esResource -- required if baseInputFormat = 'elasticsearch' -#Requires the format / : Elasticsearch resource where data is read and written to -ESRESOURCE="" -echo ${ESRESOURCE} - -#useHDFSLookupTable -- 'true' or 'false' - Whether or not to generate and use the -#hdfs lookup table for modular exponentiation -#Defaults to 'false' -HDFSEXP="" -echo ${HDFSEXP} - -#baseQuery -- ElasticSearch-like query if using 'base' input format - -#used to filter records in the RecordReader -#Defaults to ?q=* -BASEQUERY="" -echo ${BASEQUERY} - -#limitHitsPerSelector -- 'true' or 'false' -#Whether or not to limit the number of hits per selector -#Defaults to 'true' -LIMITHITSPERSELECTOR="" -echo ${LIMITHITSPERSELECTOR} - -#mapreduceMapJavaOpts -- Amount of heap (in MB) to allocate per map task -#Defaults to -Xmx2800m -MRMAPJAVAOPTS="" -echo ${MRMAPJAVAOPTS} - -#mapreduceMapMemoryMb -- Amount of memory (in MB) to allocate per map task -#Defaults to 3000 -MRMAPMEMORYMB="" -echo ${MRMAPMEMORYMB} - -#mapreduceReduceJavaOpts -#Amount of heap (in MB) to allocate per reduce task -#Defaults to -Xmx2800m -MRREDUCEJAVAOPTS="" -echo ${MRREDUCEJAVAOPTS} - -#mapreduceReduceMemoryMb -#Amount of memory (in MB) to allocate per reduce task -#Defaults to 3000 -MRREDUCEMEMORYMB="" -echo ${MRREDUCEMEMORYMB} - -#stopListFile -- optional (unless using StopListFilter) -- Fully qualified file in hdfs -#containing stoplist terms; used by the StopListFilter -STOPLISTFILE="" -echo ${STOPLISTFILE} - -#useLocalCache -- 'true' or 'false' -#Whether or not to use the local cache for modular exponentiation -#Defaults to 'true' -USELOCALCACHE="" -echo ${USELOCALCACHE} - -#useModExpJoin -- 'true' or 'false' -- Spark only -#Whether or not to pre-compute the modular exponentiation table and join it to the data -#partitions when performing the encrypted row calculations -#Defaults to 'false' -USEMODEXPJOIN="" -echo ${USEMODEXPJOIN} - -#numReduceTasks -- optional -- Number of reduce tasks -NUMREDUCETASKS="" -echo ${NUMREDUCETASKS} - -#numColMultPartitions -- optional, Spark only -#Number of partitions to use when performing column multiplication -NUMCOLMULTPARTS="" -echo ${NUMCOLMULTPARTS} - -#maxHitsPerSelector -- optional -- Max number of hits encrypted per selector -MAXHITSPERSELECTOR="" -echo ${MAXHITSPERSELECTOR} - -#dataParts -- optional -- Number of partitions for the input data -DATAPARTS="" -echo ${DATAPARTS} - -#numExpLookupPartitions -- optional -- Number of partitions for the exp lookup table -EXPPARTS="" -echo ${EXPPARTS} - -## -## Define the command -## - -RESPONDER_DRIVER_CMD="java -cp ${JAR} ${RESPONDER_DRIVER} -d ${DATAINPUTFORMAT} \ - -ds ${DATASCHEMAS} -i ${INPUTDATA} -o ${OUTPUTFILE} -p ${PLATFORM} \ - -q ${QUERYINPUT} -qs ${QUERYSCHEMAS}" - -# Add the optional args - -if [ -n "${NUMREDUCETASKS}" ]; then - RESPONDER_DRIVER_CMD+=" -nr ${NUMREDUCETASKS}" -fi -if [ -n 
"${NUMCOLMULTPARTS}" ]; then - RESPONDER_DRIVER_CMD+=" -numColMultParts ${NUMCOLMULTPARTS}" -fi -if [ -n "${MAXHITSPERSELECTOR}" ]; then - RESPONDER_DRIVER_CMD+=" -mh ${MAXHITSPERSELECTOR}" -fi -if [ -n "${DATAPARTS}" ]; then - RESPONDER_DRIVER_CMD+=" -dataParts ${DATAPARTS}" -fi -if [ -n "${EXPPARTS}" ]; then - RESPONDER_DRIVER_CMD+=" -expParts ${EXPPARTS}" -fi -if [ -n "${ESQUERY}" ]; then - RESPONDER_DRIVER_CMD+=" -eq ${ESQUERY}" -fi -if [ -n "${ESRESOURCE}" ]; then - RESPONDER_DRIVER_CMD+=" -er ${ESRESOURCE}" -fi -if [ -n "${HDFSEXP}" ]; then - RESPONDER_DRIVER_CMD+=" -hdfsExp ${HDFSEXP}" -fi -if [ -n "${BASEQUERY}" ]; then - RESPONDER_DRIVER_CMD+=" -j ${BASEQUERY}" -fi -if [ -n "${LIMITHITSPERSELECTOR}" ]; then - RESPONDER_DRIVER_CMD+=" -lh ${LIMITHITSPERSELECTOR}" -fi -if [ -n "${MRMAPJAVAOPTS}" ]; then - RESPONDER_DRIVER_CMD+=" -mjo ${MRMAPJAVAOPTS}" -fi -if [ -n "${MRMAPMEMORYMB}" ]; then - RESPONDER_DRIVER_CMD+=" -mm ${MRMAPMEMORYMB}" -fi -if [ -n "${MRREDUCEJAVAOPTS}" ]; then - RESPONDER_DRIVER_CMD+=" -rjo ${MRREDUCEJAVAOPTS}" -fi -if [ -n "${MRREDUCEMEMORYMB}" ]; then - RESPONDER_DRIVER_CMD+=" -rm ${MRREDUCEMEMORYMB}" -fi -if [ -n "${STOPLISTFILE}" ]; then - RESPONDER_DRIVER_CMD+=" -sf ${STOPLISTFILE}" -fi -if [ -n "${USELOCALCACHE}" ]; then - RESPONDER_DRIVER_CMD+=" -ulc ${USELOCALCACHE}" -fi -if [ -n "${USEMODEXPJOIN}" ]; then - RESPONDER_DRIVER_CMD+=" -useModExpJoin ${USEMODEXPJOIN}" -fi -if [ -n "${ALLOWADHOCQUERYSCHEMAS}" ]; then - RESPONDER_DRIVER_CMD+=" -allowEmbeddedQS ${ALLOWADHOCQUERYSCHEMAS}" -fi -if [ -n "${COLMULTREDUCEBYKEY}" ]; then - RESPONDER_DRIVER_CMD+=" -colMultRBK ${COLMULTREDUCEBYKEY}" -fi -if [ -n "${BASEINPUTFORMAT}" ]; then - RESPONDER_DRIVER_CMD+=" -bif ${BASEINPUTFORMAT}" -fi -echo ${RESPONDER_DRIVER_CMD} - - -## -## Execute the ResponderDriver -## Results will be displayed in the log file. -## - -LOG_FILE="LOG_RESPONDER.txt" -echo ${LOG_FILE} - -{ -echo ${RESPONDER_DRIVER_CMD} -${RESPONDER_DRIVER_CMD} -if [ $? -ne 0 ] -then -echo "ERROR ResponderDriver. SEE LOG." 
-exit 0 -fi -} &> ${LOG_FILE} From 637d195e158f3ff05452c1b7c75be16243f2ed83 Mon Sep 17 00:00:00 2001 From: eawilliams Date: Thu, 28 Jul 2016 16:47:38 -0400 Subject: [PATCH 09/10] added source headers; removed debug line --- .../querier/wideskies/QuerierDriverCLI.java | 1 - .../pirk/querier/wideskies/QuerierProps.java | 18 ++++++++++++++++++ .../responder/wideskies/ResponderProps.java | 18 ++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java index d9a3c956..a5ea3217 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierDriverCLI.java @@ -114,7 +114,6 @@ private boolean parseOptions() // If we have a local.querier.properties file specified, load it if (hasOption(LOCALPROPFILE)) { - System.out.println("getOptionValue(LOCALPROPFILE) = " + getOptionValue(LOCALPROPFILE)); SystemConfiguration.loadPropsFromFile(new File(getOptionValue(LOCALPROPFILE))); } else diff --git a/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java index 93ccbb7c..e8820af0 100644 --- a/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java +++ b/src/main/java/org/apache/pirk/querier/wideskies/QuerierProps.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.pirk.querier.wideskies; import java.util.Arrays; diff --git a/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java b/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java index d361c1db..1f2130b4 100644 --- a/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java +++ b/src/main/java/org/apache/pirk/responder/wideskies/ResponderProps.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ package org.apache.pirk.responder.wideskies; import java.util.Arrays; From be9544f8a6f6790380ed8d12e059263a650c4cd5 Mon Sep 17 00:00:00 2001 From: eawilliams Date: Thu, 28 Jul 2016 18:41:22 -0400 Subject: [PATCH 10/10] clean up misc items --- .../pirk/schema/data/DataSchemaLoader.java | 2 +- .../pirk/schema/query/QuerySchemaLoader.java | 2 +- src/main/resources/pirk.properties | 3 +- src/main/resources/pirk.properties-repo | 243 ------------------ 4 files changed, 3 insertions(+), 247 deletions(-) delete mode 100755 src/main/resources/pirk.properties-repo diff --git a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java index f0cca325..057ed8ee 100644 --- a/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java +++ b/src/main/java/org/apache/pirk/schema/data/DataSchemaLoader.java @@ -46,7 +46,7 @@ /** * Class to load any data schemas specified in the properties file, 'data.schemas' *

- * Schemas should be specified as follows; all items are treated in a case insensitive manner:
+ * Schemas should be specified as follows:
  * 
  * <pre>
  * {@code
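This hunk and the QuerySchemaLoader hunk that follows both read schema file names from the 'data.schemas' and 'query.schemas' properties. A small usage sketch, with hypothetical file paths; only the property names and the SystemConfiguration calls are taken from this patch series:

    // Sketch: register local schema definition files before the schema loaders run.
    import org.apache.pirk.utils.SystemConfiguration;

    public class SchemaPropertiesExample
    {
      public static void main(String[] args)
      {
        // Comma separated lists of fully qualified schema file names
        SystemConfiguration.setProperty("data.schemas", "/opt/pirk/schemas/myDataSchema.xml");
        SystemConfiguration.setProperty("query.schemas", "/opt/pirk/schemas/myQuerySchema.xml");

        // Additional files can be appended, mirroring how ResponderProps folds the
        // responder.dataSchemas and responder.querySchemas values into these properties
        SystemConfiguration.appendProperty("data.schemas", "/opt/pirk/schemas/anotherDataSchema.xml");

        System.out.println(SystemConfiguration.getProperty("data.schemas"));
      }
    }
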
diff --git a/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java b/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java
index a8445ca6..951a0dfe 100644
--- a/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java
+++ b/src/main/java/org/apache/pirk/schema/query/QuerySchemaLoader.java
@@ -44,7 +44,7 @@
 /**
  * Class to load any query schemas specified in the properties file, 'query.schemas'
  * 

- * Schemas should be specified as follows; all items are treated in a case insensitive manner:
+ * Schemas should be specified as follows:
  * 
  * <pre>
  * {@code
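Related to the schema properties above, earlier patches in this series add a 'localPropFile' CLI option and a *.properties filename filter in SystemConfiguration.loadPropsFromDir. A minimal sketch of those two entry points, using illustrative paths taken from the property defaults in this series:

    // Sketch: load extra configuration the way the new CLI option and directory loader do.
    import java.io.File;

    import org.apache.pirk.utils.SystemConfiguration;

    public class LocalPropertiesExample
    {
      public static void main(String[] args)
      {
        // A single local properties file, as passed via the -localPropFile option
        SystemConfiguration.loadPropsFromFile(new File("/root/local.pirk.properties"));

        // Or every *.properties file found in a directory, as loadPropsFromDir now filters for
        SystemConfiguration.loadPropsFromDir("/root/properties");
      }
    }
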
diff --git a/src/main/resources/pirk.properties b/src/main/resources/pirk.properties
index 0bcedb8a..eb386da5 100755
--- a/src/main/resources/pirk.properties
+++ b/src/main/resources/pirk.properties
@@ -36,8 +36,7 @@ spark.home = /usr
 ##
 ## Data schema properties
 ##
-## Each data schema should be specified in an xml file of the form; 
-## 		all items are treated in a case insensitive manner:
+## Each data schema should be specified in an xml file of the form:
 ##
 ##<schema>
 ##  <schemaName> name of the schema </schemaName>
diff --git a/src/main/resources/pirk.properties-repo b/src/main/resources/pirk.properties-repo
deleted file mode 100755
index 8f631383..00000000
--- a/src/main/resources/pirk.properties-repo
+++ /dev/null
@@ -1,243 +0,0 @@
-###############################################################################
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-###############################################################################
-
-##
-## Default System Configuration Properties
-##
-
-# Name of log4j properties file (relative to current folder)
-log4jPropertiesFile=log4j2.properties
-
-#Name of the directory holding the local properties files
-#All property files must end in .properties
-local.pirk.properties.dir=/root/properties
-
-##
-##Spark path for SparkLauncher
-##
-spark.home = /usr
-
-##
-## Data schema properties
-##
-## Each data schema should be specified in an xml file of the form; 
-## 		all items are treated in a case insensitive manner:
-##
-##
-##   name of the schema 
-##  
-##       element name 
-##       class name or type name (if Java primitive type) of the element 
-##       true or false -- whether or not the schema element is an array within the data 
-##       optional - Partitioner class for the element; defaults to primitive java type partitioner  
-##  
-## 
-##
-## Primitive Java types must be one of the following: "byte", "short", "int", "long", "float", 
-##                  "double", "char", "string", "boolean"
-##
-
-#Comma separated list of local data schema files to load, fully qualified file names
-data.schemas = none
-
-##
-## Query schema properties
-##
-## Each query schema should be specified in an xml file of the form; 
-## 		all items are treated in a case insensitive manner:
-##
-##
-##   name of the schema 
-##	 name of the data schema over which this query is run 
-##	 name of the element in the data schema that will be the selector 
-##  
-##     	 element name of element in the data schema to include in the query response 
-## 	
-##   (optional) name of the filter class to use to filter the data 
-##  
-##        (optional) element name of element in the data schema to apply pre-processing filters 
-##  
-## 
-##
-##
-
-#Comma separated list of local query schema files to load, fully qualified file names
-query.schemas = none
-
-##
-##Properties for ElasticSearch compatibility
-##
-
-#ES host address - One Elasticsearch node in the cluster - may include port specification
-es.nodes= none
-          
-#Default HTTP/REST port used for connecting to Elasticsearch 
-es.port=9200
-
-#Number of results/items returned by each individual scroll
-es.scroll.size = 1000
-
-#Whether elasticsearch-hadoop will treat empty fields as null
-es.field.read.empty.as.null=yes
-
-
-##
-##Properties for functional testing
-##
-
-#Test index to create in ES (created and deleted within the tests)
-#Need final form to be: :/
-test.es.index = testindex
-test.pir.es.index = testindexpir
-
-#Type of elements to insert in ES
-test.es.type = pkt
-
-#Elasticsearch resource - Elasticsearch resource location where data is read and written to. 
-#Requires the format / 
-test.es.resource= none
-test.pir.es.resource = none
-
-#Pathname in hdfs to place input JSON file testing
-test.inputJSONFile = none
-test.pir.inputJSONFile = /tmp/testJSONInput
-
-#Pathname in hdfs to place output file for testing
-test.outputHDFSFile = /tmp/testOutput
-
-#PIR query input dir in hdfs for testing
-test.pir.queryInputDir = none
-
-#PIR stoplist file
-test.pir.stopListFile = /tmp/testStopListFile
-
-#Whether or not we are running PIR testing (used as a flag to dump intermediate RDDs for checking)
-#This should default to false; it is changed to true in the test suite, as applicable
-pir.test = false
-
-#HDFS output dir for PIR intermediate testing
-#Should default to none; changed to a real hdfs path in the test suite, as applicable
-pir.testOut = none
-
-
-##
-## Properties to enable/disable JNA-GMP modPow acceleration for Paillier 
-##
-
-paillier.useGMPForModPow = true
-
-# The JNA-GMP library we use to invoke the much faster, natively compiled GMP
-# can be called in a way that tries to make all modPow calls take a constant amount 
-# of time to execute. This will slow down the modPow performance (still better than
-# Java's BigInteger.modPow() ). 
-# If you are using this package in a multi-tenant computing environment and have 
-# concerns about other tenants being able to closely inspect the runtime of your
-# software, you may want to enable this property. 
-paillier.GMPConstantTimeMode = false
-
-# This property controls the more rigorous prime generation checks in PrimeMaker.java
-# which are based on FIPS SP 800-56B and FIPS 186-4 (extra Miller-Rabin rounds, limits
-# on how close the primes p and q can be, and bounds on the values of the primes)
-# These checks slow down prime generation considerably
-pallier.FIPSPrimeGenerationChecks = true
-
-## These properties control the secure random number generator algorithm and provider.
-## You can specify just the algorithm, or both algorithm and provider.  The system's
-## default secure random is used when the algorithm is left unspecified. 
-pallier.secureRandom.algorithm=NativePRNG
-#pallier.secureRandom.provider=SUN
-
-##
-## Properties for PIR query and response
-##
-
-#Number of bits to return when encoding/returning string values as part of return data elements
-pir.stringBits = 64
-
-#Number of array elements to return when encoding/returning array valued elements
-pir.numReturnArrayElements = 2
-
-#Default prime certainty
-pir.primeCertainty = 128
-
-#Fully qualified dir in hdfs of Query files
-pir.queryInput = none
-
-#Data input format type -- 'base' or 'elasticsearch' (given in InputFormatsConst class)
-pir.dataInputFormat = base
-
-#Fully qualified name of input file/directory in hdfs; used if pir.dataInputFormat = base
-pir.inputData = none
-
-#Inputformat for 'base' data input format type -- must extend BaseInputFormat
-pir.baseInputFormat = none
-
-#ElasticSearch-like query if using 'base' input format
-pir.baseQuery = none
-
-#ES resource for input data
-pir.esResource = null
-
-#ES query for input data
-pir.esQuery = none
-
-#Fully qualified name of output file in hdfs
-pir.outputFile = none
-
-#Fully qualified dir in hdfs of file containing stoplist terms
-pir.stopListFile = none
-
-#Number of reduce tasks
-pir.numReduceTasks = 100
-
-#Whether or not to use the local cache during PIR computations
-pir.useLocalCache = true
-
-#Whether or not to limit the hits for each query term
-pir.limitHitsPerSelector = true
-
-#Number of hits to limit for each query term, if pir.limitHitsPerSelector = true
-pir.maxHitsPerSelector = 100
-
-#Whether or not to embed the selector in the results for false positive reduction
-pir.embedSelector = true
-
-#Whether or not to generate and use the HDFS modular exponentiation lookup table
-pir.useHDFSLookupTable = false
-
-#Number of partitions to coalesce the input data into in Spark
-pir.numDataPartitions = 1500
-
-#Mapreduce memory options
-mapreduce.map.memory.mb = 3000
-mapreduce.reduce.memory.mb = 3000
-mapreduce.map.java.opts = -Xmx2800m
-mapreduce.reduce.java.opts = -Xmx2800m
-
-#HDFS directory for the expLookupTable 
-pir.expDir = none
-		
-#Parallelism for expLookupTable creation in hdfs 
-pir.expCreationSplits = 600
-
-
-
-
-
-
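Note (not part of the patches above): the QuerierDriverCLI hunk in PATCH 09 loads a local properties file via SystemConfiguration.loadPropsFromFile, and the deleted pirk.properties-repo documents the pallier.secureRandom.algorithm and pallier.secureRandom.provider settings. The following is a minimal Java sketch, for illustration only, of how those settings could map onto the standard JDK SecureRandom factory methods. The properties file path is illustrative, and the getProperty accessor (and the org.apache.pirk.utils package for SystemConfiguration) are assumptions, not confirmed by the patches.

import java.io.File;
import java.security.SecureRandom;

import org.apache.pirk.utils.SystemConfiguration; // assumed package for SystemConfiguration

public class SecureRandomPropsSketch
{
  public static void main(String[] args) throws Exception
  {
    // Load a local properties file, mirroring the QuerierDriverCLI hunk above.
    // The path is illustrative only; local.pirk.properties.dir defaults to /root/properties.
    SystemConfiguration.loadPropsFromFile(new File("/root/properties/local.querier.properties"));

    // Assumed accessor; expected to behave like java.util.Properties.getProperty
    // (returns null when the key is not set).
    String algorithm = SystemConfiguration.getProperty("pallier.secureRandom.algorithm");
    String provider = SystemConfiguration.getProperty("pallier.secureRandom.provider");

    // Map the properties onto the standard JDK SecureRandom factory methods,
    // as described in the comments of the deleted pirk.properties-repo:
    // no algorithm -> system default; algorithm only; or algorithm plus provider.
    SecureRandom random;
    if (algorithm == null)
    {
      random = new SecureRandom();                            // system default
    }
    else if (provider == null)
    {
      random = SecureRandom.getInstance(algorithm);           // e.g. NativePRNG
    }
    else
    {
      random = SecureRandom.getInstance(algorithm, provider); // e.g. NativePRNG from SUN
    }

    System.out.println("SecureRandom algorithm in use: " + random.getAlgorithm());
  }
}

If the real SystemConfiguration class exposes a differently named accessor, substitute it; the branching itself relies only on java.security APIs.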