Skip to content

Commit

Permalink
[SPARK-23901][SQL] Add masking functions
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

This PR adds the masking functions as they are described in Hive's documentation: https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-DataMaskingFunctions.
This means that only `string`s are accepted as parameter for the masking functions.

## How was this patch tested?

Added unit tests (UTs).

Author: Marco Gaido <marcogaido91@gmail.com>

Closes #21246 from mgaido91/SPARK-23901.
  • Loading branch information
mgaido91 authored and ueshin committed May 30, 2018
1 parent ec6f971 commit 1b36f14
Show file tree
Hide file tree
Showing 6 changed files with 1,119 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalyst.expressions;

/**
 * Contains all the utility methods used in the masking expressions.
 *
 * <p>Characters are handled as {@code int} Unicode code points rather than {@code char}s, so a
 * supplementary character is classified and replaced as a single unit.
 */
public class MaskExpressionsUtils {
  /**
   * Sentinel passed in place of a masking character to indicate that the corresponding
   * category of characters must be left untouched.
   */
  static final int UNMASKED_VAL = -1;

  /**
   * Returns the masking character for {@code c}, or {@code c} itself if it should not be masked.
   *
   * <p>The category of {@code c} is determined with {@link Character#getType(int)}. If the
   * masking character for that category is {@link #UNMASKED_VAL}, {@code c} is returned
   * unchanged.
   *
   * @param c the character (code point) to transform
   * @param maskedUpperChar the character to use instead of an uppercase letter
   * @param maskedLowerChar the character to use instead of a lowercase letter
   * @param maskedDigitChar the character to use instead of a digit
   * @param maskedOtherChar the character to use instead of any other character
   * @return the masking character for {@code c}, or {@code c} when its category is unmasked
   */
  public static int transformChar(
      final int c,
      int maskedUpperChar,
      int maskedLowerChar,
      int maskedDigitChar,
      int maskedOtherChar) {
    switch (Character.getType(c)) {
      case Character.UPPERCASE_LETTER:
        if (maskedUpperChar != UNMASKED_VAL) {
          return maskedUpperChar;
        }
        break;

      case Character.LOWERCASE_LETTER:
        if (maskedLowerChar != UNMASKED_VAL) {
          return maskedLowerChar;
        }
        break;

      case Character.DECIMAL_DIGIT_NUMBER:
        if (maskedDigitChar != UNMASKED_VAL) {
          return maskedDigitChar;
        }
        break;

      default:
        // Everything that is not an uppercase letter, a lowercase letter or a decimal digit.
        if (maskedOtherChar != UNMASKED_VAL) {
          return maskedOtherChar;
        }
        break;
    }

    // The category of c is configured as unmasked: keep the original character.
    return c;
  }

  /**
   * Returns the replacement character to use according to the {@code rep} specified by the user
   * and the {@code def} default.
   *
   * @param rep the user-specified replacement; only its first code point is used
   * @param def the value to return when {@code rep} is {@code null} or empty
   * @return the first code point of {@code rep}, or {@code def} if {@code rep} is {@code null}
   *         or empty
   */
  public static int getReplacementChar(String rep, int def) {
    if (rep != null && !rep.isEmpty()) {
      // codePointAt (not charAt) so that a leading surrogate pair yields one full code point.
      return rep.codePointAt(0);
    }
    return def;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,14 @@ object FunctionRegistry {
expression[ArrayRepeat]("array_repeat"),
CreateStruct.registryEntry,

// mask functions
expression[Mask]("mask"),
expression[MaskFirstN]("mask_first_n"),
expression[MaskLastN]("mask_last_n"),
expression[MaskShowFirstN]("mask_show_first_n"),
expression[MaskShowLastN]("mask_show_last_n"),
expression[MaskHash]("mask_hash"),

// misc functions
expression[AssertTrue]("assert_true"),
expression[Crc32]("crc32"),
Expand Down
Loading

0 comments on commit 1b36f14

Please sign in to comment.