Skip to content

Commit

Permalink
[SPARK-29342][SQL] Make casting of string values to intervals case in…
Browse files Browse the repository at this point in the history
…sensitive

### What changes were proposed in this pull request?

In this PR, I propose passing the `Pattern.CASE_INSENSITIVE` flag when compiling interval patterns in `CalendarInterval`. This makes casting string values to intervals case-insensitive: the `interval` prefix and the unit names `year(s)`, `month(s)`, `week(s)`, `day(s)`, `hour(s)`, `minute(s)`, `second(s)`, `millisecond(s)` and `microsecond(s)` may be written in any letter case.

### Why are the changes needed?
There are at least 2 reasons:
- To maintain feature parity with PostgreSQL which is not sensitive to case:
```sql
 # select cast('10 Days' as INTERVAL);
 interval
----------
 10 days
(1 row)
```
- Spark already tolerates any letter case in interval literals. Case insensitivity in casting is consistent with that and should be convenient for Spark users.
```sql
spark-sql> SELECT INTERVAL 1 YEAR 1 WEEK;
interval 1 years 1 weeks
```

### Does this PR introduce any user-facing change?
Yes. The current implementation produces `NULL` when `interval`, `year`, ..., `microsecond` in the input string are not written in lower case.
Before:
```sql
spark-sql> SELECT CAST('INTERVAL 10 DAYS' as INTERVAL);
NULL
```
After:
```sql
spark-sql> SELECT CAST('INTERVAL 10 DAYS' as INTERVAL);
interval 1 weeks 3 days
```

### How was this patch tested?
- by new tests in `CalendarIntervalSuite.java`
- new test in `CastSuite`

Closes #26010 from MaxGekk/interval-case-insensitive.

Authored-by: Maxim Gekk <max.gekk@gmail.com>
Signed-off-by: Dongjoon Hyun <dhyun@apple.com>
  • Loading branch information
MaxGekk authored and dongjoon-hyun committed Oct 7, 2019
1 parent 2399134 commit b103449
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 5 deletions.
Expand Up @@ -49,7 +49,8 @@ private static String unitRegex(String unit) {

private static Pattern p = Pattern.compile("interval" + unitRegex("year") + unitRegex("month") +
unitRegex("week") + unitRegex("day") + unitRegex("hour") + unitRegex("minute") +
unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"));
unitRegex("second") + unitRegex("millisecond") + unitRegex("microsecond"),
Pattern.CASE_INSENSITIVE);

private static Pattern yearMonthPattern =
Pattern.compile("^(?:['|\"])?([+|-])?(\\d+)-(\\d+)(?:['|\"])?$");
Expand All @@ -69,15 +70,15 @@ private static long toLong(String s) {

/**
* Convert a string to CalendarInterval. Return null if the input string is not a valid interval.
* This method is case-sensitive and all characters in the input string should be in lower case.
* This method is case-insensitive.
*/
public static CalendarInterval fromString(String s) {
if (s == null) {
return null;
}
s = s.trim();
Matcher m = p.matcher(s);
if (!m.matches() || s.equals("interval")) {
if (!m.matches() || s.compareToIgnoreCase("interval") == 0) {
return null;
} else {
long months = toLong(m.group(1)) * 12 + toLong(m.group(2));
Expand All @@ -93,8 +94,9 @@ public static CalendarInterval fromString(String s) {
}

/**
* Convert a string to CalendarInterval. Unlike fromString, this method is case-insensitive and
* will throw IllegalArgumentException when the input string is not a valid interval.
* Convert a string to CalendarInterval. Unlike fromString, this method can handle
* strings without the `interval` prefix and throws IllegalArgumentException
* when the input string is not a valid interval.
*
* @throws IllegalArgumentException if the string is not a valid interval.
*/
Expand Down
Expand Up @@ -274,4 +274,26 @@ private static void testSingleUnit(String unit, int number, int months, long mic
assertEquals(fromString(input1), result);
assertEquals(fromString(input2), result);
}

/**
 * Checks that fromString accepts interval strings regardless of letter case,
 * both for the unit names and for the leading `interval` keyword.
 */
@Test
public void fromStringCaseSensitivityTest() {
  // Single-unit inputs, each unit name spelled with upper-case or mixed-case letters.
  testSingleUnit("YEAR", 3, 36, 0);
  testSingleUnit("Month", 3, 3, 0);
  testSingleUnit("Week", 3, 0, 3 * MICROS_PER_WEEK);
  testSingleUnit("DAY", 3, 0, 3 * MICROS_PER_DAY);
  testSingleUnit("HouR", 3, 0, 3 * MICROS_PER_HOUR);
  testSingleUnit("MiNuTe", 3, 0, 3 * MICROS_PER_MINUTE);
  testSingleUnit("Second", 3, 0, 3 * MICROS_PER_SECOND);
  testSingleUnit("MilliSecond", 3, 0, 3 * MICROS_PER_MILLI);
  testSingleUnit("MicroSecond", 3, 0, 3);

  // Multi-unit input written entirely in upper case: -5 years plus 23 months.
  CalendarInterval expected = new CalendarInterval(-5 * 12 + 23, 0);
  assertEquals(fromString("INTERVAL -5 YEARS 23 MONTHS"), expected);

  // The bare keyword, in any case and with surrounding spaces, must yield null.
  assertNull(fromString("INTERVAL"));
  assertNull(fromString(" Interval "));
}
}
Expand Up @@ -670,6 +670,8 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper {
new CalendarInterval(15, -3 * CalendarInterval.MICROS_PER_DAY), CalendarIntervalType),
StringType),
"interval 1 years 3 months -3 days")
checkEvaluation(Cast(Literal("INTERVAL 1 Second 1 microsecond"), CalendarIntervalType),
new CalendarInterval(0, 1000001))
}

test("cast string to boolean") {
Expand Down

0 comments on commit b103449

Please sign in to comment.