PHOENIX-7155 Validate Partial Index support with JSON #1767

Merged: 5 commits, merged on Dec 21, 2023
Changes from 2 commits
@@ -17,36 +17,22 @@
*/
package org.apache.phoenix.end2end.index;

import static org.apache.phoenix.mapreduce.index.PhoenixIndexToolJobCounters.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.sql.Connection;
import java.sql.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.phoenix.end2end.IndexToolIT;
import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
import org.apache.phoenix.exception.PhoenixParserException;
import org.apache.phoenix.jdbc.PhoenixResultSet;
import org.apache.phoenix.mapreduce.index.IndexTool;
import org.apache.phoenix.query.BaseTest;
import org.apache.phoenix.query.QueryServices;
import org.apache.phoenix.schema.ColumnNotFoundException;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.thirdparty.com.google.common.collect.Maps;
import org.apache.phoenix.end2end.NeedsOwnMiniClusterTest;
import org.apache.phoenix.query.BaseTest;
import org.apache.phoenix.query.QueryServices;
import org.apache.phoenix.util.*;
import org.apache.phoenix.util.EnvironmentEdgeManager;
import org.apache.phoenix.util.PhoenixRuntime;
import org.apache.phoenix.util.PropertiesUtil;
import org.apache.phoenix.util.ReadOnlyProps;
import org.junit.After;
import org.junit.Assert;
import org.junit.BeforeClass;
@@ -55,6 +41,24 @@
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

import java.sql.Connection;
import java.sql.Date;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Map;
import java.util.Properties;

import static org.apache.phoenix.end2end.json.JsonFunctionsIT.getJsonString;
import static org.apache.phoenix.mapreduce.index.PhoenixIndexToolJobCounters.*;
import static org.apache.phoenix.util.TestUtil.TEST_PROPERTIES;
import static org.junit.Assert.*;

@Category(NeedsOwnMiniClusterTest.class)
@RunWith(Parameterized.class)
public class PartialIndexIT extends BaseTest {
@@ -182,7 +186,7 @@ public void testDDLWithAllDataTypes() throws Exception {
+ "S UNSIGNED_DATE, T UNSIGNED_TIMESTAMP, U CHAR(10), V BINARY(1024), "
+ "W VARBINARY, Y INTEGER ARRAY, Z VARCHAR ARRAY[10], AA DATE ARRAY, "
+ "AB TIMESTAMP ARRAY, AC UNSIGNED_TIME ARRAY, AD UNSIGNED_DATE ARRAY, "
+ "AE UNSIGNED_TIMESTAMP ARRAY "
+ "AE UNSIGNED_TIMESTAMP ARRAY, AF JSON "
+ "CONSTRAINT pk PRIMARY KEY (id,kp)) "
+ "MULTI_TENANT=true, COLUMN_ENCODED_BYTES=0" );
String indexTableName = generateUniqueName();
@@ -764,4 +768,174 @@ public void testPartialIndexPreferredOverFullIndex() throws Exception {
assertFalse(rs.next());
}
}

@Test
public void testPartialIndexWithJson() throws Exception {
Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
try (Connection conn = DriverManager.getConnection(getUrl(), props)) {
conn.setAutoCommit(true);
String dataTableName = generateUniqueName();
conn.createStatement().execute("create table " + dataTableName +
" (id varchar not null primary key, " +
"A integer, B integer, C double, D varchar, jsoncol json)");
String indexTableName = generateUniqueName();
String partialIndexJson = "json/json_partialindex_tests.json";
// Add rows to the data table before creating a partial index to test that the index
// will be built correctly by IndexTool
conn.createStatement().execute(
"upsert into " + dataTableName + " values ('id1', 25, 2, 3.14, 'a','" + getJsonString(
partialIndexJson, "$[0]") + "')");

conn.createStatement().execute(
"upsert into " + dataTableName + " (id, A, D, jsoncol) values ('id2', 100, 'b','" + getJsonString(
partialIndexJson, "$[3]") + "')");
conn.createStatement().execute("CREATE " + (uncovered ? "UNCOVERED " : " ") +
(local ? "LOCAL " : " ") + "INDEX "
+ indexTableName + " on " + dataTableName + " (CAST(TO_NUMBER(JSON_VALUE(jsoncol, '$.info.age')) AS INTEGER)) " + (
uncovered ? "" : "INCLUDE (B, C, D)") + " WHERE (CAST(TO_NUMBER(JSON_VALUE(jsoncol, '$.info.age')) AS INTEGER)) > 50 ASYNC");

IndexToolIT.runIndexTool(false, null, dataTableName, indexTableName);

String selectSql = "SELECT D from " + dataTableName + " WHERE (CAST(TO_NUMBER(JSON_VALUE(jsoncol, '$.info.age')) AS INTEGER)) > 60";
ResultSet rs = conn.createStatement().executeQuery(selectSql);
// Verify that the index table is used
assertPlan((PhoenixResultSet) rs, "", indexTableName);
assertTrue(rs.next());
assertEquals("b", rs.getString(1));
assertFalse(rs.next());

selectSql = "SELECT D from " + dataTableName + " WHERE (CAST(TO_NUMBER(JSON_VALUE(jsoncol, '$.info.age')) AS INTEGER)) = 50";
rs = conn.createStatement().executeQuery(selectSql);
// Verify that the index table is not used
assertPlan((PhoenixResultSet) rs, "", dataTableName);

// Add more rows to test the index write path
conn.createStatement().execute(
"upsert into " + dataTableName + " values ('id3', 50, 2, 9.5, 'c','" + getJsonString(
partialIndexJson, "$[1]") + "')");
conn.createStatement().execute(
"upsert into " + dataTableName + " values ('id4', 75, 2, 9.5, 'd','" + getJsonString(
partialIndexJson, "$[2]") + "')");

// Verify that the index table includes only the rows whose JSON '$.info.age' value is > 50
selectSql = "SELECT * from " + indexTableName;
rs = conn.createStatement().executeQuery(selectSql);
assertTrue(rs.next());
assertEquals(75, rs.getInt(1));
assertTrue(rs.next());
assertEquals(100, rs.getInt(1));
assertFalse(rs.next());

// Overwrite an existing row that satisfies the index WHERE clause
// such that the new version of the row does not satisfy the index where clause
// anymore. This should result in deleting the index row.
String dml = "UPSERT INTO " + dataTableName + " values ('id2', 0, 2, 9.5, 'd', JSON_MODIFY(jsoncol, '$.info.age', '0')) "; //, JSON_MODIFY(jsoncol, '$.info.age', '"0"')
conn.createStatement().execute(dml);
rs = conn.createStatement().executeQuery(selectSql);
assertTrue(rs.next());
assertEquals(75, rs.getInt(1));
assertFalse(rs.next());

// Retrieve the updated row from the data table and verify that the index table is not used
selectSql = "SELECT ID from " + dataTableName + " WHERE (CAST(TO_NUMBER(JSON_VALUE(jsoncol, '$.info.age')) AS INTEGER)) = 0";
rs = conn.createStatement().executeQuery(selectSql);
assertPlan((PhoenixResultSet) rs, "", dataTableName);
assertTrue(rs.next());
assertEquals("id2", rs.getString(1));

// Test index verification and repair by IndexTool
verifyIndex(dataTableName, indexTableName);

try (Connection newConn = DriverManager.getConnection(getUrl())) {
PTable indexTable = PhoenixRuntime.getTableNoCache(newConn, indexTableName);
assertTrue(StringUtils.deleteWhitespace(indexTable.getIndexWhere())
.equals("CAST(TO_NUMBER(JSON_VALUE(JSONCOL,'$.info.age'))ASINTEGER)>50"));
}
}
}

@Test
public void testPartialIndexWithJsonExists() throws Exception {
Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES);
try (Connection conn = DriverManager.getConnection(getUrl(), props)) {
conn.setAutoCommit(true);
String dataTableName = generateUniqueName();
conn.createStatement().execute("create table " + dataTableName +
" (id varchar not null primary key, " +
"A integer, B integer, C double, D varchar, jsoncol json)"
+ (salted ? " SALT_BUCKETS=4" : ""));
String indexTableName = generateUniqueName();
String partialIndexJson = "json/json_partialindex_tests.json";
// Add rows to the data table before creating a partial index to test that the index
// will be built correctly by IndexTool
conn.createStatement().execute(
"upsert into " + dataTableName + " values ('id1', 70, 2, 3.14, 'a','" + getJsonString(
Contributor commented:

To improve the readability of the tests, we should include the JSON doc payload directly in the string for the upsert statement instead of retrieving it from a file. The JSON doc should include only the content necessary for this test. For example, for this test the string for the doc could be '{"info": {"address": {"exists": true}}}'. If the same JSON string is used multiple times, we can define a string constant.

Contributor (author) replied:

Sure, makes sense. Will make these changes.

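For illustration only, a minimal sketch of the inline-payload style the reviewer is suggesting; the constant name ADDRESS_EXISTS_JSON is hypothetical, the payload is trimmed to just what this test exercises, and none of this is code from the PR:

// Hypothetical constant holding only the JSON content this test actually needs.
private static final String ADDRESS_EXISTS_JSON = "{\"info\": {\"address\": {\"exists\": true}}}";

// Upsert the payload inline instead of loading json/json_partialindex_tests.json via getJsonString().
conn.createStatement().execute(
    "upsert into " + dataTableName + " values ('id1', 70, 2, 3.14, 'a', '" + ADDRESS_EXISTS_JSON + "')");
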
partialIndexJson, "$[0]") + "')");
conn.createStatement().execute(
"upsert into " + dataTableName + " (id, A, D, jsoncol) values ('id2', 100, 'b','" + getJsonString(
partialIndexJson, "$[1]") + "')");
conn.createStatement().execute("CREATE " + (uncovered ? "UNCOVERED " : " ") +
(local ? "LOCAL " : " ") + "INDEX " + indexTableName + " on " + dataTableName + " (A) " +
(uncovered ? "" : "INCLUDE (B, C, D)") + " WHERE JSON_EXISTS(JSONCOL, '$.info.address.exists') ASYNC");
IndexToolIT.runIndexTool(false, null, dataTableName, indexTableName);

String selectSql = "SELECT " + (uncovered ? " " : "/*+ INDEX(" + dataTableName + " " + indexTableName + ")*/ ") +
" A, D from " + dataTableName + " WHERE A > 60 AND JSON_EXISTS(jsoncol, '$.info.address.exists')";
ResultSet rs = conn.createStatement().executeQuery(selectSql);
// Verify that the index table is used
assertPlan((PhoenixResultSet) rs, "", indexTableName);
assertTrue(rs.next());
assertEquals(70, rs.getInt(1));
assertEquals("a", rs.getString(2));
assertFalse(rs.next());

// Add more rows to test the index write path
conn.createStatement().execute(
"upsert into " + dataTableName + " values ('id3', 20, 2, 3.14, 'a','" + getJsonString(
partialIndexJson, "$[2]") + "')");
conn.createStatement().execute(
"upsert into " + dataTableName + " values ('id4', 90, 2, 3.14, 'a','" + getJsonString(
partialIndexJson, "$[4]") + "')");
conn.createStatement().execute(
"upsert into " + dataTableName + " (id, A, D, jsoncol) values ('id5', 150, 'b','" + getJsonString(
partialIndexJson, "$[3]") + "')");

// Verify that the index table includes only the rows where the JSON path '$.info.address.exists' is present
rs = conn.createStatement().executeQuery(selectSql);
assertTrue(rs.next());
assertEquals(70, rs.getInt(1));
assertEquals("a", rs.getString(2));
assertTrue(rs.next());
assertEquals(90, rs.getInt(1));
assertEquals("a", rs.getString(2));
assertFalse(rs.next());

rs = conn.createStatement().executeQuery("SELECT Count(*) from " + dataTableName);
// Verify that the index table is not used
assertPlan((PhoenixResultSet) rs, "", dataTableName);
assertTrue(rs.next());
assertEquals(5, rs.getInt(1));

// Overwrite an existing row that satisfies the index WHERE clause such that
// the new version of the row does not satisfy the index where clause anymore. This
// should result in deleting the index row.
conn.createStatement().execute(
"upsert into " + dataTableName + " (ID, B, jsoncol) values ('id4', null, '" + getJsonString(
partialIndexJson, "$[3]") + "')");
rs = conn.createStatement().executeQuery(selectSql);
assertTrue(rs.next());
assertEquals(70, rs.getInt(1));
assertEquals("a", rs.getString(2));
assertFalse(rs.next());

// Test index verification and repair by IndexTool
verifyIndex(dataTableName, indexTableName);

try (Connection newConn = DriverManager.getConnection(getUrl())) {
PTable indexTable = PhoenixRuntime.getTableNoCache(newConn, indexTableName);
assertTrue(StringUtils.deleteWhitespace(indexTable.getIndexWhere())
.equals("JSON_EXISTS(JSONCOL,'$.info.address.exists')"));
}
}
}
}
@@ -442,10 +442,11 @@ private void checkInvalidJsonIndexExpression(Properties props, String tableName,
}
}

private static String getJsonString(String jsonFilePath) throws IOException {
public static String getJsonString(String jsonFilePath) throws IOException {
return getJsonString(jsonFilePath, "$");
}
private static String getJsonString(String jsonFilePath, String jsonPath) throws IOException {

public static String getJsonString(String jsonFilePath, String jsonPath) throws IOException {
URL fileUrl = JsonFunctionsIT.class.getClassLoader().getResource(jsonFilePath);
String json = FileUtils.readFileToString(new File(fileUrl.getFile()));
Configuration conf = Configuration.builder().jsonProvider(new GsonJsonProvider()).build();
108 changes: 108 additions & 0 deletions phoenix-core/src/it/resources/json/json_partialindex_tests.json
@@ -0,0 +1,108 @@
[
{
"info": {
"type": 1,
"age": 25,
"address": {
"town": "Bristol",
"county": "Avon",
"country": "England",
"exists": true
},
"tags": [
"Sport",
"Water polo"
]
},
"type": "Basic",
"name": "AndersenFamily"
},
{
"info": {
"type": 2,
"age": 50,
"address": {
"town": "Bristol2",
"county": "Avon2",
"country": "France"
},
"tags": [
"Cricket",
"Foot Ball"
]
},
"type": "Normal",
"name": "SomeFamily"
},
{
"info": {
"type": 3,
"age": 75,
"address": {
"town": "Bristol3",
"county": "Avon3",
"country": "Australia",
"exists": true
},
"tags": [
"Rugby",
"Ice Hockey"
]
},
"type": "Advanced",
"name": "AdvancedFamily"
},
{
"info": {
"type": 4,
"age": 100,
"address": {
"town": "Bristol4",
"county": "Avon4",
"country": "Antarctica"
},
"tags": [
"HorseRide",
"Dolphine"
]
},
"type": "Advanced",
"name": "AntarcticaFamily"
},
{
"info": {
"type": 5,
"age": 45,
"address": {
"town": "Bristol5",
"county": "Avon5",
"country": "Europe",
"exists": true
},
"tags": [
"Jumping",
"Hopping"
]
},
"type": "Normal",
"name": "EuropeFamily"
},
{
"info": {
"type": 6,
"age": 54,
"address": {
"town": "Bristol6",
"county": "Avon6",
"country": "Euro",
"exists": true
},
"tags": [
"Jump",
"Hop"
]
},
"type": "Basic",
"name": "EuroFamily"
}
]
@@ -47,6 +47,7 @@
import org.apache.phoenix.schema.types.PDataType;
import org.apache.phoenix.schema.types.PDate;
import org.apache.phoenix.schema.types.PDateArray;
import org.apache.phoenix.schema.types.PJson;
import org.apache.phoenix.schema.types.PNumericType;
import org.apache.phoenix.schema.types.PTime;
import org.apache.phoenix.schema.types.PTimeArray;
@@ -110,6 +111,8 @@ private String getValue(PDataType type) {
return "ARRAY[" + getValue(PDate.INSTANCE) + "]";
} else if (type instanceof PArrayDataType) {
return "ARRAY" + type.getSampleValue().toString();
} else if (type instanceof PJson) {
return "'{a:1}'";
} else {
return "0123";
}