Skip to content

Commit

Permalink
Export of internal ZetaSQL changes.
Browse files Browse the repository at this point in the history
--
Change by Matthew Brown <matthewbr@google.com>:
ZetaSQL: Open source differential privacy support - evaluator
--
Change by ZetaSQL Team <no-reply@google.com>:
Prevent order by constraints that involve numbers less than 1.
--
Change by ZetaSQL Team <no-reply@google.com>:
Remove not null validation in Java builders of Resolved AST nodes for IGNORABLE and IGNORABLE_DEFAULT fields
--
Change by Matthew Brown <matthewbr@google.com>:
ZetaSQL: Allow specifying proto2 DescriptorPool to use in execute_query
--
Change by ZetaSQL Team <no-reply@google.com>:
Make SimpleTable::GetColumn(i) return nullptr when `i` is invalid.
--
Change by ZetaSQL Team <no-reply@google.com>:
Fix TypeParameters::IsEmptyTypeParameters() method
--
Change by Matthew Brown <matthewbr@google.com>:
execute_query: Run the analyzer directly to allow --mode=resolve to work even if algebrizer fails
--
Change by Matthew Brown <matthewbr@google.com>:
execute_query: fix language options used to initialize function catalog
--
Change by ZetaSQL Team <no-reply@google.com>:
Fix bug that AnnotationMap::Equals() doesn't check equality of the annotation map entries.
--
Change by ZetaSQL Team <no-reply@google.com>:
Use table and column names retrieved from ResolvedTableScan's

GitOrigin-RevId: adaccdd8c2b8a276ff59c50c38bbf4d4ea2cdbfd
Change-Id: I5e0c2565d0ba4f31377e98331fb2338c00aab075
  • Loading branch information
ZetaSQL Team authored and matthewcbrown committed Jan 22, 2021
1 parent 7459805 commit 862a192
Show file tree
Hide file tree
Showing 43 changed files with 2,041 additions and 76 deletions.
7 changes: 6 additions & 1 deletion README.md
Expand Up @@ -22,7 +22,7 @@ This codebase is being open sourced in multiple phases:
2. Reference Implementation **In Progress**
- Base capability **Complete**
- Function library **In Progress**
3. Compliance Tests **In Progress**
3. Compliance Tests **Complete**
- includes framework for validating compliance of arbitrary engines
4. Misc tooling
- Improved Formatter **In Progress**
Expand Down Expand Up @@ -68,6 +68,11 @@ reference implementation:
The reference implementation is not yet completely released and currently
supports only a subset of functions and types.

## Differential Privacy
For questions, documentation and examples of ZetaSQL's implementation of
Differential Privacy, please check out
(https://github.com/google/differential-privacy).

## Versions

ZetaSQL makes no guarantees regarding compatibility between releases.
Expand Down
28 changes: 24 additions & 4 deletions bazel/zetasql_deps_step_2.bzl
Expand Up @@ -170,6 +170,26 @@ cc_proto_library(
strip_prefix = "riegeli-baf6376f694d401932cf1b9d34e79a0fae50e7c4",
)

# Differential Privacy
if not native.existing_rule("com_google_differential_privacy"):
http_archive(
name = "com_google_differential_privacy",
# Commit from 2021-01-21
url = "https://github.com/google/differential-privacy/archive/de8460c9791de4c89a9dbb906b11a8f62e045f7b.tar.gz",
sha256 = "e9f01b00e760724909a7ff7acf26855a802417a23bde54d6baec9168cfbe1dc4",
strip_prefix = "differential-privacy-de8460c9791de4c89a9dbb906b11a8f62e045f7b",
)

# Differential Privacy - cc
if not native.existing_rule("com_google_cc_differential_privacy"):
http_archive(
name = "com_google_cc_differential_privacy",
# Commit from 2021-01-21
url = "https://github.com/google/differential-privacy/archive/de8460c9791de4c89a9dbb906b11a8f62e045f7b.tar.gz",
sha256 = "e9f01b00e760724909a7ff7acf26855a802417a23bde54d6baec9168cfbe1dc4",
strip_prefix = "differential-privacy-de8460c9791de4c89a9dbb906b11a8f62e045f7b/cc",
)

# Boringssl
if not native.existing_rule("boringssl"):
http_archive(
Expand Down Expand Up @@ -238,10 +258,10 @@ cc_proto_library(
if not native.existing_rule("com_google_file_based_test_driver"):
http_archive(
name = "com_google_file_based_test_driver",
# Commit from 2020-09-28
url = "https://github.com/google/file-based-test-driver/archive/d0561d4d24117c2f3893e6fa947f1bde8c0719af.tar.gz",
sha256 = "3249acb0b74e4facb37001daab5db320153d090905cd38bb970a79d83bf0ad41",
strip_prefix = "file-based-test-driver-d0561d4d24117c2f3893e6fa947f1bde8c0719af",
# Commit from 2020-11-24
url = "https://github.com/google/file-based-test-driver/archive/5074f48f03c6a892edafab55410addc43f4a0546.tar.gz",
sha256 = "955cdee45433dd608bfde47d4d1dd6f47decf739a4c54cf4eecc11896dcbb374",
strip_prefix = "file-based-test-driver-5074f48f03c6a892edafab55410addc43f4a0546",
)

# gRPC
Expand Down
10 changes: 4 additions & 6 deletions docs/modules.md
Expand Up @@ -35,14 +35,12 @@ Modules support the following statements:

+ `MODULE`
+ `IMPORT MODULE`
+ `CREATE ( PUBLIC | PRIVATE ) [ TABLE ] FUNCTION`
+ `CREATE ( PUBLIC | PRIVATE ) [ ( TABLE | AGGREGATE ) ] FUNCTION`
+ `CREATE ( PUBLIC | PRIVATE ) CONSTANT`

Modules do not support statements that return results or have side effects. For
example, the following statements are not supported inside of modules:

+ `SELECT` statements
+ `UPDATE` statements
Modules do not support statements that return results or have side effects.
Modules only support defining an object once and do not support modifying an
object after it is defined.

### Declaring a module

Expand Down
33 changes: 33 additions & 0 deletions java/com/google/zetasql/SimpleTable.java
Expand Up @@ -22,6 +22,7 @@
import com.google.common.collect.Lists;
import com.google.zetasql.ZetaSQLType.TypeProto;
import com.google.zetasql.LocalService.TableFromProtoRequest;
import com.google.zetasql.SimpleTableProtos.SimpleAnonymizationInfoProto;
import com.google.zetasql.SimpleTableProtos.SimpleColumnProto;
import com.google.zetasql.SimpleTableProtos.SimpleTableProto;
import io.grpc.StatusRuntimeException;
Expand Down Expand Up @@ -59,6 +60,9 @@ private static synchronized void updateNextIdIfNotGreaterThan(long id) {
private boolean allowAnonymousColumnName = false;
private boolean anonymousColumnSeen = false;
private boolean allowDuplicateColumnNames = false;
// TODO: Add support for value tables with userIdColumns, as this
// implementation does not support them yet.
private SimpleColumn userIdColumn = null;

/** Make a table with the given Columns. Crashes if there are duplicate column names. */
public SimpleTable(String name, List<SimpleColumn> columns) {
Expand Down Expand Up @@ -123,6 +127,12 @@ public SimpleTableProto serialize(FileDescriptorSetsBuilder fileDescriptorSetsBu
builder.addPrimaryKeyColumnIndex(columnIndex);
}
}
if (userIdColumn != null) {
SimpleAnonymizationInfoProto.Builder anonymizationBuilder =
SimpleAnonymizationInfoProto.newBuilder();
anonymizationBuilder.addUseridColumnName(userIdColumn.getName());
builder.setAnonymizationInfo(anonymizationBuilder.build());
}
return builder.build();
}

Expand Down Expand Up @@ -155,6 +165,20 @@ public static SimpleTable deserialize(
table.setPrimaryKey(proto.getPrimaryKeyColumnIndexList());
}
table.setIsValueTable(proto.getIsValueTable());
if (proto.hasAnonymizationInfo()) {
// TODO: Support value tables with anonymization userid columns.
// This implementation will ignore anonymization userid columns for value
// tables (the findColumnByName() call will return null), so such
// tables will be treated as if they do not support anonymization. We
// cannot currently throw an exception in this case, since then all
// analyzer tests will fail because the sample catalog that gets
// serialized/deserialized includes such tables and the test setup for
// every test will fail.
if (!proto.getAnonymizationInfo().getUseridColumnNameList().isEmpty()) {
table.userIdColumn =
table.findColumnByName(proto.getAnonymizationInfo().getUseridColumnName(0));
}
}
return table;
}

Expand Down Expand Up @@ -255,6 +279,15 @@ public boolean allowDuplicateColumnNames() {
return allowDuplicateColumnNames;
}

/**
 * Returns the column currently registered as this table's anonymization user id column, or
 * {@code null} when no such column has been set.
 */
public SimpleColumn userIdColumn() {
  return this.userIdColumn;
}

/**
 * Registers {@code column} as this table's anonymization user id column.
 *
 * <p>The column must already be one of this table's columns; otherwise the
 * {@code Preconditions.checkArgument} check fails and the stored user id column is left
 * unchanged.
 *
 * @param column the column to use as the user id column
 */
public void setUserIdColumn(SimpleColumn column) {
  final boolean belongsToThisTable = columns.contains(column);
  Preconditions.checkArgument(belongsToThisTable);
  this.userIdColumn = column;
}

public void setAllowDuplicateColumnNames(boolean value) {
Preconditions.checkState(value || duplicateColumnNames.isEmpty());
allowDuplicateColumnNames = value;
Expand Down
Expand Up @@ -451,7 +451,7 @@ public final class ResolvedNodes {
protected void validate() {
super.validate();
{% for field in (node.fields) %}
{% if field.is_constructor_arg and not field.is_optional_constructor_arg %}
{% if field.is_constructor_arg and not field.is_optional_constructor_arg and field.is_not_ignorable %}
Preconditions.checkArgument(
{{field.name|lower_camel_case}} != null, "{{field.name|lower_camel_case}} must be set");
{% endif %}
Expand Down
14 changes: 13 additions & 1 deletion javatests/com/google/zetasql/resolvedast/ResolvedNodesTest.java
Expand Up @@ -29,6 +29,7 @@
import com.google.zetasql.Type;
import com.google.zetasql.TypeFactory;
import com.google.zetasql.Value;
import com.google.zetasql.resolvedast.ResolvedNodes.ResolvedColumnDefinition;
import com.google.zetasql.resolvedast.ResolvedNodes.ResolvedExpr;
import com.google.zetasql.resolvedast.ResolvedNodes.ResolvedLimitOffsetScan;
import com.google.zetasql.resolvedast.ResolvedNodes.ResolvedLiteral;
Expand Down Expand Up @@ -102,7 +103,7 @@ public void testBasicToBuilderCanOverrideValue() {
}

@Test
public void testBuilderFailsOnUnsetValue() {
public void testBuilderFailsOnUnsetNotIgnorableField() {
try {
// FloatLiteralId and HasExplicitType are optional constructor arguments and can be omitted.
ResolvedLiteral.builder().setType(INT32_TYPE).setValue(Value.createInt32Value(5)).build();
Expand All @@ -120,6 +121,17 @@ public void testBuilderFailsOnUnsetValue() {
}
}

// Builds a ResolvedColumnDefinition with only name, type and isHidden set. The test passes when
// build() completes without throwing; it makes no further assertions on the result.
@Test
public void testBuilderSucceedsOnUnsetIgnorableFields() {
ResolvedColumnDefinition.builder()
.setName("testColumn")
.setType(INT32_TYPE)
.setIsHidden(false)
// .setColumn() is intentionally not called: column is an IGNORABLE field.
// .setGeneratedColumnInfo() is intentionally not called: it is an IGNORABLE_DEFAULT field.
.build();
}

@Test
public void testInheritedBasicToBuilder() {
ResolvedExpr fiveAsExpr = FIVE;
Expand Down
34 changes: 34 additions & 0 deletions zetasql/analyzer/analyzer_test.cc
Expand Up @@ -1551,6 +1551,40 @@ TEST(SQLBuilderTest, WithScanWithFilterScan) {
formatted_sql);
}

// Verifies that SQLBuilder prefers ResolvedTableScan.column_index_list over
// the column and table names stored in the scan's ResolvedColumns, which
// should have no semantic meaning (see the class comment on
// `ResolvedTableScan`). The scan's columns are deliberately given a
// placeholder name, and the generated SQL is expected to reference the real
// table "T1" and column "C" instead.
TEST(SQLBuilderTest, TableScanPrefersColumnIndexList) {
  const int shared_column_id = 9;
  const std::string real_table_name = "T1";
  const std::string real_column_name = "C";
  const std::string placeholder_name = "UNUSED_NAME";

  // Catalog table with a single INT32 column; the table owns the column.
  TypeFactory types;
  auto simple_table = absl::make_unique<SimpleTable>(real_table_name);
  ZETASQL_ASSERT_OK(simple_table->AddColumn(
      new SimpleColumn(real_table_name, real_column_name, types.get_int32()),
      /*is_owned=*/true));

  // Table scan whose only output column carries the placeholder names and is
  // mapped to table column 0 through column_index_list.
  const ResolvedColumn scanned_column(shared_column_id, placeholder_name,
                                      placeholder_name, types.get_int32());
  auto scan = MakeResolvedTableScan({scanned_column}, simple_table.get(),
                                    /*for_system_time_expr=*/nullptr);
  scan->set_column_index_list({0});

  // Projection on top of the scan that forwards the same column id.
  const ResolvedColumn projected_column(shared_column_id, placeholder_name,
                                        placeholder_name, types.get_int32());
  auto project_scan = MakeResolvedProjectScan(
      {projected_column}, /*expr_list=*/{}, std::move(scan));

  SQLBuilder builder;
  ZETASQL_ASSERT_OK(builder.Process(*project_scan));

  std::string pretty_sql;
  ZETASQL_ASSERT_OK(FormatSql(builder.sql(), &pretty_sql));
  EXPECT_EQ(
      "SELECT\n t1_2.a_1 AS a_1\nFROM\n (\n"
      " SELECT\n T1.C AS a_1\n FROM\n T1\n ) AS t1_2;",
      pretty_sql);
}

// Adding specific unit test to input provided by Random Query Generator tree.
// From a SQL String (like in golden file sql_builder.test), we get a different
// tree (JoinScan is under other ResolvedScans and this scenario isn't tested.
Expand Down
13 changes: 8 additions & 5 deletions zetasql/analyzer/anonymization_rewriter.cc
Expand Up @@ -49,7 +49,8 @@ namespace zetasql {
namespace {

// Rewrites a given AST that includes a ResolvedAnonymizedAggregateScan to use
// the semantics defined in (broken link).
// the semantics defined in https://arxiv.org/abs/1909.01917 and
// (broken link).
//
// Overview of the rewrite process:
// 1. This class is invoked on an AST node, blindly copying everything until a
Expand All @@ -68,8 +69,10 @@ namespace {
// in the per-user scan's column list is the appropriate intermediate
// column looked up in the column map
// 4. If kappa is specified, a partitioned-by-$uid ResolvedSampleScan is
// inserted to limit the number of groups that a user can contribute to,
// in effect providing dataset-level privacy
// inserted to limit the number of groups that a user can contribute to.
// While kappa is optional, for most queries with a GROUP BY clause in the
// ResolvedAnonymizedAggregateScan it MUST be specified for the resulting
// query to provide correct epsilon-delta differential privacy.
// 5. The final cross-user ResolvedAnonymizedAggregateScan is created:
// a. The input scan is set to the (possibly sampled) per-user scan
// b. The first argument for each ANON_* function call in the anon node is
Expand Down Expand Up @@ -1172,8 +1175,8 @@ absl::Status RewriterVisitor::VisitResolvedAnonymizedAggregateScan(
std::unique_ptr<ResolvedScan> input_scan,
RewriteInnerAggregateScan(node, &injected_col_map, &uid_column));

// Inject a SampleScan if kappa is present, in order to provide the
// requested dataset-level privacy.
// Inject a SampleScan if kappa is present, in order to provide epsilon-delta
// differential privacy in the presence of a GROUP BY clause.
if (kappa_value != nullptr) {
std::vector<std::unique_ptr<const ResolvedExpr>> partition_by_list;
partition_by_list.push_back(MakeColRef(uid_column));
Expand Down
7 changes: 7 additions & 0 deletions zetasql/analyzer/resolver_query.cc
Expand Up @@ -17,6 +17,7 @@
// This file contains the implementation of query-related (i.e. SELECT)
// resolver methods from resolver.h.
#include <algorithm>
#include <limits>
#include <map>
#include <memory>
#include <numeric>
Expand Down Expand Up @@ -2672,6 +2673,12 @@ absl::Status Resolver::ResolveOrderingExprs(
const Value& value =
resolved_order_expression->GetAs<ResolvedLiteral>()->value();
if (value.type_kind() == TYPE_INT64 && !value.is_null()) {
if (value.int64_value() < 1) {
return MakeSqlErrorAt(order_by_expression)
<< "ORDER BY column number item is out of range. "
<< "Column numbers must be greater than or equal to one. "
<< "Found : " << value.int64_value();
}
const int64_t int_value = value.int64_value() - 1; // Make it 0-based.
order_by_info->emplace_back(order_by_expression, int_value,
order_by_expression->descending(),
Expand Down
5 changes: 5 additions & 0 deletions zetasql/analyzer/resolver_stmt.cc
Expand Up @@ -2168,6 +2168,11 @@ absl::Status Resolver::ResolveCreateTableStatement(
query == nullptr || language().LanguageFeatureEnabled(
FEATURE_CREATE_TABLE_AS_SELECT_COLUMN_LIST)};

if (ast_statement->like_table_name() != nullptr) {
return MakeSqlErrorAt(ast_statement->like_table_name())
<< "CREATE TABLE LIKE is unsupported";
}

if (ast_statement->partition_by() != nullptr &&
!language().LanguageFeatureEnabled(FEATURE_CREATE_TABLE_PARTITION_BY)) {
return MakeSqlErrorAt(ast_statement->partition_by())
Expand Down
26 changes: 26 additions & 0 deletions zetasql/analyzer/resolver_test.cc
Expand Up @@ -985,6 +985,32 @@ TEST_F(ResolverTest, TestExpectedErrorMessage) {
expected_error_substr);
}

// An ORDER BY ordinal of 0 must be rejected: the statement parses, but
// resolving it has to fail with kInvalidArgument and the out-of-range message.
TEST_F(ResolverTest, ReturnsErrorWhenRequestedToOrderByZero) {
  const std::string sql = "SELECT '' FROM UNNEST([]) ORDER BY 0";

  // Parsing is expected to succeed; the error comes from the resolver.
  std::unique_ptr<ParserOutput> parsed;
  ZETASQL_ASSERT_OK(ParseStatement(sql, ParserOptions(), &parsed));

  std::unique_ptr<const ResolvedStatement> resolved;
  const auto resolve_status =
      resolver_->ResolveStatement(sql, parsed->statement(), &resolved);
  EXPECT_THAT(
      resolve_status,
      StatusIs(absl::StatusCode::kInvalidArgument,
               HasSubstr("ORDER BY column number item is out of range.")));
}

// A negative ORDER BY ordinal must be rejected: the statement parses, but
// resolving it has to fail with kInvalidArgument and the out-of-range message.
TEST_F(ResolverTest, ReturnsErrorWhenRequestedToOrderByNegativeNumber) {
  const std::string sql = "SELECT '' FROM UNNEST([]) ORDER BY -1";

  // Parsing is expected to succeed; the error comes from the resolver.
  std::unique_ptr<ParserOutput> parsed;
  ZETASQL_ASSERT_OK(ParseStatement(sql, ParserOptions(), &parsed));

  std::unique_ptr<const ResolvedStatement> resolved;
  const auto resolve_status =
      resolver_->ResolveStatement(sql, parsed->statement(), &resolved);
  EXPECT_THAT(
      resolve_status,
      StatusIs(absl::StatusCode::kInvalidArgument,
               HasSubstr("ORDER BY column number item is out of range.")));
}

TEST_F(ResolverTest, TestHasAnonymization) {
std::unique_ptr<ParserOutput> parser_output;
std::unique_ptr<const ResolvedStatement> resolved_statement;
Expand Down
4 changes: 2 additions & 2 deletions zetasql/base/logging.h
Expand Up @@ -70,7 +70,7 @@
// nothing is logged.
// Example:
//
// LOG_IF(INFO, num_cookies > 10) << "Got lots of cookies";
// ZETASQL_LOG_IF(INFO, num_cookies > 10) << "Got lots of cookies";
//
// severity: the severity of the log message, one of LogSeverity. The
// FATAL severity will terminate the program after the log is emitted.
Expand Down Expand Up @@ -344,7 +344,7 @@ class LogMessage {
};

// This class is used just to take an ostream type and make it a void type to
// satisfy the ternary operator in LOG_IF.
// satisfy the ternary operator in ZETASQL_LOG_IF.
// operator& is used because it has precedence lower than << but higher than ?:
class LogMessageVoidify {
public:
Expand Down
6 changes: 3 additions & 3 deletions zetasql/compliance/BUILD
Expand Up @@ -68,9 +68,9 @@ cc_library(
"@com_google_absl//absl/strings",
"@com_google_absl//absl/time",
"@com_google_farmhash//:farmhash_fingerprint",
"@com_google_file_based_test_driver//:file_based_test_driver",
"@com_google_file_based_test_driver//:run_test_case_result",
"@com_google_file_based_test_driver//:test_case_options",
"@com_google_file_based_test_driver//file_based_test_driver",
"@com_google_file_based_test_driver//file_based_test_driver:run_test_case_result",
"@com_google_file_based_test_driver//file_based_test_driver:test_case_options",
"@com_google_googletest//:gtest_main",
"@com_google_protobuf//:protobuf",
"@com_googlesource_code_re2//:re2",
Expand Down
2 changes: 1 addition & 1 deletion zetasql/compliance/compliance_test_cases.cc
Expand Up @@ -160,7 +160,7 @@ static std::vector<std::string> GetTypeLabels(

// Returns a SQL literal for the value.
static std::string MakeLiteral(const Value& value) {
LOG_IF(FATAL, value.is_null()) << "Null value " << value.DebugString();
ZETASQL_LOG_IF(FATAL, value.is_null()) << "Null value " << value.DebugString();

switch (value.type_kind()) {
case TYPE_STRING:
Expand Down
2 changes: 1 addition & 1 deletion zetasql/compliance/sql_test_base.cc
Expand Up @@ -54,7 +54,7 @@
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"
#include "absl/time/time.h"
#include "file_based_test_driver.h"
#include "file_based_test_driver/file_based_test_driver.h"
#include "zetasql/base/file_util.h"
#include "zetasql/base/map_util.h"
#include "zetasql/base/source_location.h"
Expand Down
6 changes: 3 additions & 3 deletions zetasql/compliance/sql_test_base.h
Expand Up @@ -88,9 +88,9 @@
#include "zetasql/base/statusor.h"
#include "absl/strings/string_view.h"
#include "absl/time/time.h"
#include "file_based_test_driver.h"
#include "run_test_case_result.h"
#include "test_case_options.h"
#include "file_based_test_driver/file_based_test_driver.h"
#include "file_based_test_driver/run_test_case_result.h"
#include "file_based_test_driver/test_case_options.h"
#include "re2/re2.h"
#include "zetasql/base/status.h"

Expand Down

0 comments on commit 862a192

Please sign in to comment.