[SPARK-42901][CONNECT][PYTHON] Move StorageLevel into a separate file to avoid potential `file recursively imports`

#40510 introduced `message StorageLevel` in `base.proto`, but if we import `base.proto` into `catalog.proto` to reuse `StorageLevel` in `message CacheTable` and run `build/sbt "connect-common/compile"` to compile, the compile log contains the following errors:

```
spark/connect/base.proto:23:1: File recursively imports itself: spark/connect/base.proto -> spark/connect/commands.proto -> spark/connect/relations.proto -> spark/connect/catalog.proto -> spark/connect/base.proto
spark/connect/catalog.proto:22:1: Import "spark/connect/base.proto" was not found or had errors.
spark/connect/catalog.proto:144:12: "spark.connect.DataType" seems to be defined in "spark/connect/types.proto", which is not imported by "spark/connect/catalog.proto".  To use it here, please add the necessary import.
spark/connect/catalog.proto:161:12: "spark.connect.DataType" seems to be defined in "spark/connect/types.proto", which is not imported by "spark/connect/catalog.proto".  To use it here, please add the necessary import.
spark/connect/relations.proto:25:1: Import "spark/connect/catalog.proto" was not found or had errors.
spark/connect/relations.proto:84:5: "Catalog" is not defined.
spark/connect/commands.proto:22:1: Import "spark/connect/relations.proto" was not found or had errors.
spark/connect/commands.proto:63:3: "Relation" is not defined.
spark/connect/commands.proto:81:3: "Relation" is not defined.
spark/connect/commands.proto:142:3: "Relation" is not defined.
spark/connect/base.proto:23:1: Import "spark/connect/commands.proto" was not found or had errors.
spark/connect/base.proto:25:1: Import "spark/connect/relations.proto" was not found or had errors.
....
```

So this PR moves `message StorageLevel` to a separate file, `common.proto`, to avoid this potential recursive file import.

To avoid a potential `File recursively imports itself` error.

No

- Pass GitHub Actions
- Manual check:
   - Add `import "spark/connect/common.proto";` to `catalog.proto`
   - Run `build/sbt "connect-common/compile"`

No compile log entries related to `File recursively imports itself` appear.
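As an additional sanity check on the Python side, a minimal sketch (assuming a build where the regenerated protos are importable) that the relocated message can be constructed from its new module:

```python
# StorageLevel now lives in common_pb2 rather than base_pb2.
from pyspark.sql.connect.proto import common_pb2

# A MEMORY_AND_DISK-style level, spelled out field by field.
level = common_pb2.StorageLevel(
    use_disk=True,
    use_memory=True,
    use_off_heap=False,
    deserialized=True,
    replication=1,
)
print(level)
```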

Closes #40518 from LuciferYang/SPARK-42889-FOLLOWUP.

Lead-authored-by: yangjie01 <yangjie01@baidu.com>
Co-authored-by: YangJie <yangjie01@baidu.com>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
(cherry picked from commit 88cc239)
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
LuciferYang authored and HyukjinKwon committed Mar 23, 2023
1 parent a1b853c commit 253cb7a
Showing 7 changed files with 312 additions and 198 deletions.
spark/connect/base.proto
@@ -21,6 +21,7 @@ package spark.connect;

import "google/protobuf/any.proto";
import "spark/connect/commands.proto";
import "spark/connect/common.proto";
import "spark/connect/expressions.proto";
import "spark/connect/relations.proto";
import "spark/connect/types.proto";
@@ -54,20 +55,6 @@ message UserContext {
repeated google.protobuf.Any extensions = 999;
}

// StorageLevel for persisting Datasets/Tables.
message StorageLevel {
// (Required) Whether the cache should use disk or not.
bool use_disk = 1;
// (Required) Whether the cache should use memory or not.
bool use_memory = 2;
// (Required) Whether the cache should use off-heap or not.
bool use_off_heap = 3;
// (Required) Whether the cached data is deserialized or not.
bool deserialized = 4;
// (Required) The number of replicas.
int32 replication = 5;
}

// Request to perform plan analyze, optionally to explain the plan.
message AnalyzePlanRequest {
// (Required)
spark/connect/common.proto
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

syntax = 'proto3';

package spark.connect;

option java_multiple_files = true;
option java_package = "org.apache.spark.connect.proto";

// StorageLevel for persisting Datasets/Tables.
message StorageLevel {
// (Required) Whether the cache should use disk or not.
bool use_disk = 1;
// (Required) Whether the cache should use memory or not.
bool use_memory = 2;
// (Required) Whether the cache should use off-heap or not.
bool use_off_heap = 3;
// (Required) Whether the cached data is deserialized or not.
bool deserialized = 4;
// (Required) The number of replicas.
int32 replication = 5;
}
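For context, this message mirrors the classic PySpark `StorageLevel`; below is a hedged sketch of the field-for-field mapping a Connect client could perform (`to_proto` is illustrative, not the actual client helper):

```python
from pyspark.storagelevel import StorageLevel as PySparkStorageLevel
from pyspark.sql.connect.proto import common_pb2

def to_proto(level: PySparkStorageLevel) -> common_pb2.StorageLevel:
    # Map the classic StorageLevel attributes onto the proto fields
    # defined in spark/connect/common.proto above.
    return common_pb2.StorageLevel(
        use_disk=level.useDisk,
        use_memory=level.useMemory,
        use_off_heap=level.useOffHeap,
        deserialized=level.deserialized,
        replication=level.replication,
    )

print(to_proto(PySparkStorageLevel.MEMORY_AND_DISK))
```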
1 change: 1 addition & 0 deletions python/pyspark/sql/connect/proto/__init__.py
@@ -22,3 +22,4 @@
from pyspark.sql.connect.proto.expressions_pb2 import *
from pyspark.sql.connect.proto.relations_pb2 import *
from pyspark.sql.connect.proto.catalog_pb2 import *
from pyspark.sql.connect.proto.common_pb2 import *
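Because of the wildcard re-export added above, downstream code can keep importing the message from the package root; a minimal sketch:

```python
# StorageLevel now resolves via common_pb2, but the package-root
# import path is unchanged for callers.
from pyspark.sql.connect.proto import StorageLevel

level = StorageLevel(use_memory=True, replication=1)
assert level.use_memory
```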
253 changes: 120 additions & 133 deletions python/pyspark/sql/connect/proto/base_pb2.py

Large diffs are not rendered by default.

56 changes: 5 additions & 51 deletions python/pyspark/sql/connect/proto/base_pb2.pyi
@@ -41,6 +41,7 @@ import google.protobuf.internal.containers
import google.protobuf.internal.enum_type_wrapper
import google.protobuf.message
import pyspark.sql.connect.proto.commands_pb2
import pyspark.sql.connect.proto.common_pb2
import pyspark.sql.connect.proto.expressions_pb2
import pyspark.sql.connect.proto.relations_pb2
import pyspark.sql.connect.proto.types_pb2
@@ -132,53 +133,6 @@ class UserContext(google.protobuf.message.Message):

global___UserContext = UserContext

class StorageLevel(google.protobuf.message.Message):
"""StorageLevel for persisting Datasets/Tables."""

DESCRIPTOR: google.protobuf.descriptor.Descriptor

USE_DISK_FIELD_NUMBER: builtins.int
USE_MEMORY_FIELD_NUMBER: builtins.int
USE_OFF_HEAP_FIELD_NUMBER: builtins.int
DESERIALIZED_FIELD_NUMBER: builtins.int
REPLICATION_FIELD_NUMBER: builtins.int
use_disk: builtins.bool
"""(Required) Whether the cache should use disk or not."""
use_memory: builtins.bool
"""(Required) Whether the cache should use memory or not."""
use_off_heap: builtins.bool
"""(Required) Whether the cache should use off-heap or not."""
deserialized: builtins.bool
"""(Required) Whether the cached data is deserialized or not."""
replication: builtins.int
"""(Required) The number of replicas."""
def __init__(
self,
*,
use_disk: builtins.bool = ...,
use_memory: builtins.bool = ...,
use_off_heap: builtins.bool = ...,
deserialized: builtins.bool = ...,
replication: builtins.int = ...,
) -> None: ...
def ClearField(
self,
field_name: typing_extensions.Literal[
"deserialized",
b"deserialized",
"replication",
b"replication",
"use_disk",
b"use_disk",
"use_memory",
b"use_memory",
"use_off_heap",
b"use_off_heap",
],
) -> None: ...

global___StorageLevel = StorageLevel

class AnalyzePlanRequest(google.protobuf.message.Message):
"""Request to perform plan analyze, optionally to explain the plan."""

@@ -423,13 +377,13 @@
def relation(self) -> pyspark.sql.connect.proto.relations_pb2.Relation:
"""(Required) The logical plan to persist."""
@property
def storage_level(self) -> global___StorageLevel:
def storage_level(self) -> pyspark.sql.connect.proto.common_pb2.StorageLevel:
"""(Optional) The storage level."""
def __init__(
self,
*,
relation: pyspark.sql.connect.proto.relations_pb2.Relation | None = ...,
storage_level: global___StorageLevel | None = ...,
storage_level: pyspark.sql.connect.proto.common_pb2.StorageLevel | None = ...,
) -> None: ...
def HasField(
self,
@@ -866,12 +820,12 @@ class AnalyzePlanResponse(google.protobuf.message.Message):

STORAGE_LEVEL_FIELD_NUMBER: builtins.int
@property
def storage_level(self) -> global___StorageLevel:
def storage_level(self) -> pyspark.sql.connect.proto.common_pb2.StorageLevel:
"""(Required) The StorageLevel as a result of get_storage_level request."""
def __init__(
self,
*,
storage_level: global___StorageLevel | None = ...,
storage_level: pyspark.sql.connect.proto.common_pb2.StorageLevel | None = ...,
) -> None: ...
def HasField(
self, field_name: typing_extensions.Literal["storage_level", b"storage_level"]
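The request stubs now reference the shared type; a hedged sketch of populating such a field (assuming the `AnalyzePlanRequest.Persist` submessage shown in this diff):

```python
from pyspark.sql.connect.proto import base_pb2, common_pb2

# storage_level is an optional message field, so HasField applies.
persist = base_pb2.AnalyzePlanRequest.Persist(
    storage_level=common_pb2.StorageLevel(use_memory=True, replication=1)
)
assert persist.HasField("storage_level")
```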
55 changes: 55 additions & 0 deletions python/pyspark/sql/connect/proto/common_pb2.py
@@ -0,0 +1,55 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: spark/connect/common.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database

# @@protoc_insertion_point(imports)

_sym_db = _symbol_database.Default()


DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(
b'\n\x1aspark/connect/common.proto\x12\rspark.connect"\xb0\x01\n\x0cStorageLevel\x12\x19\n\x08use_disk\x18\x01 \x01(\x08R\x07useDisk\x12\x1d\n\nuse_memory\x18\x02 \x01(\x08R\tuseMemory\x12 \n\x0cuse_off_heap\x18\x03 \x01(\x08R\nuseOffHeap\x12"\n\x0c\x64\x65serialized\x18\x04 \x01(\x08R\x0c\x64\x65serialized\x12 \n\x0breplication\x18\x05 \x01(\x05R\x0breplicationB"\n\x1eorg.apache.spark.connect.protoP\x01\x62\x06proto3'
)


_STORAGELEVEL = DESCRIPTOR.message_types_by_name["StorageLevel"]
StorageLevel = _reflection.GeneratedProtocolMessageType(
"StorageLevel",
(_message.Message,),
{
"DESCRIPTOR": _STORAGELEVEL,
"__module__": "spark.connect.common_pb2"
# @@protoc_insertion_point(class_scope:spark.connect.StorageLevel)
},
)
_sym_db.RegisterMessage(StorageLevel)

if _descriptor._USE_C_DESCRIPTORS == False:

DESCRIPTOR._options = None
DESCRIPTOR._serialized_options = b"\n\036org.apache.spark.connect.protoP\001"
_STORAGELEVEL._serialized_start = 46
_STORAGELEVEL._serialized_end = 222
# @@protoc_insertion_point(module_scope)
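A short round-trip sketch against the generated class (standard protobuf wire behavior, nothing specific to this commit):

```python
from pyspark.sql.connect.proto import common_pb2

# Serialize and re-parse; field numbers 1-5 match common.proto above.
src = common_pb2.StorageLevel(use_memory=True, replication=2)
data = src.SerializeToString()
dst = common_pb2.StorageLevel()
dst.ParseFromString(data)
assert dst.use_memory and dst.replication == 2
```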
93 changes: 93 additions & 0 deletions python/pyspark/sql/connect/proto/common_pb2.pyi
@@ -0,0 +1,93 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""
@generated by mypy-protobuf. Do not edit manually!
isort:skip_file
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import builtins
import google.protobuf.descriptor
import google.protobuf.message
import sys

if sys.version_info >= (3, 8):
import typing as typing_extensions
else:
import typing_extensions

DESCRIPTOR: google.protobuf.descriptor.FileDescriptor

class StorageLevel(google.protobuf.message.Message):
"""StorageLevel for persisting Datasets/Tables."""

DESCRIPTOR: google.protobuf.descriptor.Descriptor

USE_DISK_FIELD_NUMBER: builtins.int
USE_MEMORY_FIELD_NUMBER: builtins.int
USE_OFF_HEAP_FIELD_NUMBER: builtins.int
DESERIALIZED_FIELD_NUMBER: builtins.int
REPLICATION_FIELD_NUMBER: builtins.int
use_disk: builtins.bool
"""(Required) Whether the cache should use disk or not."""
use_memory: builtins.bool
"""(Required) Whether the cache should use memory or not."""
use_off_heap: builtins.bool
"""(Required) Whether the cache should use off-heap or not."""
deserialized: builtins.bool
"""(Required) Whether the cached data is deserialized or not."""
replication: builtins.int
"""(Required) The number of replicas."""
def __init__(
self,
*,
use_disk: builtins.bool = ...,
use_memory: builtins.bool = ...,
use_off_heap: builtins.bool = ...,
deserialized: builtins.bool = ...,
replication: builtins.int = ...,
) -> None: ...
def ClearField(
self,
field_name: typing_extensions.Literal[
"deserialized",
b"deserialized",
"replication",
b"replication",
"use_disk",
b"use_disk",
"use_memory",
b"use_memory",
"use_off_heap",
b"use_off_heap",
],
) -> None: ...

global___StorageLevel = StorageLevel
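The stub constrains `ClearField` to the literal field names above, so a typo fails type checking rather than surfacing at runtime; a small usage sketch:

```python
from pyspark.sql.connect.proto import common_pb2

level = common_pb2.StorageLevel(use_disk=True, replication=3)
level.ClearField("replication")  # resets to the proto3 default (0)
assert level.replication == 0 and level.use_disk
```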
