Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

S3: Implement restore object with select job type #7504

Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
20 changes: 20 additions & 0 deletions moto/s3/exceptions.py
Expand Up @@ -604,3 +604,23 @@ def __init__(self) -> None:
"DaysMustProvidedExceptForSelectRequest",
"`Days` must be provided except for select requests",
)


class InvalidRestoreRequestType(S3ClientError):
    """Error for a restore request whose type is not `SELECT`."""

    # Status code attached to this error (presumably the HTTP status).
    code = 400

    def __init__(self) -> None:
        error_code = "InvalidRestoreRequestType"
        message = "Invalid type for restore request. It must be `SELECT`"
        super().__init__(error_code, message)


class MissingRequiredParametersForSelectRequest(S3ClientError):
    """Error for a select-type restore request lacking a required parameter.

    The name of the missing parameter is embedded in the error message.
    """

    # Status code attached to this error (presumably the HTTP status).
    code = 400

    def __init__(self, parameter_name: str) -> None:
        error_code = "MissingRequiredParametersForSelectRequest"
        message = f'Missing required parameter in RestoreRequest: "{parameter_name}" for select request'
        super().__init__(error_code, message)
74 changes: 73 additions & 1 deletion moto/s3/models.py
Expand Up @@ -48,12 +48,14 @@
InvalidPart,
InvalidPublicAccessBlockConfiguration,
InvalidRequest,
InvalidRestoreRequestType,
InvalidStorageClass,
InvalidTagError,
InvalidTargetBucketForLogging,
MalformedXML,
MissingBucket,
MissingKey,
MissingRequiredParametersForSelectRequest,
NoSuchPublicAccessBlockConfiguration,
NoSuchUpload,
ObjectLockConfigurationNotFoundError,
Expand Down Expand Up @@ -2885,7 +2887,13 @@
]

def restore_object(
self, bucket_name: str, key_name: str, days: Optional[str], type_: Optional[str]
self,
bucket_name: str,
key_name: str,
days: Optional[str],
type_: Optional[str],
select_parameters: Optional[Dict[str, Any]],
output_location: Optional[Dict[str, Any]],
) -> bool:
key = self.get_object(bucket_name, key_name)
if not key:
Expand All @@ -2897,8 +2905,72 @@
if days and type_:
raise DaysMustNotProvidedForSelectRequest()

if type_ and type_ != "SELECT":
raise InvalidRestoreRequestType()

if type_ and select_parameters is None:
raise MissingRequiredParametersForSelectRequest("SelectParameters")

if type_ and output_location is None:
raise MissingRequiredParametersForSelectRequest("OutputLocation")

if key.storage_class not in ARCHIVE_STORAGE_CLASSES:
raise InvalidObjectState(storage_class=key.storage_class)

# select request
if type_:
restored_key = copy.deepcopy(key)
output_location_s3 = output_location["S3"]

# TODO: 'Encryption', 'CannedACL', 'AccessControlList', 'Tagging' and 'UserMetadata' still need to be supported for the OutputLocation.S3 configuration.
# set bucket name
_bucket_name = output_location_s3.get("BucketName")
if _bucket_name is not None:
# check that the bucket exists here.
bucket = self.get_bucket(_bucket_name)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check should be done as early as possible as well

else:
bucket = self.bucket(key.bucket_name)
restored_key.bucket_name = bucket.name

# set key name
_key_name = output_location_s3.get("Prefix")
if _key_name is not None:

Check warning on line 2937 in moto/s3/models.py

View check run for this annotation

Codecov / codecov/patch

moto/s3/models.py#L2937

Added line #L2937 was not covered by tests
restored_key.name = _key_name
key_name = _key_name
else:
key_name = key.name

# set storage class
_storage = output_location_s3.get("StorageClass")
if _storage is not None:
restored_key.set_storage_class(_storage)

Check warning on line 2946 in moto/s3/models.py

View check run for this annotation

Codecov / codecov/patch

moto/s3/models.py#L2946

Added line #L2946 was not covered by tests

# set value
contents = self.select_object_content(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The select_object_content should be done first, or at least after the validation. This operation can fail because of an invalid query, so we should only try to create new S3 objects after we know that succeeds

key.bucket_name,
key.name,
select_parameters["Expression"],
select_parameters["InputSerialization"],
)

# serialize selected object
# TODO: Delimiter extraction is also defined in `serialize_select` method in `moto/s3/select_object_content.py`. This part might be extracted into a common method.
delimiter = (
(select_parameters["OutputSerialization"].get("JSON") or {}).get(
"RecordDelimiter"
)
or "\n"
).encode("utf-8")
payload_bytes = b""
for data in contents:
payload_bytes += data + delimiter
restored_key.value = payload_bytes

# save object to bucket
bucket.keys.setlist(key_name, restored_key)
return True

# restore object request
had_expiry_date = key.expiry_date is not None
if days:
key.restore(int(days))
Expand Down
2 changes: 2 additions & 0 deletions moto/s3/responses.py
Expand Up @@ -2278,6 +2278,8 @@ def _key_response_post(
key_name,
params.get("Days", None),
params.get("Type", None),
params.get("SelectParameters", None),
params.get("OutputLocation", None),
)
status_code = 200 if previously_restored else 202
return status_code, {}, ""
Expand Down
131 changes: 130 additions & 1 deletion tests/test_s3/test_s3.py
Expand Up @@ -596,6 +596,55 @@ def test_restore_key_transition():
state_manager.unset_transition(model_name="s3::keyrestore")


@freeze_time("2012-01-01 12:00:00")
@mock_aws
def test_restore_key_with_select():
    """A `SELECT` restore request writes the query result to the output location.

    The object is stored in GLACIER, restored with a `SELECT COUNT(*)`
    expression, and the serialized result is read back from the key named by
    `OutputLocation.S3.Prefix`.
    """
    if not settings.TEST_DECORATOR_MODE:
        raise SkipTest("Can't set transition directly in ServerMode")

    state_manager.set_transition(
        model_name="s3::keyrestore", transition={"progression": "manual", "times": 1}
    )
    # Reset the transition in `finally` so a failing assertion cannot leak the
    # manual progression into tests that run afterwards.
    try:
        s3_resource = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
        bucket_name = "foobar"
        bucket = s3_resource.Bucket(bucket_name)
        bucket.create()

        key = bucket.put_object(
            Key="the-key",
            Body=json.dumps({"a1": "b1", "a2": "b2"}),
            StorageClass="GLACIER",
        )
        assert key.restore is None

        # restore successfully completed
        restored_key_name = "restored.json"
        key.restore_object(
            RestoreRequest={
                "Type": "SELECT",
                "SelectParameters": {
                    "Expression": "SELECT COUNT(*) as cnt FROM S3Object",
                    "ExpressionType": "SQL",
                    "InputSerialization": {"JSON": {"Type": "DOCUMENT"}},
                    "OutputSerialization": {"JSON": {"RecordDelimiter": ","}},
                },
                "OutputLocation": {
                    "S3": {
                        "BucketName": bucket_name,
                        "Prefix": restored_key_name,
                        "StorageClass": "STANDARD",
                    }
                },
            }
        )
        resp = boto3.client("s3", region_name=DEFAULT_REGION_NAME).get_object(
            Bucket=bucket_name, Key=restored_key_name
        )
        # The single result record is followed by the "," record delimiter.
        assert resp["ContentLength"] == 9
        assert resp["Body"].read() == b'{"_1":1},'
    finally:
        state_manager.unset_transition(model_name="s3::keyrestore")


@mock_aws
def test_restore_unknown_key():
client = boto3.client("s3", region_name=DEFAULT_REGION_NAME)
Expand Down Expand Up @@ -633,7 +682,8 @@ def test_restore_object_invalid_request_params():
raise SkipTest("Can't set transition directly in ServerMode")

s3_resource = boto3.resource("s3", region_name=DEFAULT_REGION_NAME)
bucket = s3_resource.Bucket("foobar")
bucket_name = "foobar"
bucket = s3_resource.Bucket(bucket_name)
bucket.create()

key = bucket.put_object(Key="the-key", Body=b"somedata", StorageClass="GLACIER")
Expand All @@ -652,6 +702,85 @@ def test_restore_object_invalid_request_params():
assert err["Code"] == "DaysMustNotProvidedForSelectRequest"
assert err["Message"] == "`Days` must not be provided for select requests"

# Invalid type for restore request. It must be `SELECT`.
with pytest.raises(ClientError) as exc:
key.restore_object(RestoreRequest={"Type": "INVALID_TYPE"})
err = exc.value.response["Error"]
assert err["Code"] == "InvalidRestoreRequestType"
assert err["Message"] == "Invalid type for restore request. It must be `SELECT`"

# `SelectParameters` must be provided for select requests.
with pytest.raises(ClientError) as exc:
key.restore_object(RestoreRequest={"Type": "SELECT"})
err = exc.value.response["Error"]
assert err["Code"] == "MissingRequiredParametersForSelectRequest"
assert (
err["Message"]
== 'Missing required parameter in RestoreRequest: "SelectParameters" for select request'
)

# `OutputLocation` must be provided for select requests.
with pytest.raises(ClientError) as exc:
key.restore_object(
RestoreRequest={
"Type": "SELECT",
"SelectParameters": {
"Expression": "SELECT * FROM S3Object",
"ExpressionType": "SQL",
"InputSerialization": {"JSON": {"Type": "DOCUMENT"}},
"OutputSerialization": {"JSON": {"RecordDelimiter": ","}},
},
},
)
err = exc.value.response["Error"]
assert err["Code"] == "MissingRequiredParametersForSelectRequest"
assert (
err["Message"]
== 'Missing required parameter in RestoreRequest: "OutputLocation" for select request'
)

# raise error when the specified bucket does not exist.
with pytest.raises(ClientError) as exc:
    key.restore_object(
        RestoreRequest={
            "Type": "SELECT",
            "SelectParameters": {
                "Expression": "SELECT COUNT(*) as cnt FROM S3Object",
                "ExpressionType": "SQL",
                "InputSerialization": {"JSON": {"Type": "DOCUMENT"}},
                "OutputSerialization": {"JSON": {"RecordDelimiter": ","}},
            },
            "OutputLocation": {
                "S3": {"BucketName": "other-bucket", "Prefix": "test"}
            },
        }
    )
err = exc.value.response["Error"]
# Must be an `assert`: a bare comparison is evaluated and discarded.
assert err["Code"] == "NoSuchBucket"

# raise error when specified storage class is invalid
with pytest.raises(ClientError) as exc:
    key.restore_object(
        RestoreRequest={
            "Type": "SELECT",
            "SelectParameters": {
                "Expression": "SELECT COUNT(*) as cnt FROM S3Object",
                "ExpressionType": "SQL",
                "InputSerialization": {"JSON": {"Type": "DOCUMENT"}},
                "OutputSerialization": {"JSON": {"RecordDelimiter": ","}},
            },
            "OutputLocation": {
                "S3": {
                    "BucketName": bucket_name,
                    "Prefix": "test.json",
                    "StorageClass": "INVALID_CLASS",
                }
            },
        }
    )
err = exc.value.response["Error"]
# Must be an `assert`: a bare comparison is evaluated and discarded.
assert err["Code"] == "InvalidStorageClass"


@mock_aws
def test_get_versioning_status():
Expand Down