Skip to content

Commit

Permalink
feat(client-glue): This release adds support for AWS Glue Crawler wit…
Browse files Browse the repository at this point in the history
…h Iceberg Tables, allowing Crawlers to discover Iceberg Tables in S3 and register them in Glue Data Catalog for query engines to query against.
  • Loading branch information
awstools committed Jun 29, 2023
1 parent cef0845 commit 2a11fd8
Show file tree
Hide file tree
Showing 10 changed files with 216 additions and 68 deletions.
12 changes: 12 additions & 0 deletions clients/client-glue/src/commands/BatchGetCrawlersCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,18 @@ export interface BatchGetCrawlersCommandOutput extends BatchGetCrawlersResponse,
* // CreateNativeDeltaTable: true || false,
* // },
* // ],
* // IcebergTargets: [ // IcebergTargetList
* // { // IcebergTarget
* // Paths: [
* // "STRING_VALUE",
* // ],
* // ConnectionName: "STRING_VALUE",
* // Exclusions: [
* // "STRING_VALUE",
* // ],
* // MaximumTraversalDepth: Number("int"),
* // },
* // ],
* // },
* // DatabaseName: "STRING_VALUE",
* // Description: "STRING_VALUE",
Expand Down
12 changes: 12 additions & 0 deletions clients/client-glue/src/commands/CreateCrawlerCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,18 @@ export interface CreateCrawlerCommandOutput extends CreateCrawlerResponse, __Met
* CreateNativeDeltaTable: true || false,
* },
* ],
* IcebergTargets: [ // IcebergTargetList
* { // IcebergTarget
* Paths: [
* "STRING_VALUE",
* ],
* ConnectionName: "STRING_VALUE",
* Exclusions: [
* "STRING_VALUE",
* ],
* MaximumTraversalDepth: Number("int"),
* },
* ],
* },
* Schedule: "STRING_VALUE",
* Classifiers: [ // ClassifierNameList
Expand Down
12 changes: 12 additions & 0 deletions clients/client-glue/src/commands/GetCrawlerCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,18 @@ export interface GetCrawlerCommandOutput extends GetCrawlerResponse, __MetadataB
* // CreateNativeDeltaTable: true || false,
* // },
* // ],
* // IcebergTargets: [ // IcebergTargetList
* // { // IcebergTarget
* // Paths: [
* // "STRING_VALUE",
* // ],
* // ConnectionName: "STRING_VALUE",
* // Exclusions: [
* // "STRING_VALUE",
* // ],
* // MaximumTraversalDepth: Number("int"),
* // },
* // ],
* // },
* // DatabaseName: "STRING_VALUE",
* // Description: "STRING_VALUE",
Expand Down
12 changes: 12 additions & 0 deletions clients/client-glue/src/commands/GetCrawlersCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,18 @@ export interface GetCrawlersCommandOutput extends GetCrawlersResponse, __Metadat
* // CreateNativeDeltaTable: true || false,
* // },
* // ],
* // IcebergTargets: [ // IcebergTargetList
* // { // IcebergTarget
* // Paths: [
* // "STRING_VALUE",
* // ],
* // ConnectionName: "STRING_VALUE",
* // Exclusions: [
* // "STRING_VALUE",
* // ],
* // MaximumTraversalDepth: Number("int"),
* // },
* // ],
* // },
* // DatabaseName: "STRING_VALUE",
* // Description: "STRING_VALUE",
Expand Down
12 changes: 12 additions & 0 deletions clients/client-glue/src/commands/UpdateCrawlerCommand.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,18 @@ export interface UpdateCrawlerCommandOutput extends UpdateCrawlerResponse, __Met
* CreateNativeDeltaTable: true || false,
* },
* ],
* IcebergTargets: [ // IcebergTargetList
* { // IcebergTarget
* Paths: [
* "STRING_VALUE",
* ],
* ConnectionName: "STRING_VALUE",
* Exclusions: [
* "STRING_VALUE",
* ],
* MaximumTraversalDepth: Number("int"),
* },
* ],
* },
* Schedule: "STRING_VALUE",
* Classifiers: [ // ClassifierNameList
Expand Down
49 changes: 32 additions & 17 deletions clients/client-glue/src/models/models_0.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1658,6 +1658,33 @@ export interface DynamoDBTarget {
scanRate?: number;
}

/**
* <p>Specifies an Apache Iceberg data source where Iceberg tables are stored in Amazon S3.</p>
* <p>All members are optional.</p>
* @public
*/
export interface IcebergTarget {
/**
* <p>One or more Amazon S3 paths that contain Iceberg metadata folders, given in the form <code>s3://bucket/prefix</code>.</p>
*/
Paths?: string[];

/**
* <p>The name of the connection to use to connect to the Iceberg target.</p>
*/
ConnectionName?: string;

/**
* <p>A list of glob patterns used to exclude paths from the crawl.
* For more information, see <a href="https://docs.aws.amazon.com/glue/latest/dg/add-crawler.html">Catalog Tables with a Crawler</a>.</p>
*/
Exclusions?: string[];

/**
* <p>The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time.</p>
*/
MaximumTraversalDepth?: number;
}

/**
* @public
* @enum
Expand Down Expand Up @@ -1793,6 +1820,11 @@ export interface CrawlerTargets {
* <p>Specifies Delta data store targets.</p>
*/
DeltaTargets?: DeltaTarget[];

/**
* <p>Specifies Apache Iceberg data store targets.</p>
*/
IcebergTargets?: IcebergTarget[];
}

/**
Expand Down Expand Up @@ -8199,20 +8231,3 @@ export interface MLUserDataEncryption {
*/
KmsKeyId?: string;
}

/**
* <p>The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.</p>
* <p>Additionally, imported labels and trained transforms can now be encrypted using a customer-provided KMS key.</p>
* @public
*/
export interface TransformEncryption {
/**
* <p>An <code>MLUserDataEncryption</code> object containing the encryption mode and customer-provided KMS key ID.</p>
*/
MlUserDataEncryption?: MLUserDataEncryption;

/**
* <p>The name of the security configuration.</p>
*/
TaskRunSecurityConfigurationName?: string;
}
66 changes: 18 additions & 48 deletions clients/client-glue/src/models/models_1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import {
FederatedDatabase,
GlueTable,
JobRun,
MLUserDataEncryption,
Partition,
PartitionInput,
PartitionValueList,
Expand All @@ -31,14 +32,30 @@ import {
SchemaId,
StorageDescriptor,
TaskStatusType,
TransformEncryption,
TransformParameters,
TransformType,
Trigger,
TriggerType,
WorkerType,
} from "./models_0";

/**
* <p>The encryption-at-rest settings of the transform that apply to accessing user data. Machine learning transforms can access user data encrypted in Amazon S3 using KMS.</p>
* <p>Additionally, imported labels and trained transforms can now be encrypted using a customer-provided KMS key.</p>
* @public
*/
export interface TransformEncryption {
/**
* <p>An <code>MLUserDataEncryption</code> object containing the encryption mode and customer-provided KMS key ID.</p>
*/
MlUserDataEncryption?: MLUserDataEncryption;

/**
* <p>The name of the security configuration.</p>
*/
TaskRunSecurityConfigurationName?: string;
}

/**
* @public
*/
Expand Down Expand Up @@ -7215,50 +7232,3 @@ export interface GetUserDefinedFunctionRequest {
*/
FunctionName: string | undefined;
}

/**
* <p>Represents the equivalent of a Hive user-defined function
* (<code>UDF</code>) definition.</p>
* <p>All members are optional.</p>
* @public
*/
export interface UserDefinedFunction {
/**
* <p>The name of the function.</p>
*/
FunctionName?: string;

/**
* <p>The name of the catalog database that contains the function.</p>
*/
DatabaseName?: string;

/**
* <p>The Java class that contains the function code.</p>
*/
ClassName?: string;

/**
* <p>The owner of the function.</p>
*/
OwnerName?: string;

/**
* <p>The owner type. Typed as a <code>PrincipalType</code> value or any other string.</p>
*/
OwnerType?: PrincipalType | string;

/**
* <p>The time at which the function was created.</p>
*/
CreateTime?: Date;

/**
* <p>The resource URIs for the function.</p>
*/
ResourceUris?: ResourceUri[];

/**
* <p>The ID of the Data Catalog in which the function resides.</p>
*/
CatalogId?: string;
}
50 changes: 49 additions & 1 deletion clients/client-glue/src/models/models_2.ts
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,11 @@ import {
DataCatalogEncryptionSettings,
DataQualityEvaluationRunAdditionalRunOptions,
JobBookmarkEntry,
PrincipalType,
RegistryId,
RegistryStatus,
ResourceShareType,
ResourceUri,
SchemaStatus,
SchemaVersionNumber,
SchemaVersionStatus,
Expand All @@ -118,10 +120,56 @@ import {
TableInput,
TransformFilterCriteria,
TransformSortCriteria,
UserDefinedFunction,
UserDefinedFunctionInput,
} from "./models_1";

/**
* <p>Represents the equivalent of a Hive user-defined function
* (<code>UDF</code>) definition.</p>
* <p>All members are optional.</p>
* @public
*/
export interface UserDefinedFunction {
/**
* <p>The name of the function.</p>
*/
FunctionName?: string;

/**
* <p>The name of the catalog database that contains the function.</p>
*/
DatabaseName?: string;

/**
* <p>The Java class that contains the function code.</p>
*/
ClassName?: string;

/**
* <p>The owner of the function.</p>
*/
OwnerName?: string;

/**
* <p>The owner type. Typed as a <code>PrincipalType</code> value or any other string.</p>
*/
OwnerType?: PrincipalType | string;

/**
* <p>The time at which the function was created.</p>
*/
CreateTime?: Date;

/**
* <p>The resource URIs for the function.</p>
*/
ResourceUris?: ResourceUri[];

/**
* <p>The ID of the Data Catalog in which the function resides.</p>
*/
CatalogId?: string;
}

/**
* @public
*/
Expand Down
15 changes: 13 additions & 2 deletions clients/client-glue/src/protocols/Aws_json1_1.ts
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ import {
GlueTable,
GovernedCatalogSource,
GovernedCatalogTarget,
IcebergTarget,
IdempotentParameterMismatchException,
IllegalSessionStateException,
InternalServiceException,
Expand Down Expand Up @@ -650,7 +651,6 @@ import {
StorageDescriptor,
StreamingDataPreviewOptions,
TransformConfigParameter,
TransformEncryption,
TransformParameters,
Union,
UpsertRedshiftTargetOptions,
Expand Down Expand Up @@ -863,10 +863,10 @@ import {
TaskRun,
TaskRunFilterCriteria,
TaskRunSortCriteria,
TransformEncryption,
TransformFilterCriteria,
TransformSortCriteria,
UnfilteredPartition,
UserDefinedFunction,
UserDefinedFunctionInput,
XMLClassifier,
} from "../models/models_1";
Expand Down Expand Up @@ -999,6 +999,7 @@ import {
UpdateUserDefinedFunctionRequest,
UpdateWorkflowRequest,
UpdateXMLClassifierRequest,
UserDefinedFunction,
VersionMismatchException,
} from "../models/models_2";

Expand Down Expand Up @@ -15959,6 +15960,7 @@ const se_CrawlerTargets = (input: CrawlerTargets, context: __SerdeContext): any
CatalogTargets: _json,
DeltaTargets: _json,
DynamoDBTargets: (_) => se_DynamoDBTargetList(_, context),
IcebergTargets: _json,
JdbcTargets: _json,
MongoDBTargets: _json,
S3Targets: _json,
Expand Down Expand Up @@ -16654,6 +16656,10 @@ const se_GetTablesRequest = (input: GetTablesRequest, context: __SerdeContext):

// se_GovernedCatalogTarget omitted.

// se_IcebergTarget omitted.

// se_IcebergTargetList omitted.

// se_ImportCatalogToGlueRequest omitted.

// se_JDBCConnectorOptions omitted.
Expand Down Expand Up @@ -18199,6 +18205,7 @@ const de_CrawlerTargets = (output: any, context: __SerdeContext): CrawlerTargets
CatalogTargets: _json,
DeltaTargets: _json,
DynamoDBTargets: (_: any) => de_DynamoDBTargetList(_, context),
IcebergTargets: _json,
JdbcTargets: _json,
MongoDBTargets: _json,
S3Targets: _json,
Expand Down Expand Up @@ -19553,6 +19560,10 @@ const de_GrokClassifier = (output: any, context: __SerdeContext): GrokClassifier
}) as any;
};

// de_IcebergTarget omitted.

// de_IcebergTargetList omitted.

// de_IdempotentParameterMismatchException omitted.

// de_IllegalBlueprintStateException omitted.
Expand Down
Loading

0 comments on commit 2a11fd8

Please sign in to comment.