Skip to content
Permalink
Browse files
docs(samples): create and query Amazon s3 data using external table (#…
…835)

* docs(samples): add omni query on external table aws

* docs(samples): update code

* docs(samples): add user id and external id

* docs(samples): fix connection name

* docs(samples): update create external table AWS sample

* update create dataset AWS sample

* nit clean up

* update query external table sample

* fix checkstyle errors

* update based on comments

Co-authored-by: Praful Makani <praful@qlogic.io>
  • Loading branch information
stephaniewang526 and Praful Makani committed Oct 21, 2020
1 parent 36b5f06 commit 53a56be8b0878edbc4bc5dd67ed63b48c96d3899
@@ -16,46 +16,62 @@

package com.example.bigquery;

// [START bigquery_omni_create_external_table]
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.CsvOptions;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;

// Sample to create an external aws table
public class CreateExternalTableAws {

public static void main(String[] args) {
// TODO(developer): Replace these variables before running the sample.
String projectId = "MY_PROJECT_ID";
String datasetName = "MY_DATASET_NAME";
String tableName = "MY_TABLE_NAME";
// Create a aws connection
// projects/{project_id}/locations/{location_id}/connections/{connection_id}
String connectionId = "MY_CONNECTION_NAME";
String connectionId = "MY_CONNECTION_ID";
String sourceUri = "s3://your-bucket-name/";
CsvOptions options = CsvOptions.newBuilder().setSkipLeadingRows(1).build();
Schema schema =
Schema.of(
Field.of("name", StandardSQLTypeName.STRING),
Field.of("post_abbr", StandardSQLTypeName.STRING));
ExternalTableDefinition externalTableDefinition =
ExternalTableDefinition.newBuilder(sourceUri, options)
.setConnectionId(connectionId)
.setSchema(schema)
.build();
createExternalTableAws(datasetName, tableName, externalTableDefinition);
createExternalTableAws(projectId, datasetName, tableName, externalTableDefinition);
}

public static void createExternalTableAws(
String datasetName, String tableName, ExternalTableDefinition externalTableDefinition) {
String projectId,
String datasetName,
String tableName,
ExternalTableDefinition externalTableDefinition) {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

TableId tableId = TableId.of(datasetName, tableName);
TableId tableId = TableId.of(projectId, datasetName, tableName);
TableInfo tableInfo = TableInfo.newBuilder(tableId, externalTableDefinition).build();

bigquery.create(tableInfo);
System.out.println("Aws external table created successfully");

// Clean up
bigquery.delete(TableId.of(projectId, datasetName, tableName));
} catch (BigQueryException e) {
System.out.println("Aws external was not created." + e.toString());
}
}
}
// [END bigquery_omni_create_external_table]
@@ -0,0 +1,68 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

// [START bigquery_omni_query_external_aws_s3]
import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryException;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.CsvOptions;
import com.google.cloud.bigquery.DatasetId;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.QueryJobConfiguration;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
import com.google.cloud.bigquery.TableResult;

// Sample to queries an external data source aws s3 using a permanent table
public class QueryExternalTableAws {

public static void main(String[] args) throws InterruptedException {
// TODO(developer): Replace these variables before running the sample.
String projectId = "MY_PROJECT_ID";
String datasetName = "MY_DATASET_NAME";
String externalTableName = "MY_EXTERNAL_TABLE_NAME";
// Query to find states starting with 'W'
String query =
String.format(
"SELECT * FROM s%.%s.%s WHERE name LIKE 'W%%'",
projectId, datasetName, externalTableName);
queryExternalTableAws(query);
}

public static void queryExternalTableAws(String query) throws InterruptedException {
try {
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests.
BigQuery bigquery = BigQueryOptions.getDefaultInstance().getService();

TableResult results = bigquery.query(QueryJobConfiguration.of(query));

results
.iterateAll()
.forEach(row -> row.forEach(val -> System.out.printf("%s,", val.toString())));

System.out.println("Query on aws external permanent table performed successfully.");
} catch (BigQueryException e) {
System.out.println("Query not performed \n" + e.toString());
}
}
}
// [END bigquery_omni_query_external_aws_s3]
@@ -39,7 +39,7 @@ public class CreateDatasetAwsIT {
private PrintStream out;
private PrintStream originalPrintStream;

private static final String PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");

private static String requireEnvVar(String varName) {
String value = System.getenv(varName);
@@ -51,7 +51,7 @@ private static String requireEnvVar(String varName) {

@BeforeClass
public static void checkRequirements() {
requireEnvVar("GOOGLE_CLOUD_PROJECT");
requireEnvVar("OMNI_PROJECT_ID");
}

@Before
@@ -66,7 +66,7 @@ public void setUp() {
@After
public void tearDown() {
// Clean up
DeleteDataset.deleteDataset(PROJECT_ID, datasetName);
DeleteDataset.deleteDataset(OMNI_PROJECT_ID, datasetName);
// restores print statements in the original method
System.out.flush();
System.setOut(originalPrintStream);
@@ -75,7 +75,7 @@ public void tearDown() {

@Test
public void testCreateDatasetAws() {
CreateDatasetAws.createDatasetAws(PROJECT_ID, datasetName, LOCATION);
CreateDatasetAws.createDatasetAws(OMNI_PROJECT_ID, datasetName, LOCATION);
assertThat(bout.toString()).contains("Aws dataset created successfully :");
}
}
@@ -47,16 +47,14 @@ public class CreateExternalTableAwsIT {
private static final String ID = UUID.randomUUID().toString().substring(0, 8);
private static final String LOCATION = "aws-us-east-1";
private final Logger log = Logger.getLogger(this.getClass().getName());
private String datasetName;
private String tableName;
private String connectionName;
private ByteArrayOutputStream bout;
private PrintStream out;
private PrintStream originalPrintStream;

private static final String PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
private static final String AWS_ACCOUNT_ID = requireEnvVar("AWS_ACCOUNT_ID");
private static final String AWS_ROLE_ID = requireEnvVar("AWS_ROLE_ID");
private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
private static final String OMNI_DATASET_NAME = requireEnvVar("OMNI_DATASET_NAME");
private static final String AWS_READ_CONNECTION_ID = requireEnvVar("AWS_READ_CONNECTION_ID");

private static String requireEnvVar(String varName) {
String value = System.getenv(varName);
@@ -69,49 +67,21 @@ private static String requireEnvVar(String varName) {
@BeforeClass
public static void checkRequirements() {
requireEnvVar("OMNI_PROJECT_ID");
requireEnvVar("AWS_ACCOUNT_ID");
requireEnvVar("AWS_ROLE_ID");
requireEnvVar("OMNI_DATASET_NAME");
requireEnvVar("AWS_READ_CONNECTION_ID");
}

@Before
public void setUp() throws IOException {
datasetName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID;
public void setUp() {
tableName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID;
connectionName = "CREATE_EXTERNAL_TABLE_AWS_TEST_" + ID;
bout = new ByteArrayOutputStream();
out = new PrintStream(bout);
originalPrintStream = System.out;
System.setOut(out);
// create a temporary aws connection
try (ConnectionServiceClient client = ConnectionServiceClient.create()) {
LocationName parent = LocationName.of(PROJECT_ID, LOCATION);
String iamRoleId = String.format("arn:aws:iam::%s:role/%s", AWS_ACCOUNT_ID, AWS_ROLE_ID);
AwsCrossAccountRole role = AwsCrossAccountRole.newBuilder().setIamRoleId(iamRoleId).build();
AwsProperties awsProperties = AwsProperties.newBuilder().setCrossAccountRole(role).build();
Connection connection = Connection.newBuilder().setAws(awsProperties).build();
CreateConnectionRequest request =
CreateConnectionRequest.newBuilder()
.setParent(parent.toString())
.setConnection(connection)
.setConnectionId(connectionName)
.build();
connectionName = client.createConnection(request).getName();
}
// create a temporary dataset
CreateDatasetAws.createDatasetAws(PROJECT_ID, datasetName, LOCATION);
}

@After
public void tearDown() throws IOException {
// delete a temporary aws connection
try (ConnectionServiceClient client = ConnectionServiceClient.create()) {
DeleteConnectionRequest request =
DeleteConnectionRequest.newBuilder().setName(connectionName).build();
client.deleteConnection(request);
}
// Clean up
DeleteTable.deleteTable(datasetName, tableName);
DeleteDataset.deleteDataset(PROJECT_ID, datasetName);
public void tearDown() {
// restores print statements in the original method
System.out.flush();
System.setOut(originalPrintStream);
@@ -120,18 +90,19 @@ public void tearDown() throws IOException {

@Test
public void testCreateExternalTableAws() {
String sourceUri = "s3://cloud-samples-tests/us-states.csv";
String sourceUri = "s3://omni-samples-test-bucket/us-states.csv";
Schema schema =
Schema.of(
Field.of("name", StandardSQLTypeName.STRING),
Field.of("post_abbr", StandardSQLTypeName.STRING));
CsvOptions options = CsvOptions.newBuilder().setSkipLeadingRows(1).build();
ExternalTableDefinition externalTable =
ExternalTableDefinition externalTableDefinition =
ExternalTableDefinition.newBuilder(sourceUri, options)
.setConnectionId(connectionName)
.setConnectionId(AWS_READ_CONNECTION_ID)
.setSchema(schema)
.build();
CreateExternalTableAws.createExternalTableAws(datasetName, tableName, externalTable);
CreateExternalTableAws.createExternalTableAws(
OMNI_PROJECT_ID, OMNI_DATASET_NAME, tableName, externalTableDefinition);
assertThat(bout.toString()).contains("Aws external table created successfully");
}
}
@@ -0,0 +1,97 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquery;

import static com.google.common.truth.Truth.assertThat;
import static junit.framework.TestCase.assertNotNull;

import com.google.cloud.bigquery.CsvOptions;
import com.google.cloud.bigquery.ExternalTableDefinition;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.connection.v1.AwsCrossAccountRole;
import com.google.cloud.bigquery.connection.v1.AwsProperties;
import com.google.cloud.bigquery.connection.v1.Connection;
import com.google.cloud.bigquery.connection.v1.CreateConnectionRequest;
import com.google.cloud.bigquery.connection.v1.DeleteConnectionRequest;
import com.google.cloud.bigquery.connection.v1.LocationName;
import com.google.cloud.bigqueryconnection.v1.ConnectionServiceClient;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

public class QueryExternalTableAwsIT {

private final Logger log = Logger.getLogger(this.getClass().getName());
private ByteArrayOutputStream bout;
private PrintStream out;
private PrintStream originalPrintStream;

private static final String OMNI_PROJECT_ID = requireEnvVar("OMNI_PROJECT_ID");
private static final String OMNI_DATASET_NAME = requireEnvVar("OMNI_DATASET_NAME");
private static final String OMNI_EXTERNAL_TABLE_NAME = requireEnvVar("OMNI_EXTERNAL_TABLE_NAME");

private static String requireEnvVar(String varName) {
String value = System.getenv(varName);
assertNotNull(
"Environment variable " + varName + " is required to perform these tests.",
System.getenv(varName));
return value;
}

@BeforeClass
public static void checkRequirements() {
requireEnvVar("OMNI_PROJECT_ID");
requireEnvVar("OMNI_DATASET_NAME");
requireEnvVar("OMNI_EXTERNAL_TABLE_NAME");
}

@Before
public void setUp() {
bout = new ByteArrayOutputStream();
out = new PrintStream(bout);
originalPrintStream = System.out;
System.setOut(out);
}

@After
public void tearDown() {
// restores print statements in the original method
System.out.flush();
System.setOut(originalPrintStream);
log.log(Level.INFO, bout.toString());
}

@Test
public void testQueryExternalTableAws() throws InterruptedException {
String query =
String.format(
"SELECT * FROM %s.%s.%s WHERE name LIKE 'W%%'",
OMNI_PROJECT_ID, OMNI_DATASET_NAME, OMNI_EXTERNAL_TABLE_NAME);
QueryExternalTableAws.queryExternalTableAws(query);
assertThat(bout.toString())
.contains("Query on aws external permanent table performed successfully.");
}
}

0 comments on commit 53a56be

Please sign in to comment.