Skip to content
Permalink
Browse files
docs(samples): add create amazon s3 transfer (#416)
  • Loading branch information
Praful Makani committed Oct 9, 2020
1 parent 4eb4530 commit 52ac0a029c1f47c5cdd9268e320a48ce80f7c212
@@ -0,0 +1,83 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquerydatatransfer;

// [START bigquerydatatransfer_create_amazons3_transfer]
import com.google.api.gax.rpc.ApiException;
import com.google.cloud.bigquery.datatransfer.v1.CreateTransferConfigRequest;
import com.google.cloud.bigquery.datatransfer.v1.DataTransferServiceClient;
import com.google.cloud.bigquery.datatransfer.v1.ProjectName;
import com.google.cloud.bigquery.datatransfer.v1.TransferConfig;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

// Sample to create an Amazon S3 transfer config.
public class CreateAmazonS3Transfer {

  public static void main(String[] args) throws IOException {
    // TODO(developer): Replace these variables before running the sample.
    final String projectId = "MY_PROJECT_ID";
    String datasetId = "MY_DATASET_ID";
    String tableId = "MY_TABLE_ID";
    // Amazon S3 bucket URI; the transfer service needs read permission on the bucket.
    String sourceUri = "s3://your-bucket-name/*";
    String awsAccessKeyId = "MY_AWS_ACCESS_KEY_ID";
    String awsSecretAccessId = "AWS_SECRET_ACCESS_ID";
    String fileFormat = "CSV";
    String fieldDelimiter = ",";
    String skipLeadingRows = "1";
    Map<String, Value> params = new HashMap<>();
    params.put(
        "destination_table_name_template", Value.newBuilder().setStringValue(tableId).build());
    params.put("data_path", Value.newBuilder().setStringValue(sourceUri).build());
    params.put("access_key_id", Value.newBuilder().setStringValue(awsAccessKeyId).build());
    params.put("secret_access_key", Value.newBuilder().setStringValue(awsSecretAccessId).build());
    // The Amazon S3 data source expects the key "file_format" (the sibling integration
    // test uses the same key; "source_format" is not recognized by this connector).
    params.put("file_format", Value.newBuilder().setStringValue(fileFormat).build());
    params.put("field_delimiter", Value.newBuilder().setStringValue(fieldDelimiter).build());
    params.put("skip_leading_rows", Value.newBuilder().setStringValue(skipLeadingRows).build());
    TransferConfig transferConfig =
        TransferConfig.newBuilder()
            .setDestinationDatasetId(datasetId)
            .setDisplayName("Your Aws S3 Config Name")
            .setDataSourceId("amazon_s3")
            .setParams(Struct.newBuilder().putAllFields(params).build())
            .setSchedule("every 24 hours")
            .build();
    createAmazonS3Transfer(projectId, transferConfig);
  }

  /**
   * Creates the given transfer config under the project and prints the resulting config name.
   *
   * @param projectId the Google Cloud project that will own the transfer config
   * @param transferConfig the fully-built Amazon S3 transfer configuration
   * @throws IOException if the transfer service client cannot be created
   */
  public static void createAmazonS3Transfer(String projectId, TransferConfig transferConfig)
      throws IOException {
    // The client is AutoCloseable; try-with-resources guarantees background
    // resources are released even when the RPC fails.
    try (DataTransferServiceClient client = DataTransferServiceClient.create()) {
      ProjectName parent = ProjectName.of(projectId);
      CreateTransferConfigRequest request =
          CreateTransferConfigRequest.newBuilder()
              .setParent(parent.toString())
              .setTransferConfig(transferConfig)
              .build();
      TransferConfig config = client.createTransferConfig(request);
      System.out.println("Amazon s3 transfer created successfully :" + config.getName());
    } catch (ApiException ex) {
      // Surface the API failure instead of rethrowing so the sample exits cleanly.
      System.out.println("Amazon s3 transfer was not created." + ex.toString());
    }
  }
}
// [END bigquerydatatransfer_create_amazons3_transfer]
@@ -0,0 +1,148 @@
/*
* Copyright 2020 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.example.bigquerydatatransfer;

import static com.google.common.truth.Truth.assertThat;
import static junit.framework.TestCase.assertNotNull;

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import com.google.cloud.bigquery.DatasetInfo;
import com.google.cloud.bigquery.Field;
import com.google.cloud.bigquery.Schema;
import com.google.cloud.bigquery.StandardSQLTypeName;
import com.google.cloud.bigquery.StandardTableDefinition;
import com.google.cloud.bigquery.TableDefinition;
import com.google.cloud.bigquery.TableId;
import com.google.cloud.bigquery.TableInfo;
import com.google.cloud.bigquery.datatransfer.v1.TransferConfig;
import com.google.protobuf.Struct;
import com.google.protobuf.Value;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

public class CreateAmazonS3TransferIT {

  private static final Logger LOG = Logger.getLogger(CreateAmazonS3TransferIT.class.getName());
  // Short random suffix so concurrent test runs do not collide on resource names.
  private static final String ID = UUID.randomUUID().toString().substring(0, 8);
  private BigQuery bigquery;
  private ByteArrayOutputStream bout;
  // Full resource name of the created transfer config; null until the test succeeds.
  private String name;
  private String displayName;
  private String datasetName;
  private String tableName;
  private PrintStream out;
  private PrintStream originalPrintStream;

  private static final String PROJECT_ID = requireEnvVar("GOOGLE_CLOUD_PROJECT");
  private static final String AWS_ACCESS_KEY_ID = requireEnvVar("AWS_ACCESS_KEY_ID");
  private static final String AWS_SECRET_ACCESS_KEY = requireEnvVar("AWS_SECRET_ACCESS_KEY");
  private static final String AWS_BUCKET = requireEnvVar("AWS_BUCKET");

  // Fails fast with a clear message when a required environment variable is missing.
  private static String requireEnvVar(String varName) {
    String value = System.getenv(varName);
    assertNotNull(
        "Environment variable " + varName + " is required to perform these tests.",
        System.getenv(varName));
    return value;
  }

  @BeforeClass
  public static void checkRequirements() {
    requireEnvVar("GOOGLE_CLOUD_PROJECT");
    requireEnvVar("AWS_ACCESS_KEY_ID");
    requireEnvVar("AWS_SECRET_ACCESS_KEY");
    requireEnvVar("AWS_BUCKET");
  }

  @Before
  public void setUp() {
    displayName = "MY_SCHEDULE_NAME_TEST_" + ID;
    datasetName = "MY_DATASET_NAME_TEST_" + ID;
    tableName = "MY_TABLE_NAME_TEST_" + ID;
    // Create a temporary dataset for the transfer destination.
    bigquery = BigQueryOptions.getDefaultInstance().getService();
    bigquery.create(DatasetInfo.of(datasetName));
    // Create a temporary destination table matching the sample's CSV schema.
    Schema schema =
        Schema.of(
            Field.of("name", StandardSQLTypeName.STRING),
            Field.of("post_abbr", StandardSQLTypeName.STRING));
    TableDefinition tableDefinition = StandardTableDefinition.of(schema);
    TableInfo tableInfo = TableInfo.of(TableId.of(datasetName, tableName), tableDefinition);
    bigquery.create(tableInfo);

    // Capture System.out so the test can assert on the sample's console output.
    bout = new ByteArrayOutputStream();
    out = new PrintStream(bout);
    originalPrintStream = System.out;
    System.setOut(out);
  }

  @After
  public void tearDown() throws IOException {
    // Clean up the transfer config only if the test actually created one; `name`
    // stays null on failure, and an unconditional delete would throw here and
    // mask the original test failure.
    if (name != null) {
      DeleteScheduledQuery.deleteScheduledQuery(name);
    }
    // Delete the temporary table.
    bigquery.delete(TableId.of(datasetName, tableName));
    // Delete the temporary dataset and anything left inside it.
    bigquery.delete(datasetName, BigQuery.DatasetDeleteOption.deleteContents());
    // Restore System.out and log whatever the sample printed.
    System.out.flush();
    System.setOut(originalPrintStream);
    LOG.log(Level.INFO, bout.toString());
  }

  @Test
  public void testCreateAmazonS3Transfer() throws IOException {
    String sourceUri = String.format("s3://%s/*", AWS_BUCKET);
    String fileFormat = "CSV";
    String fieldDelimiter = ",";
    String skipLeadingRows = "1";
    Map<String, Value> params = new HashMap<>();
    params.put(
        "destination_table_name_template", Value.newBuilder().setStringValue(tableName).build());
    params.put("data_path", Value.newBuilder().setStringValue(sourceUri).build());
    params.put("access_key_id", Value.newBuilder().setStringValue(AWS_ACCESS_KEY_ID).build());
    params.put(
        "secret_access_key", Value.newBuilder().setStringValue(AWS_SECRET_ACCESS_KEY).build());
    params.put("file_format", Value.newBuilder().setStringValue(fileFormat).build());
    params.put("field_delimiter", Value.newBuilder().setStringValue(fieldDelimiter).build());
    params.put("skip_leading_rows", Value.newBuilder().setStringValue(skipLeadingRows).build());
    TransferConfig transferConfig =
        TransferConfig.newBuilder()
            .setDestinationDatasetId(datasetName)
            .setDisplayName(displayName)
            .setDataSourceId("amazon_s3")
            .setParams(Struct.newBuilder().putAllFields(params).build())
            .setSchedule("every 24 hours")
            .build();
    CreateAmazonS3Transfer.createAmazonS3Transfer(PROJECT_ID, transferConfig);
    String result = bout.toString();
    // The sample prints "...successfully :<config name>\n"; strip up to the colon
    // and the trailing newline to recover the config name for tearDown.
    name = result.substring(result.indexOf(":") + 1, result.length() - 1);
    assertThat(result).contains("Amazon s3 transfer created successfully :");
  }
}

0 comments on commit 52ac0a0

Please sign in to comment.