From 60eab4ec74f402a63bd76583077c2b6edd8cd1c7 Mon Sep 17 00:00:00 2001 From: PawasChhokra Date: Wed, 2 Aug 2017 00:55:05 -0700 Subject: [PATCH 1/5] Update settings.gradle --- settings.gradle | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/settings.gradle b/settings.gradle index 417ada412a..a4eba944e4 100644 --- a/settings.gradle +++ b/settings.gradle @@ -22,7 +22,8 @@ include \ 'samza-elasticsearch', 'samza-log4j', 'samza-rest', - 'samza-shell' + 'samza-shell', + 'samza-azure' def scalaModules = [ 'samza-core', From 30a34c8ce9132dd9a81ddec5982b6f25aa092f97 Mon Sep 17 00:00:00 2001 From: PawasChhokra Date: Wed, 2 Aug 2017 01:00:21 -0700 Subject: [PATCH 2/5] Update build.gradle --- build.gradle | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/build.gradle b/build.gradle index 8d1b37efdf..02c34ac132 100644 --- a/build.gradle +++ b/build.gradle @@ -134,6 +134,7 @@ project(':samza-api') { } } + project(":samza-core_$scalaVersion") { apply plugin: 'scala' apply plugin: 'checkstyle' @@ -179,6 +180,27 @@ project(":samza-core_$scalaVersion") { } } + +project(':samza-azure') { + apply plugin: 'java' + apply plugin: 'checkstyle' + + dependencies { + compile "com.microsoft.azure:azure-storage:5.3.1" + compile "com.fasterxml.jackson.core:jackson-core:2.8.8" + compile project(':samza-api') + compile project(":samza-core_$scalaVersion") + compile "org.slf4j:slf4j-api:$slf4jVersion" + testCompile "junit:junit:$junitVersion" + testCompile "org.mockito:mockito-all:$mockitoVersion" + } + checkstyle { + configFile = new File(rootDir, "checkstyle/checkstyle.xml") + toolVersion = "$checkstyleVersion" + } +} + + project(":samza-autoscaling_$scalaVersion") { apply plugin: 'scala' apply plugin: 'checkstyle' From 16517858d9a2b6cee4fd537767ceb243f65b2ee1 Mon Sep 17 00:00:00 2001 From: PawasChhokra Date: Wed, 2 Aug 2017 01:04:55 -0700 Subject: [PATCH 3/5] Add Azure Storage Client --- .../java/org/apache/samza/AzureClient.java | 64 
+++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 samza-azure/src/main/java/org/apache/samza/AzureClient.java diff --git a/samza-azure/src/main/java/org/apache/samza/AzureClient.java b/samza-azure/src/main/java/org/apache/samza/AzureClient.java new file mode 100644 index 0000000000..c5b669d375 --- /dev/null +++ b/samza-azure/src/main/java/org/apache/samza/AzureClient.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.samza; + +import com.microsoft.azure.storage.CloudStorageAccount; +import com.microsoft.azure.storage.blob.CloudBlobClient; +import com.microsoft.azure.storage.table.CloudTableClient; +import java.net.URISyntaxException; +import java.security.InvalidKeyException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Creates the client handles for the Azure Storage account, Azure Blob storage and Azure Table storage + */ +public class AzureClient { + + private static final Logger LOG = LoggerFactory.getLogger(AzureClient.class); + private final CloudStorageAccount account; + private final CloudTableClient tableClient; + private final CloudBlobClient blobClient; + + AzureClient(String storageConnectionString) { + try { + account = CloudStorageAccount.parse(storageConnectionString); + blobClient = account.createCloudBlobClient(); + tableClient = account.createCloudTableClient(); + } catch (IllegalArgumentException | URISyntaxException e) { + LOG.info("\nConnection string specifies an invalid URI."); + LOG.info("Please confirm the connection string is in the Azure connection string format."); + throw new SamzaException(e); + } catch (InvalidKeyException e) { + LOG.info("\nConnection string specifies an invalid key."); + LOG.info("Please confirm the AccountName and AccountKey in the connection string are valid."); + throw new SamzaException(e); + } + } + + public CloudBlobClient getBlobClient() { + return blobClient; + } + + public CloudTableClient getTableClient() { + return tableClient; + } +} From bf88e20205af0dc01b7826eeaa71a8ea96b65fb0 Mon Sep 17 00:00:00 2001 From: PawasChhokra Date: Wed, 2 Aug 2017 10:46:17 -0700 Subject: [PATCH 4/5] Add Azure Config --- .../java/org/apache/samza/AzureConfig.java | 68 +++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 samza-azure/src/main/java/org/apache/samza/AzureConfig.java diff --git a/samza-azure/src/main/java/org/apache/samza/AzureConfig.java 
b/samza-azure/src/main/java/org/apache/samza/AzureConfig.java new file mode 100644 index 0000000000..c284e50d30 --- /dev/null +++ b/samza-azure/src/main/java/org/apache/samza/AzureConfig.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.samza; + +import org.apache.samza.config.Config; +import org.apache.samza.config.ConfigException; +import org.apache.samza.config.MapConfig; + + +public class AzureConfig extends MapConfig { + + // Connection string for Azure Storage Account, format: "DefaultEndpointsProtocol=;AccountName=<>;AccountKey=<>;" + public static final String AZURE_STORAGE_CONNECT = "job.coordinator.azure.storage.connect"; + public static final String AZURE_CONTAINER_NAME = "job.coordinator.azure.container.name"; + public static final String AZURE_BLOB_NAME = "job.coordinator.azure.blob.name"; + public static final String AZURE_TABLE_NAME = "job.coordinator.azure.table.name"; + public static final String AZURE_PAGEBLOB_LENGTH = "job.coordinator.azure.blob.length"; + + public static final String DEFAULT_AZURE_CONTAINER_NAME = "samzacontainer"; + public static final String DEFAULT_AZURE_BLOB_NAME = "samzablob"; + public static final String DEFAULT_AZURE_TABLE_NAME = "samzatable"; + public static final long DEFAULT_AZURE_PAGEBLOB_LENGTH = 5120000; + + public AzureConfig(Config config) { + super(config); + } + + public String getAzureConnect() { + if (!containsKey(AZURE_STORAGE_CONNECT)) { + throw new ConfigException("Missing " + AZURE_STORAGE_CONNECT + " config!"); + } + return get(AZURE_STORAGE_CONNECT); + } + + public String getAzureContainerName() { + return get(AZURE_CONTAINER_NAME, DEFAULT_AZURE_CONTAINER_NAME); + } + + public String getAzureBlobName() { + return get(AZURE_BLOB_NAME, DEFAULT_AZURE_BLOB_NAME); + } + public long getAzureBlobLength() { + return getLong(AZURE_PAGEBLOB_LENGTH, DEFAULT_AZURE_PAGEBLOB_LENGTH); + } + + public String getAzureTableName() { + return get(AZURE_TABLE_NAME, DEFAULT_AZURE_TABLE_NAME); + } + +} + From fb4835d4ed1eac08e473883c5d4b34ad2dfc51c3 Mon Sep 17 00:00:00 2001 From: PawasChhokra Date: Wed, 2 Aug 2017 14:25:15 -0700 Subject: [PATCH 5/5] Update configuration-table --- build.gradle | 2 -- 
.../versioned/jobs/configuration-table.html | 22 +++++++++++- samza-azure/README.md | 34 +++++++++++++++++++ .../java/org/apache/samza/AzureClient.java | 12 +++---- .../java/org/apache/samza/AzureConfig.java | 26 ++++++++------ 5 files changed, 76 insertions(+), 20 deletions(-) create mode 100644 samza-azure/README.md diff --git a/build.gradle b/build.gradle index 02c34ac132..16fe2cf5cc 100644 --- a/build.gradle +++ b/build.gradle @@ -134,7 +134,6 @@ project(':samza-api') { } } - project(":samza-core_$scalaVersion") { apply plugin: 'scala' apply plugin: 'checkstyle' @@ -192,7 +191,6 @@ project(':samza-azure') { compile project(":samza-core_$scalaVersion") compile "org.slf4j:slf4j-api:$slf4jVersion" testCompile "junit:junit:$junitVersion" - testCompile "org.mockito:mockito-all:$mockitoVersion" } checkstyle { configFile = new File(rootDir, "checkstyle/checkstyle.xml") diff --git a/docs/learn/documentation/versioned/jobs/configuration-table.html b/docs/learn/documentation/versioned/jobs/configuration-table.html index bdf477aeec..dc1df3078b 100644 --- a/docs/learn/documentation/versioned/jobs/configuration-table.html +++ b/docs/learn/documentation/versioned/jobs/configuration-table.html @@ -424,7 +424,8 @@

Samza Configuration Reference

Fixed partition mapping. No Zookeeper.
org.apache.samza.zk.ZkJobCoordinatorFactory
Zookeeper-based coordination.
- +
org.apache.samza.AzureJobCoordinatorFactory
+
Azure-based coordination. Required only for non-cluster-managed applications. Please see the required value for task-name-grouper-factory @@ -468,6 +469,25 @@

Samza Configuration Reference

How long the Leader processor will wait before recalculating the JobModel on change of registered processors. + + Azure-based job configuration + + + azure.storage.connect + + + Required for applications with Azure-based coordination. This is the storage connection string associated with your Azure storage account. It is of the format: "DefaultEndpointsProtocol=https;AccountName=<Insert your account name>;AccountKey=<Insert your account key>" + + + + + job.coordinator.azure.blob.length + 5120000 + + Length, in bytes, of the page blob on which the leader stores the shared data. Different types of data are stored on different pages with predefined lengths. The offsets of these pages are dependent on the total page blob length. + + + Task configuration diff --git a/samza-azure/README.md b/samza-azure/README.md new file mode 100644 index 0000000000..0e9e9cf7fc --- /dev/null +++ b/samza-azure/README.md @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +## Samza on Azure + +* Provides the ability to run Samza Standalone in the cloud, using Azure. +* Removes the dependency on Zookeeper. +* All coordination services are written using services provided by Azure. 
+ +Read [Samza on Azure Design Doc](https://cwiki.apache.org/confluence/display/SAMZA/SEP-7%3A+Samza+on+Azure) to learn more about the implementation details. + +### Running Samza with Azure + +* Change: job.coordinator.factory = org.apache.samza.AzureJobCoordinatorFactory +* Add Azure Storage Connection String. +
azure.storage.connect = DefaultEndpointsProtocol=https;AccountName="Insert your account name";AccountKey="Insert your account key" +* Add blob length in bytes => job.coordinator.azure.blob.length +
Default value = 5120000 \ No newline at end of file diff --git a/samza-azure/src/main/java/org/apache/samza/AzureClient.java b/samza-azure/src/main/java/org/apache/samza/AzureClient.java index c5b669d375..b5884cd88f 100644 --- a/samza-azure/src/main/java/org/apache/samza/AzureClient.java +++ b/samza-azure/src/main/java/org/apache/samza/AzureClient.java @@ -44,13 +44,13 @@ public class AzureClient { blobClient = account.createCloudBlobClient(); tableClient = account.createCloudTableClient(); } catch (IllegalArgumentException | URISyntaxException e) { - LOG.info("\nConnection string specifies an invalid URI."); - LOG.info("Please confirm the connection string is in the Azure connection string format."); - throw new SamzaException(e); + LOG.error("\nConnection string {} specifies an invalid URI.", storageConnectionString); + LOG.error("Please confirm the connection string is in the Azure connection string format."); + throw new SamzaException(e); } catch (InvalidKeyException e) { - LOG.info("\nConnection string specifies an invalid key."); - LOG.info("Please confirm the AccountName and AccountKey in the connection string are valid."); - throw new SamzaException(e); + LOG.error("\nConnection string {} specifies an invalid key.", storageConnectionString); + LOG.error("Please confirm the AccountName and AccountKey in the connection string are valid."); + throw new SamzaException(e); } } diff --git a/samza-azure/src/main/java/org/apache/samza/AzureConfig.java b/samza-azure/src/main/java/org/apache/samza/AzureConfig.java index c284e50d30..b88d3c0531 100644 --- a/samza-azure/src/main/java/org/apache/samza/AzureConfig.java +++ b/samza-azure/src/main/java/org/apache/samza/AzureConfig.java @@ -19,6 +19,7 @@ package org.apache.samza; +import org.apache.samza.config.ApplicationConfig; import org.apache.samza.config.Config; import org.apache.samza.config.ConfigException; import org.apache.samza.config.MapConfig; @@ -26,20 +27,23 @@ public class AzureConfig extends MapConfig { - 
// Connection string for Azure Storage Account, format: "DefaultEndpointsProtocol=;AccountName=<>;AccountKey=<>;" - public static final String AZURE_STORAGE_CONNECT = "job.coordinator.azure.storage.connect"; - public static final String AZURE_CONTAINER_NAME = "job.coordinator.azure.container.name"; - public static final String AZURE_BLOB_NAME = "job.coordinator.azure.blob.name"; - public static final String AZURE_TABLE_NAME = "job.coordinator.azure.table.name"; + // Connection string for Azure Storage Account, format: "DefaultEndpointsProtocol=;AccountName=<>;AccountKey=<>" + public static final String AZURE_STORAGE_CONNECT = "azure.storage.connect"; public static final String AZURE_PAGEBLOB_LENGTH = "job.coordinator.azure.blob.length"; - public static final String DEFAULT_AZURE_CONTAINER_NAME = "samzacontainer"; - public static final String DEFAULT_AZURE_BLOB_NAME = "samzablob"; - public static final String DEFAULT_AZURE_TABLE_NAME = "samzatable"; + private static String containerName; + private static String blobName; + private static String tableName; public static final long DEFAULT_AZURE_PAGEBLOB_LENGTH = 5120000; public AzureConfig(Config config) { super(config); + ApplicationConfig appConfig = new ApplicationConfig(config); + //Remove all non-alphanumeric characters from id as table name does not allow them. 
+ String id = appConfig.getGlobalAppId().replaceAll("[^A-Za-z0-9]", ""); + containerName = "samzacontainer" + id; + blobName = "samzablob" + id; + tableName = "samzatable" + id; } public String getAzureConnect() { @@ -50,18 +54,18 @@ public String getAzureConnect() { } public String getAzureContainerName() { - return get(AZURE_CONTAINER_NAME, DEFAULT_AZURE_CONTAINER_NAME); + return containerName; } public String getAzureBlobName() { - return get(AZURE_BLOB_NAME, DEFAULT_AZURE_BLOB_NAME); + return blobName; } public long getAzureBlobLength() { return getLong(AZURE_PAGEBLOB_LENGTH, DEFAULT_AZURE_PAGEBLOB_LENGTH); } public String getAzureTableName() { - return get(AZURE_TABLE_NAME, DEFAULT_AZURE_TABLE_NAME); + return tableName; } }