diff --git a/VERSION b/VERSION
index e5403775b..0bbe2c116 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.7.26
+3.7.27
diff --git a/gooddata.gemspec b/gooddata.gemspec
index 4ecb6a3d5..257c3b6ec 100644
--- a/gooddata.gemspec
+++ b/gooddata.gemspec
@@ -66,6 +66,8 @@ Gem::Specification.new do |s|
   else
     s.add_dependency 'docile', '> 1.1', '< 1.4.0'
   end
+  s.add_dependency 'azure-storage-blob', '~> 1.1.0'
+  s.add_dependency 'nokogiri', '~> 1.10.0'
   s.add_dependency 'gli', '~> 2.15'
   s.add_dependency 'gooddata_datawarehouse', '~> 0.0.10' if RUBY_PLATFORM == 'java'
   s.add_dependency 'highline', '= 2.0.0.pre.develop.14'
diff --git a/lib/gooddata/cloud_resources/blobstorage/blobstorage_client.rb b/lib/gooddata/cloud_resources/blobstorage/blobstorage_client.rb
new file mode 100644
index 000000000..4e18de926
--- /dev/null
+++ b/lib/gooddata/cloud_resources/blobstorage/blobstorage_client.rb
@@ -0,0 +1,133 @@
+# encoding: UTF-8
+# frozen_string_literal: true
+#
+# Copyright (c) 2021 GoodData Corporation. All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+require 'benchmark'
+require 'securerandom'
+require 'pathname'
+require "azure/storage/blob"
+
+module GoodData
+  # Downloads a single blob from Azure Blob Storage into a local file.
+  #
+  # Configured through options['blobStorage_client'], which must carry a
+  # 'connectionString' and a 'container' and may carry a 'path' prefix. When
+  # the connection string matches SAS_URL_PATTERN the client authenticates with
+  # the host and SAS token parsed out of it; otherwise the string is handed to
+  # the Azure SDK as a regular connection string.
+  class BlobStorageClient
+    # Captures (1) scheme plus host and (2) the query string of a SAS URL.
+    SAS_URL_PATTERN = %r{(^https?:\/\/[^\/]*)\/.*\?(.*)}
+    INVALID_BLOB_GENERAL_MESSAGE = "The connection string is not valid."
+    INVALID_BLOB_SIG_WELL_FORMED_MESSAGE = "The signature format is not valid."
+    INVALID_BLOB_CONTAINER_MESSAGE = "ContainerNotFound"
+    INVALID_BLOB_CONTAINER_FORMED_MESSAGE = "The container with the specified name is not found."
+    INVALID_BLOB_EXPIRED_ORIGINAL_MESSAGE = "Signature not valid in the specified time frame"
+    INVALID_BLOB_EXPIRED_MESSAGE = "The signature expired."
+    INVALID_BLOB_INVALID_CONNECTION_STRING_MESSAGE = "The connection string is not valid."
+    INVALID_BLOB_PATH_MESSAGE = "BlobNotFound"
+    INVALID_BLOB_INVALID_PATH_MESSAGE = "The path to the data is not found."
+
+    # Pairs of [fragment searched for in the underlying error message,
+    # message raised instead]. Checked in order; the first hit wins.
+    ERROR_TRANSLATIONS = [
+      [INVALID_BLOB_EXPIRED_ORIGINAL_MESSAGE, INVALID_BLOB_EXPIRED_MESSAGE],
+      [INVALID_BLOB_SIG_WELL_FORMED_MESSAGE, INVALID_BLOB_SIG_WELL_FORMED_MESSAGE],
+      [INVALID_BLOB_CONTAINER_MESSAGE, INVALID_BLOB_CONTAINER_FORMED_MESSAGE],
+      [INVALID_BLOB_PATH_MESSAGE, INVALID_BLOB_INVALID_PATH_MESSAGE]
+    ].freeze
+
+    attr_reader :use_sas
+
+    # @param options [Hash] must contain 'blobStorage_client' with a
+    #   'connectionString' and a 'container'; 'path' is optional
+    # @raise [RuntimeError] when the client section or its connection info is missing
+    def initialize(options = {})
+      config = options['blobStorage_client']
+      raise("Data Source needs a client to Blob Storage to be able to get blob file but 'blobStorage_client' is empty.") unless config
+      raise('Missing connection info for Blob Storage client') unless config['connectionString'] && config['container']
+
+      @connection_string = config['connectionString']
+      @container = config['container']
+      @path = config['path']
+      @use_sas = false
+      # Switches to SAS authentication when the connection string is a SAS URL.
+      build_sas(@connection_string)
+    end
+
+    # Downloads +file+ from the configured container into a randomly named
+    # .csv file, written relative to the current working directory.
+    #
+    # @param file [String] blob name, relative to the configured 'path' if any
+    # @param _params [Hash] unused
+    # @return [String] name of the local file the blob was written to
+    # @raise [RuntimeError] with a translated message when the download fails
+    def realize_blob(file, _params)
+      GoodData.gd_logger.info("Realizing download from Blob Storage. Container #{@container}.")
+      begin
+        connect
+        filename = "#{SecureRandom.urlsafe_base64(6)}_#{Time.now.to_i}.csv"
+        blob_name = blob_name_for(file)
+
+        measure = Benchmark.measure do
+          _blob, content = @client.get_blob(@container, blob_name)
+          File.open(filename, "wb") { |f| f.write(content) }
+        end
+      rescue => e
+        raise_error(e)
+      end
+      GoodData.gd_logger.info("Done downloading file type=blobStorage status=finished duration=#{measure.real}")
+      filename
+    end
+
+    # Builds the Azure BlobService used by #realize_blob: from host and SAS
+    # token when a SAS URL was configured, from the connection string otherwise.
+    def connect
+      GoodData.logger.info "Setting up connection to Blob Storage"
+      @client =
+        if use_sas
+          Azure::Storage::Blob::BlobService.create(:storage_blob_host => @host, :storage_sas_token => @sas_token)
+        else
+          Azure::Storage::Blob::BlobService.create_from_connection_string(@connection_string)
+        end
+    end
+
+    # Tries to read +url+ as a SAS URL. On a match stores host and token and
+    # turns SAS authentication on; otherwise changes nothing.
+    #
+    # @param url [String] the configured connection string
+    def build_sas(url)
+      sas = url.match(SAS_URL_PATTERN)
+      return unless sas
+
+      @use_sas = true
+      @host = sas[1]
+      @sas_token = sas[2]
+    end
+
+    # Raises a RuntimeError whose message is the user-facing translation of
+    # +e+, or the general "connection string" message when nothing matches.
+    # When called from a rescue clause Ruby keeps the original as #cause.
+    #
+    # @param e [Exception] error raised while talking to Blob Storage
+    # @raise [RuntimeError] always
+    def raise_error(e)
+      original = e.message.to_s
+      _fragment, translated = ERROR_TRANSLATIONS.find { |fragment, _message| original.include?(fragment) }
+      raise(translated || INVALID_BLOB_GENERAL_MESSAGE)
+    end
+
+    private
+
+    # Joins the optional path prefix and +file+ into a blob name. A nil or
+    # empty prefix yields the bare file name, so the result never starts
+    # with a stray '/'.
+    def blob_name_for(file)
+      prefix = @path.to_s.delete_suffix('/')
+      prefix.empty? ? file.to_s : "#{prefix}/#{file}"
+    end
+  end
+end
diff --git a/lib/gooddata/helpers/data_helper.rb b/lib/gooddata/helpers/data_helper.rb
index 5052cca12..d4d7ed474 100644
--- a/lib/gooddata/helpers/data_helper.rb
+++ b/lib/gooddata/helpers/data_helper.rb
@@ -48,6 +48,10 @@ def realize(params = {})
           raise GoodData::InvalidEnvError, "DataSource does not support type \"#{source}\" on the platform #{RUBY_PLATFORM}" unless RUBY_PLATFORM =~ /java/
           require_relative '../cloud_resources/cloud_resources'
           realize_cloud_resource(source, params)
+        when 'blobStorage'
+          require_relative '../cloud_resources/blobstorage/blobstorage_client'
+          blob_storage_client = GoodData::BlobStorageClient.new(params)
+          blob_storage_client.realize_blob(@options[:file], params)
         else
           raise "DataSource does not support type \"#{source}\""
         end
diff --git a/spec/data/blobstorage_data.csv b/spec/data/blobstorage_data.csv
new file mode 100644
index 000000000..bc7ebef4a
--- /dev/null
+++ b/spec/data/blobstorage_data.csv
@@ -0,0 +1,3 @@
+segment_id,client_id,project_title,project_id,project_token
+Segment,Client1,Client-1,,token
+Segment,Client2,Client-2,,token
diff --git a/spec/environment/secrets.yaml b/spec/environment/secrets.yaml
index 193b056d8..30efd62d6 100644
--- a/spec/environment/secrets.yaml
+++ b/spec/environment/secrets.yaml
@@ -7,6 +7,7 @@ global:
   redshift_access_key: 94Ti6/jaRHeyZsiQnYdulo8RAoLSahxlTUb3AzvlNkrND38yvBcHnL7JYBJkZ4bS
   redshift_secret_key: 1JHYwvoIQinjgdKJXQL6RNGFhx4o4M9DiQf9q4jV+dq6xCPLDBCP/8tXBd0H9y8xdOOI78mY/aOQWjiPzgizLA==
   snowflake_password: 1zg1PDRMQq2DhBG3SwQOA8/POUkeek3gurrmV4MT2Go=
+  blob_storage_connection:
Md/faNEbH3YOsmVCDaUJEH4/eHkABgp2X1V6BIZyMbuMxlAdlCFxY8gLqM1sJUEt2txBp7I6PmDdnG34+wV1nawRO3U9WAwr8wTPI57pkcNj0fpFN9KLycNA8ms6cVklxFlgO1WmCOqBL+wBnIbqRZ8sl9wx2BTFebt8QQSLucGMZtY0oDjy/YeG6SqH+HCzEW70ipU3whVXWJkZStIK8cHy9uxJZF88uqpphJFTQAFMwgCQQ9+vEF+mpt4xaWtF2KnRkif2a2OuYSsEStuA/A==
 development:
   dev_token: 8qWaLsyWwAUJ7MJJTBdriUvtaWKNidnzmfxVThCrL0c=
   prod_token: RitXvhFjpJ8KEpqUqZm57iV3bwVU1zBGDrXNklvwkaE=
diff --git a/spec/lcm/integration/spec/others/data_helper_spec.rb b/spec/lcm/integration/spec/others/data_helper_spec.rb
index 2b20c4280..d0e360d60 100644
--- a/spec/lcm/integration/spec/others/data_helper_spec.rb
+++ b/spec/lcm/integration/spec/others/data_helper_spec.rb
@@ -147,6 +147,19 @@
   }
 }
 
+# Blob Storage client settings plus the input source describing the file to fetch.
+blob_storage_params = {
+  'blobStorage_client' => {
+    'connectionString' => ConnectionHelper::SECRETS[:blob_storage_connection],
+    'container' => 'msftest',
+    'path' => ''
+  },
+  'input_source' => {
+    'type' => 'blobStorage',
+    'file' => 'clients.csv'
+  }
+}
+
 describe 'data helper', :vcr do
 
   it 'connect to redshift with IAM authentication' do
@@ -209,4 +222,11 @@
     data = File.open('spec/data/bigquery_data.csv').read
     expect(data).to eq File.open(file_path).read
   end
+
+  it 'connect to blob storage with connection string' do
+    source = GoodData::Helpers::DataSource.new(blob_storage_params['input_source'])
+    downloaded_path = source.realize(blob_storage_params)
+    expected = File.open('spec/data/blobstorage_data.csv').read
+    expect(expected).to eq File.open(downloaded_path).read
+  end
 end