ClickHouse · abyss7 · Aug 20, 2019 · Jul 15, 2019 · Jul 10, 2019 · Jul 11, 2019
diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h
@@ -212,7 +212,7 @@ struct Settings : public SettingsCollection<Settings>
     M(SettingUInt64, insert_distributed_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.") \
     M(SettingInt64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite.") \
     M(SettingMilliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.") \
-    M(SettingMilliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from streaming storages.") \
+    M(SettingMilliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.") \
     M(SettingString, format_schema, "", "Schema identifier (used by schema-based formats)") \
     M(SettingBool, insert_allow_materialized_columns, 0, "If setting is enabled, Allow materialized columns in INSERT.") \
     M(SettingSeconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.") \

diff --git a/dbms/src/Formats/BlockOutputStreamFromRowOutputStream.cpp b/dbms/src/Formats/BlockOutputStreamFromRowOutputStream.cpp
diff --git a/dbms/src/Formats/BlockOutputStreamFromRowOutputStream.h b/dbms/src/Formats/BlockOutputStreamFromRowOutputStream.h
diff --git a/dbms/src/Formats/FormatFactory.cpp b/dbms/src/Formats/FormatFactory.cpp
@@ -100,7 +100,8 @@ BlockInputStreamPtr FormatFactory::getInput(
 }
 
 
-BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer & buf, const Block & sample, const Context & context) const
+BlockOutputStreamPtr FormatFactory::getOutput(
+    const String & name, WriteBuffer & buf, const Block & sample, const Context & context, WriteCallback callback) const
 {
     if (name == "PrettyCompactMonoBlock")
     {
@@ -124,14 +125,14 @@ BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer &
         const Settings & settings = context.getSettingsRef();
         FormatSettings format_settings = getOutputFormatSetting(settings);
 
-        /** Materialization is needed, because formats can use the functions `IDataType`,
+        /**  Materialization is needed, because formats can use the functions `IDataType`,
           *  which only work with full columns.
           */
         return std::make_shared<MaterializingBlockOutputStream>(
-                output_getter(buf, sample, context, format_settings), sample);
+                output_getter(buf, sample, context, callback, format_settings), sample);
     }
 
-    auto format = getOutputFormat(name, buf, sample, context);
+    auto format = getOutputFormat(name, buf, sample, context, callback);
     return std::make_shared<MaterializingBlockOutputStream>(std::make_shared<OutputStreamToOutputFormat>(format), sample);
 }
 
@@ -165,7 +166,8 @@ InputFormatPtr FormatFactory::getInputFormat(
 }
 
 
-OutputFormatPtr FormatFactory::getOutputFormat(const String & name, WriteBuffer & buf, const Block & sample, const Context & context) const
+OutputFormatPtr FormatFactory::getOutputFormat(
+    const String & name, WriteBuffer & buf, const Block & sample, const Context & context, WriteCallback callback) const
 {
     const auto & output_getter = getCreators(name).output_processor_creator;
     if (!output_getter)
@@ -177,7 +179,7 @@ OutputFormatPtr FormatFactory::getOutputFormat(const String & name, WriteBuffer
     /** TODO: Materialization is needed, because formats can use the functions `IDataType`,
       *  which only work with full columns.
       */
-    return output_getter(buf, sample, context, format_settings);
+    return output_getter(buf, sample, context, callback, format_settings);
 }
 
 

diff --git a/dbms/src/Formats/FormatFactory.h b/dbms/src/Formats/FormatFactory.h
@@ -41,6 +41,10 @@ class FormatFactory final : public ext::singleton<FormatFactory>
     /// It's initial purpose was to extract payload for virtual columns from Kafka Consumer ReadBuffer.
     using ReadCallback = std::function<void()>;
 
+    /// This callback allows performing some additional actions after writing a single row.
+    /// Its initial purpose was to flush a Kafka message for each row.
+    using WriteCallback = std::function<void()>;
+
 private:
     using InputCreator = std::function<BlockInputStreamPtr(
         ReadBuffer & buf,
@@ -55,6 +59,7 @@ class FormatFactory final : public ext::singleton<FormatFactory>
         WriteBuffer & buf,
         const Block & sample,
         const Context & context,
+        WriteCallback callback,
         const FormatSettings & settings)>;
 
     using InputProcessorCreator = std::function<InputFormatPtr(
@@ -68,6 +73,7 @@ class FormatFactory final : public ext::singleton<FormatFactory>
             WriteBuffer & buf,
             const Block & sample,
             const Context & context,
+            WriteCallback callback,
             const FormatSettings & settings)>;
 
     struct Creators
@@ -91,7 +97,7 @@ class FormatFactory final : public ext::singleton<FormatFactory>
         ReadCallback callback = {}) const;
 
     BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf,
-        const Block & sample, const Context & context) const;
+        const Block & sample, const Context & context, WriteCallback callback = {}) const;
 
     InputFormatPtr getInputFormat(
         const String & name,
@@ -102,8 +108,8 @@ class FormatFactory final : public ext::singleton<FormatFactory>
         UInt64 rows_portion_size = 0,
         ReadCallback callback = {}) const;
 
-    OutputFormatPtr getOutputFormat(const String & name, WriteBuffer & buf,
-        const Block & sample, const Context & context) const;
+    OutputFormatPtr getOutputFormat(
+        const String & name, WriteBuffer & buf, const Block & sample, const Context & context, WriteCallback callback = {}) const;
 
     /// Register format by its name.
     void registerInputFormat(const String & name, InputCreator input_creator);

diff --git a/dbms/src/Formats/NativeFormat.cpp b/dbms/src/Formats/NativeFormat.cpp
@@ -27,6 +27,7 @@ void registerOutputFormatNative(FormatFactory & factory)
         WriteBuffer & buf,
         const Block & sample,
         const Context &,
+        FormatFactory::WriteCallback,
         const FormatSettings &)
     {
         return std::make_shared<NativeBlockOutputStream>(buf, 0, sample);

diff --git a/dbms/src/Formats/NullFormat.cpp b/dbms/src/Formats/NullFormat.cpp
@@ -11,6 +11,7 @@ void registerOutputFormatNull(FormatFactory & factory)
         WriteBuffer &,
         const Block & sample,
         const Context &,
+        FormatFactory::WriteCallback,
         const FormatSettings &)
     {
         return std::make_shared<NullBlockOutputStream>(sample);

diff --git a/dbms/src/Formats/tests/block_row_transforms.cpp b/dbms/src/Formats/tests/block_row_transforms.cpp
@@ -14,7 +14,6 @@
 
 #include <Formats/TabSeparatedRowInputStream.h>
 #include <Formats/BlockInputStreamFromRowInputStream.h>
-#include <Formats/BlockOutputStreamFromRowOutputStream.h>
 
 #include <DataStreams/copyData.h>
 #include <Processors/Formats/Impl/TabSeparatedRowOutputFormat.h>
@@ -47,7 +46,7 @@ try
 
     RowInputStreamPtr row_input = std::make_shared<TabSeparatedRowInputStream>(in_buf, sample, false, false, format_settings);
     BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, []{}, format_settings);
-    BlockOutputStreamPtr block_output = std::make_shared<OutputStreamToOutputFormat>(std::make_shared<TabSeparatedRowOutputFormat>(out_buf, sample, false, false, format_settings));
+    BlockOutputStreamPtr block_output = std::make_shared<OutputStreamToOutputFormat>(std::make_shared<TabSeparatedRowOutputFormat>(out_buf, sample, false, false, []{}, format_settings));
 
     copyData(block_input, *block_output);
 }

diff --git a/dbms/src/Formats/tests/tab_separated_streams.cpp b/dbms/src/Formats/tests/tab_separated_streams.cpp
@@ -11,7 +11,6 @@
 
 #include <Formats/TabSeparatedRowInputStream.h>
 #include <Formats/BlockInputStreamFromRowInputStream.h>
-#include <Formats/BlockOutputStreamFromRowOutputStream.h>
 
 #include <DataStreams/copyData.h>
 #include <Processors/Formats/OutputStreamToOutputFormat.h>
@@ -44,7 +43,7 @@ try
     BlockInputStreamFromRowInputStream block_input(row_input, sample, DEFAULT_INSERT_BLOCK_SIZE, 0, []{}, format_settings);
 
     BlockOutputStreamPtr block_output = std::make_shared<OutputStreamToOutputFormat>(
-            std::make_shared<TabSeparatedRowOutputFormat>(out_buf, sample, false, false, format_settings));
+        std::make_shared<TabSeparatedRowOutputFormat>(out_buf, sample, false, false, [] {}, format_settings));
 
     copyData(block_input, *block_output);
     return 0;

diff --git a/dbms/src/IO/MemoryReadWriteBuffer.h b/dbms/src/IO/MemoryReadWriteBuffer.h
@@ -11,7 +11,7 @@
 namespace DB
 {
 
-/// Stores data in memory chunks, size of cunks are exponentially increasing during write
+/// Stores data in memory chunks; chunk sizes increase exponentially during write
 /// Written data could be reread after write
 class MemoryWriteBuffer : public WriteBuffer, public IReadableWriteBuffer, boost::noncopyable, private Allocator<false>
 {

diff --git a/dbms/src/IO/WriteBuffer.h b/dbms/src/IO/WriteBuffer.h
@@ -35,7 +35,7 @@ class WriteBuffer : public BufferBase
       */
     inline void next()
     {
-        if (!offset())
+        if (!offset() && available())
             return;
         bytes += offset();
 

diff --git a/dbms/src/Processors/Formats/IRowOutputFormat.cpp b/dbms/src/Processors/Formats/IRowOutputFormat.cpp
@@ -20,6 +20,9 @@ void IRowOutputFormat::consume(DB::Chunk chunk)
         first_row = false;
 
         write(columns, row);
+
+        if (write_single_row_callback)
+            write_single_row_callback();
     }
 }
 
@@ -96,6 +99,3 @@ void IRowOutputFormat::writeTotals(const DB::Columns & columns, size_t row_num)
 }
 
 }
-
-
-
diff --git a/dbms/src/Processors/Formats/IRowOutputFormat.h b/dbms/src/Processors/Formats/IRowOutputFormat.h
@@ -1,8 +1,10 @@
 #pragma once
 
-#include <string>
+#include <Formats/FormatFactory.h>
 #include <Processors/Formats/IOutputFormat.h>
 
+#include <string>
+
 
 namespace DB
 {
@@ -22,8 +24,8 @@ class IRowOutputFormat : public IOutputFormat
     void finalize() override;
 
 public:
-    IRowOutputFormat(const Block & header, WriteBuffer & out_)
-        : IOutputFormat(header, out_), types(header.getDataTypes())
+    IRowOutputFormat(const Block & header, WriteBuffer & out_, FormatFactory::WriteCallback callback)
+        : IOutputFormat(header, out_), types(header.getDataTypes()), write_single_row_callback(callback)
     {
     }
 
@@ -57,6 +59,9 @@ class IRowOutputFormat : public IOutputFormat
     bool prefix_written = false;
     bool suffix_written = false;
 
+    // Callback used to indicate that another row is written.
+    FormatFactory::WriteCallback write_single_row_callback;
+
     void writePrefixIfNot()
     {
         if (!prefix_written)
@@ -76,5 +81,3 @@ class IRowOutputFormat : public IOutputFormat
 };
 
 }
-
-
diff --git a/dbms/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/BinaryRowOutputFormat.cpp
@@ -9,8 +9,8 @@
 namespace DB
 {
 
-BinaryRowOutputFormat::BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_)
-    : IRowOutputFormat(header, out_), with_names(with_names_), with_types(with_types_)
+BinaryRowOutputFormat::BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, FormatFactory::WriteCallback callback)
+    : IRowOutputFormat(header, out_, callback), with_names(with_names_), with_types(with_types_)
 {
 }
 
@@ -53,18 +53,20 @@ void registerOutputFormatProcessorRowBinary(FormatFactory & factory)
         WriteBuffer & buf,
         const Block & sample,
         const Context &,
+        FormatFactory::WriteCallback callback,
         const FormatSettings &)
     {
-        return std::make_shared<BinaryRowOutputFormat>(buf, sample, false, false);
+        return std::make_shared<BinaryRowOutputFormat>(buf, sample, false, false, callback);
     });
 
     factory.registerOutputFormatProcessor("RowBinaryWithNamesAndTypes", [](
         WriteBuffer & buf,
         const Block & sample,
         const Context &,
+        FormatFactory::WriteCallback callback,
         const FormatSettings &)
     {
-        return std::make_shared<BinaryRowOutputFormat>(buf, sample, true, true);
+        return std::make_shared<BinaryRowOutputFormat>(buf, sample, true, true, callback);
     });
 }
 

diff --git a/dbms/src/Processors/Formats/Impl/BinaryRowOutputFormat.h b/dbms/src/Processors/Formats/Impl/BinaryRowOutputFormat.h
@@ -17,7 +17,7 @@ class WriteBuffer;
 class BinaryRowOutputFormat: public IRowOutputFormat
 {
 public:
-    BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_);
+    BinaryRowOutputFormat(WriteBuffer & out_, const Block & header, bool with_names_, bool with_types_, FormatFactory::WriteCallback callback);
 
     String getName() const override { return "BinaryRowOutputFormat"; }
 
@@ -32,4 +32,3 @@ class BinaryRowOutputFormat: public IRowOutputFormat
 };
 
 }
-
diff --git a/dbms/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp b/dbms/src/Processors/Formats/Impl/CSVRowOutputFormat.cpp
@@ -8,8 +8,8 @@ namespace DB
 {
 
 
-CSVRowOutputFormat::CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, const FormatSettings & format_settings_)
-    : IRowOutputFormat(header_, out_), with_names(with_names_), format_settings(format_settings_)
+CSVRowOutputFormat::CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, FormatFactory::WriteCallback callback, const FormatSettings & format_settings_)
+    : IRowOutputFormat(header_, out_, callback), with_names(with_names_), format_settings(format_settings_)
 {
     auto & sample = getPort(PortKind::Main).getHeader();
     size_t columns = sample.columns();
@@ -77,9 +77,10 @@ void registerOutputFormatProcessorCSV(FormatFactory & factory)
             WriteBuffer & buf,
             const Block & sample,
             const Context &,
+            FormatFactory::WriteCallback callback,
             const FormatSettings & format_settings)
         {
-                return std::make_shared<CSVRowOutputFormat>(buf, sample, with_names, format_settings);
+                return std::make_shared<CSVRowOutputFormat>(buf, sample, with_names, callback, format_settings);
         });
     }
 }

diff --git a/dbms/src/Processors/Formats/Impl/CSVRowOutputFormat.h b/dbms/src/Processors/Formats/Impl/CSVRowOutputFormat.h
@@ -20,7 +20,7 @@ class CSVRowOutputFormat : public IRowOutputFormat
     /** with_names - output in the first line a header with column names
       * with_types - output in the next line header with the names of the types
       */
-    CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, const FormatSettings & format_settings_);
+    CSVRowOutputFormat(WriteBuffer & out_, const Block & header_, bool with_names_, FormatFactory::WriteCallback callback, const FormatSettings & format_settings_);
 
     String getName() const override { return "CSVRowOutputFormat"; }
 
@@ -45,4 +45,3 @@ class CSVRowOutputFormat : public IRowOutputFormat
 };
 
 }
-