Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function arrayFill and arrayReverseFill #7380

Merged
merged 9 commits into from
Nov 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
8 changes: 4 additions & 4 deletions dbms/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
Original file line number Diff line number Diff line change
Expand Up @@ -673,15 +673,15 @@ struct AggregateFunctionAnyHeavyData : Data
};


template <typename Data, bool AllocatesMemoryInArena>
class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data, AllocatesMemoryInArena>>
template <typename Data, bool use_arena>
class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data, use_arena>>
{
private:
DataTypePtr & type;

public:
AggregateFunctionsSingleValue(const DataTypePtr & type_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data, AllocatesMemoryInArena>>({type_}, {})
: IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data, use_arena>>({type_}, {})
, type(this->argument_types[0])
{
if (StringRef(Data::name()) == StringRef("min")
Expand Down Expand Up @@ -722,7 +722,7 @@ class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<

bool allocatesMemoryInArena() const override
{
return AllocatesMemoryInArena;
return use_arena;
}

void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
Expand Down
16 changes: 8 additions & 8 deletions dbms/src/AggregateFunctions/AggregateFunctionQuantile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ namespace ErrorCodes
namespace
{

template <typename Value, bool FloatReturn> using FuncQuantile = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantile, false, std::conditional_t<FloatReturn, Float64, void>, false>;
template <typename Value, bool FloatReturn> using FuncQuantiles = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantiles, false, std::conditional_t<FloatReturn, Float64, void>, true>;
template <typename Value, bool float_return> using FuncQuantile = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantile, false, std::conditional_t<float_return, Float64, void>, false>;
template <typename Value, bool float_return> using FuncQuantiles = AggregateFunctionQuantile<Value, QuantileReservoirSampler<Value>, NameQuantiles, false, std::conditional_t<float_return, Float64, void>, true>;

template <typename Value, bool FloatReturn> using FuncQuantileDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantileDeterministic, true, std::conditional_t<FloatReturn, Float64, void>, false>;
template <typename Value, bool FloatReturn> using FuncQuantilesDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantilesDeterministic, true, std::conditional_t<FloatReturn, Float64, void>, true>;
template <typename Value, bool float_return> using FuncQuantileDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantileDeterministic, true, std::conditional_t<float_return, Float64, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesDeterministic = AggregateFunctionQuantile<Value, QuantileReservoirSamplerDeterministic<Value>, NameQuantilesDeterministic, true, std::conditional_t<float_return, Float64, void>, true>;

template <typename Value, bool _> using FuncQuantileExact = AggregateFunctionQuantile<Value, QuantileExact<Value>, NameQuantileExact, false, void, false>;
template <typename Value, bool _> using FuncQuantilesExact = AggregateFunctionQuantile<Value, QuantileExact<Value>, NameQuantilesExact, false, void, true>;
Expand All @@ -40,11 +40,11 @@ template <typename Value, bool _> using FuncQuantilesTiming = AggregateFunctionQ
template <typename Value, bool _> using FuncQuantileTimingWeighted = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantileTimingWeighted, true, Float32, false>;
template <typename Value, bool _> using FuncQuantilesTimingWeighted = AggregateFunctionQuantile<Value, QuantileTiming<Value>, NameQuantilesTimingWeighted, true, Float32, true>;

template <typename Value, bool FloatReturn> using FuncQuantileTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigest, false, std::conditional_t<FloatReturn, Float32, void>, false>;
template <typename Value, bool FloatReturn> using FuncQuantilesTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigest, false, std::conditional_t<FloatReturn, Float32, void>, true>;
template <typename Value, bool float_return> using FuncQuantileTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigest, false, std::conditional_t<float_return, Float32, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesTDigest = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigest, false, std::conditional_t<float_return, Float32, void>, true>;

template <typename Value, bool FloatReturn> using FuncQuantileTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigestWeighted, true, std::conditional_t<FloatReturn, Float32, void>, false>;
template <typename Value, bool FloatReturn> using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigestWeighted, true, std::conditional_t<FloatReturn, Float32, void>, true>;
template <typename Value, bool float_return> using FuncQuantileTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantileTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, false>;
template <typename Value, bool float_return> using FuncQuantilesTDigestWeighted = AggregateFunctionQuantile<Value, QuantileTDigest<Value>, NameQuantilesTDigestWeighted, true, std::conditional_t<float_return, Float32, void>, true>;


template <template <typename, bool> class Function>
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/AggregateFunctions/ReservoirSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ namespace ReservoirSamplerOnEmpty
};
}

template <typename ResultType, bool IsFloatingPoint>
template <typename ResultType, bool is_float>
struct NanLikeValueConstructor
{
static ResultType getValue()
Expand Down
4 changes: 2 additions & 2 deletions dbms/src/Functions/FunctionBinaryArithmetic.h
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ struct BinaryOperationTraits
};


template <template <typename, typename> class Op, typename Name, bool CanBeExecutedOnDefaultArguments = true>
template <template <typename, typename> class Op, typename Name, bool valid_on_default_arguments = true>
class FunctionBinaryArithmetic : public IFunction
{
const Context & context;
Expand Down Expand Up @@ -944,7 +944,7 @@ class FunctionBinaryArithmetic : public IFunction
}
#endif

bool canBeExecutedOnDefaultArguments() const override { return CanBeExecutedOnDefaultArguments; }
bool canBeExecutedOnDefaultArguments() const override { return valid_on_default_arguments; }
};

}
38 changes: 19 additions & 19 deletions dbms/src/Functions/FunctionsStringSimilarity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace DB
* calculation. If the right string size is big (more than 2**15 bytes),
* the strings are not similar at all and we return 1.
*/
template <size_t N, class CodePoint, bool UTF8, bool CaseInsensitive, bool Symmetric>
template <size_t N, class CodePoint, bool UTF8, bool case_insensitive, bool symmetric>
struct NgramDistanceImpl
{
using ResultType = Float32;
Expand Down Expand Up @@ -93,7 +93,7 @@ struct NgramDistanceImpl
/// Such copying allows us to keep 3 code points from the previous read to produce the 4-grams with them.
memcpy(code_points + (N - 1), pos, default_padding * sizeof(CodePoint));

if constexpr (CaseInsensitive)
if constexpr (case_insensitive)
{
/// We really need template lambdas with C++20 to do it inline
unrollLowering<N - 1>(code_points, std::make_index_sequence<padding_offset>());
Expand Down Expand Up @@ -141,7 +141,7 @@ struct NgramDistanceImpl
/// And first bit of first byte if there are two bytes.
/// For ASCII it works: https://catonmat.net/ascii-case-conversion-trick. It also works for most Cyrillic letters.
/// For others, we don't care now. Lowering UTF is not a cheap operation.
if constexpr (CaseInsensitive)
if constexpr (case_insensitive)
{
switch (length)
{
Expand All @@ -166,7 +166,7 @@ struct NgramDistanceImpl
return num;
}

template <bool SaveNgrams>
template <bool save_ngrams>
static ALWAYS_INLINE inline size_t calculateNeedleStats(
const char * data,
const size_t size,
Expand All @@ -189,7 +189,7 @@ struct NgramDistanceImpl
{
++len;
UInt16 hash = hash_functor(cp + i);
if constexpr (SaveNgrams)
if constexpr (save_ngrams)
*ngram_storage++ = hash;
++ngram_stats[hash];
}
Expand All @@ -199,7 +199,7 @@ struct NgramDistanceImpl
return len;
}

template <bool ReuseStats>
template <bool reuse_stats>
static ALWAYS_INLINE inline UInt64 calculateHaystackStatsAndMetric(
const char * data,
const size_t size,
Expand Down Expand Up @@ -227,9 +227,9 @@ struct NgramDistanceImpl
/// For symmetric version we should add when we can't subtract to get symmetric difference.
if (static_cast<Int16>(ngram_stats[hash]) > 0)
--distance;
else if constexpr (Symmetric)
else if constexpr (symmetric)
++distance;
if constexpr (ReuseStats)
if constexpr (reuse_stats)
ngram_storage[ngram_cnt] = hash;
++ngram_cnt;
--ngram_stats[hash];
Expand All @@ -238,7 +238,7 @@ struct NgramDistanceImpl
} while (start < end && (found = read_code_points(cp, start, end)));

/// Restore the hash map to its initial state.
if constexpr (ReuseStats)
if constexpr (reuse_stats)
{
for (size_t i = 0; i < ngram_cnt; ++i)
++ngram_stats[ngram_storage[i]];
Expand Down Expand Up @@ -270,15 +270,15 @@ struct NgramDistanceImpl
if (data_size <= max_string_size)
{
size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric<false>, data.data(), data_size, common_stats, distance, nullptr);
/// For !Symmetric version we should not use first_size.
if constexpr (Symmetric)
/// For !symmetric version we should not use first_size.
if constexpr (symmetric)
res = distance * 1.f / std::max(first_size + second_size, size_t(1));
else
res = 1.f - distance * 1.f / std::max(second_size, size_t(1));
}
else
{
if constexpr (Symmetric)
if constexpr (symmetric)
res = 1.f;
else
res = 0.f;
Expand Down Expand Up @@ -338,8 +338,8 @@ struct NgramDistanceImpl
/// For now, common stats is a zero array.


/// For !Symmetric version we should not use haystack_stats_size.
if constexpr (Symmetric)
/// For !symmetric version we should not use haystack_stats_size.
if constexpr (symmetric)
res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1));
else
res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1));
Expand All @@ -348,7 +348,7 @@ struct NgramDistanceImpl
{
/// Strings are too big, we are assuming they are not the same. This is done because of limiting number
/// of bigrams added and not allocating too much memory.
if constexpr (Symmetric)
if constexpr (symmetric)
res[i] = 1.f;
else
res[i] = 0.f;
Expand All @@ -366,7 +366,7 @@ struct NgramDistanceImpl
PaddedPODArray<Float32> & res)
{
/// For symmetric version it is better to use vector_constant
if constexpr (Symmetric)
if constexpr (symmetric)
{
vector_constant(needle_data, needle_offsets, std::move(haystack), res);
}
Expand Down Expand Up @@ -457,16 +457,16 @@ struct NgramDistanceImpl
haystack_size, common_stats,
distance,
ngram_storage.get());
/// For !Symmetric version we should not use haystack_stats_size.
if constexpr (Symmetric)
/// For !symmetric version we should not use haystack_stats_size.
if constexpr (symmetric)
res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, size_t(1));
else
res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, size_t(1));
}
else
{
/// if the strings are too big, we say they are completely not the same
if constexpr (Symmetric)
if constexpr (symmetric)
res[i] = 1.f;
else
res[i] = 0.f;
Expand Down
8 changes: 4 additions & 4 deletions dbms/src/Functions/Regexps.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ namespace MultiRegexps
std::map<std::pair<std::vector<String>, std::optional<UInt32>>, Regexps> storage;
};

template <bool SaveIndices, bool CompileForEditDistance>
template <bool save_indices, bool CompileForEditDistance>
inline Regexps constructRegexps(const std::vector<String> & str_patterns, std::optional<UInt32> edit_distance)
{
(void)edit_distance;
Expand Down Expand Up @@ -165,7 +165,7 @@ namespace MultiRegexps
std::unique_ptr<unsigned int[]> ids;

/// We mark the patterns to provide the callback results.
if constexpr (SaveIndices)
if constexpr (save_indices)
{
ids.reset(new unsigned int[patterns.size()]);
for (size_t i = 0; i < patterns.size(); ++i)
Expand Down Expand Up @@ -226,7 +226,7 @@ namespace MultiRegexps
/// Also, we use templates here because each instantiation of function
/// template has its own copy of local static variables which must not be the same
/// for different hyperscan compilations.
template <bool SaveIndices, bool CompileForEditDistance>
template <bool save_indices, bool CompileForEditDistance>
inline Regexps * get(const std::vector<StringRef> & patterns, std::optional<UInt32> edit_distance)
{
/// C++11 has thread-safe function-local statics on most modern compilers.
Expand All @@ -247,7 +247,7 @@ namespace MultiRegexps
it = known_regexps.storage
.emplace(
std::pair{str_patterns, edit_distance},
constructRegexps<SaveIndices, CompileForEditDistance>(str_patterns, edit_distance))
constructRegexps<save_indices, CompileForEditDistance>(str_patterns, edit_distance))
.first;
/// If found, unlock and return the database.
lock.unlock();
Expand Down
126 changes: 126 additions & 0 deletions dbms/src/Functions/array/arrayFill.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnsNumber.h>
#include "FunctionArrayMapped.h"
#include <Functions/FunctionFactory.h>


namespace DB
{

/** Implementation of arrayFill (reverse = false) and arrayReverseFill (reverse = true).
  *
  * `mapped` holds one UInt8 flag per array element (the result of the lambda).
  * Within every array, elements whose flag is non-zero are copied unchanged.
  * Every run of consecutive elements whose flag is zero is replaced:
  *  - reverse = false: by copies of the element right before the run; if the run starts
  *    at the beginning of the array, by copies of the first element of the array;
  *  - reverse = true: by copies of the element right after the run; if the run reaches
  *    the end of the array, by copies of the last element of the array.
  *
  * The result has the same offsets (array sizes) as the input.
  */
template <bool reverse>
struct ArrayFillImpl
{
    /// Traits of this implementation; presumably queried by FunctionArrayMapped (not visible here).
    static bool needBoolean() { return true; }
    static bool needExpression() { return true; }
    static bool needOneArray() { return false; }

    /// The result is an array with the same element type as the source array;
    /// the return type of the lambda is not used.
    static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypePtr & array_element)
    {
        return std::make_shared<DataTypeArray>(array_element);
    }

    /** Builds the filled array column.
      *
      * @param array   source array column.
      * @param mapped  per-element flags: either ColumnUInt8 or a constant UInt8 column.
      * @return        a new ColumnArray sharing the offsets of `array`.
      * @throws Exception(ILLEGAL_COLUMN) if `mapped` is neither of the two supported column kinds.
      */
    static ColumnPtr execute(const ColumnArray & array, ColumnPtr mapped)
    {
        const ColumnUInt8 * column_fill = typeid_cast<const ColumnUInt8 *>(&*mapped);

        const IColumn & in_data = array.getData();
        const IColumn::Offsets & in_offsets = array.getOffsets();
        auto column_data = in_data.cloneEmpty();
        IColumn & out_data = *column_data;

        if (column_fill)
        {
            const IColumn::Filter & fill = column_fill->getData();

            /// All ranges below are half-open: [array_begin, array_end) is the current array,
            /// [begin, end] (end inclusive while scanning) is the current run of equal flags.
            /// Half-open array bounds are essential: an empty array at the very beginning has
            /// offset 0, and computing an inclusive "offset - 1" would wrap around in size_t.
            size_t array_begin = 0;
            size_t begin = 0;
            size_t end = 0;

            out_data.reserve(in_data.size());

            for (const auto offset : in_offsets)
            {
                const size_t array_end = offset;

                /// For an empty array this loop does not execute at all.
                for (; end < array_end; ++end)
                {
                    const bool is_last_in_array = (end + 1 == array_end);

                    /// The run continues while the next element is in the same array and has the same flag.
                    if (!is_last_in_array && fill[end + 1] == fill[begin])
                        continue;

                    const size_t run_length = end + 1 - begin;

                    if (fill[begin])
                    {
                        /// Flagged elements are kept as is.
                        out_data.insertRangeFrom(in_data, begin, run_length);
                    }
                    else if constexpr (reverse)
                    {
                        /// Take the value following the run, or the last element of the array
                        /// if there is nothing after the run.
                        const size_t source = is_last_in_array ? end : end + 1;
                        out_data.insertManyFrom(in_data, source, run_length);
                    }
                    else
                    {
                        /// Take the value preceding the run, or the first element of the array
                        /// if there is nothing before the run.
                        const size_t source = (begin == array_begin) ? array_begin : begin - 1;
                        out_data.insertManyFrom(in_data, source, run_length);
                    }

                    begin = end + 1;
                }

                array_begin = array_end;
            }
        }
        else
        {
            auto column_fill_const = checkAndGetColumnConst<ColumnUInt8>(&*mapped);

            if (!column_fill_const)
                throw Exception("Unexpected type of fill column", ErrorCodes::ILLEGAL_COLUMN);

            /// Every element is flagged: the array is returned unchanged.
            if (column_fill_const->getValue<UInt8>())
                return ColumnArray::create(
                    array.getDataPtr(),
                    array.getOffsetsPtr()
                );

            /// No element is flagged: every array is filled with its first (or, for reverse, last) element.
            size_t array_begin = 0;

            out_data.reserve(in_data.size());

            for (const auto offset : in_offsets)
            {
                const size_t array_end = offset;
                const size_t array_size = array_end - array_begin;

                /// Empty arrays have no element to copy from; touching in_data for them
                /// would address a position outside of the array (or outside of the column).
                if (array_size != 0)
                {
                    if constexpr (reverse)
                        out_data.insertManyFrom(in_data, array_end - 1, array_size);
                    else
                        out_data.insertManyFrom(in_data, array_begin, array_size);
                }

                array_begin = array_end;
            }
        }

        return ColumnArray::create(
            std::move(column_data),
            array.getOffsetsPtr()
        );
    }
};

/// Name tag carrying the SQL-visible name of the forward fill function.
struct NameArrayFill
{
    static constexpr auto name = "arrayFill";
};

/// Name tag carrying the SQL-visible name of the reverse fill function.
struct NameArrayReverseFill
{
    static constexpr auto name = "arrayReverseFill";
};
/// arrayFill: ArrayFillImpl instantiated with reverse = false.
using FunctionArrayFill = FunctionArrayMapped<
    ArrayFillImpl</* reverse = */ false>,
    NameArrayFill>;

/// arrayReverseFill: ArrayFillImpl instantiated with reverse = true.
using FunctionArrayReverseFill = FunctionArrayMapped<
    ArrayFillImpl</* reverse = */ true>,
    NameArrayReverseFill>;

/// Registers both fill functions (arrayFill, then arrayReverseFill) in the given factory.
void registerFunctionsArrayFill(FunctionFactory & factory)
{
    /// Forward variant.
    factory.registerFunction<FunctionArrayFill>();

    /// Reverse variant.
    factory.registerFunction<FunctionArrayReverseFill>();
}

}