Skip to content

Commit 518d8fc

Browse files
committed
Fix simple data types (WIP)
* Removed the use of gperf in favor of using a statically allocated hash table. This fixes the problem of having the value type information in multiple files.
1 parent f0dd800 commit 518d8fc

33 files changed

+579
-837
lines changed

include/cassandra.h

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -530,37 +530,37 @@ typedef enum CassIndexType_ {
530530
} CassIndexType;
531531

532532
#define CASS_VALUE_TYPE_MAPPING(XX) \
533-
XX(CASS_VALUE_TYPE_CUSTOM, 0x0000, "") \
534-
XX(CASS_VALUE_TYPE_ASCII, 0x0001, "ascii") \
535-
XX(CASS_VALUE_TYPE_BIGINT, 0x0002, "bigint") \
536-
XX(CASS_VALUE_TYPE_BLOB, 0x0003, "blob") \
537-
XX(CASS_VALUE_TYPE_BOOLEAN, 0x0004, "boolean") \
538-
XX(CASS_VALUE_TYPE_COUNTER, 0x0005, "counter") \
539-
XX(CASS_VALUE_TYPE_DECIMAL, 0x0006, "decimal") \
540-
XX(CASS_VALUE_TYPE_DOUBLE, 0x0007, "double") \
541-
XX(CASS_VALUE_TYPE_FLOAT, 0x0008, "float") \
542-
XX(CASS_VALUE_TYPE_INT, 0x0009, "int") \
543-
XX(CASS_VALUE_TYPE_TEXT, 0x000A, "text") \
544-
XX(CASS_VALUE_TYPE_TIMESTAMP, 0x000B, "timestamp") \
545-
XX(CASS_VALUE_TYPE_UUID, 0x000C, "uuid") \
546-
XX(CASS_VALUE_TYPE_VARCHAR, 0x000D, "varchar") \
547-
XX(CASS_VALUE_TYPE_VARINT, 0x000E, "varint") \
548-
XX(CASS_VALUE_TYPE_TIMEUUID, 0x000F, "timeuuid") \
549-
XX(CASS_VALUE_TYPE_INET, 0x0010, "inet") \
550-
XX(CASS_VALUE_TYPE_DATE, 0x0011, "date") \
551-
XX(CASS_VALUE_TYPE_TIME, 0x0012, "time") \
552-
XX(CASS_VALUE_TYPE_SMALL_INT, 0x0013, "smallint") \
553-
XX(CASS_VALUE_TYPE_TINY_INT, 0x0014, "tinyint") \
554-
XX(CASS_VALUE_TYPE_DURATION, 0x0015, "duration") \
555-
XX(CASS_VALUE_TYPE_LIST, 0x0020, "list") \
556-
XX(CASS_VALUE_TYPE_MAP, 0x0021, "map") \
557-
XX(CASS_VALUE_TYPE_SET, 0x0022, "set") \
558-
XX(CASS_VALUE_TYPE_UDT, 0x0030, "") \
559-
XX(CASS_VALUE_TYPE_TUPLE, 0x0031, "tuple")
533+
XX(CASS_VALUE_TYPE_CUSTOM, 0x0000, "", "") \
534+
XX(CASS_VALUE_TYPE_ASCII, 0x0001, "ascii", "org.apache.cassandra.db.marshal.AsciiType") \
535+
XX(CASS_VALUE_TYPE_BIGINT, 0x0002, "bigint", "org.apache.cassandra.db.marshal.LongType") \
536+
XX(CASS_VALUE_TYPE_BLOB, 0x0003, "blob", "org.apache.cassandra.db.marshal.BytesType") \
537+
XX(CASS_VALUE_TYPE_BOOLEAN, 0x0004, "boolean", "org.apache.cassandra.db.marshal.BooleanType") \
538+
XX(CASS_VALUE_TYPE_COUNTER, 0x0005, "counter", "org.apache.cassandra.db.marshal.CounterColumnType") \
539+
XX(CASS_VALUE_TYPE_DECIMAL, 0x0006, "decimal", "org.apache.cassandra.db.marshal.DecimalType") \
540+
XX(CASS_VALUE_TYPE_DOUBLE, 0x0007, "double", "org.apache.cassandra.db.marshal.DoubleType") \
541+
XX(CASS_VALUE_TYPE_FLOAT, 0x0008, "float", "org.apache.cassandra.db.marshal.FloatType") \
542+
XX(CASS_VALUE_TYPE_INT, 0x0009, "int", "org.apache.cassandra.db.marshal.Int32Type") \
543+
XX(CASS_VALUE_TYPE_TEXT, 0x000A, "text", "org.apache.cassandra.db.marshal.UTF8Type") \
544+
XX(CASS_VALUE_TYPE_TIMESTAMP, 0x000B, "timestamp", "org.apache.cassandra.db.marshal.TimestampType") \
545+
XX(CASS_VALUE_TYPE_UUID, 0x000C, "uuid", "org.apache.cassandra.db.marshal.UUIDType") \
546+
XX(CASS_VALUE_TYPE_VARCHAR, 0x000D, "varchar", "") \
547+
XX(CASS_VALUE_TYPE_VARINT, 0x000E, "varint", "org.apache.cassandra.db.marshal.IntegerType") \
548+
XX(CASS_VALUE_TYPE_TIMEUUID, 0x000F, "timeuuid", "org.apache.cassandra.db.marshal.TimeUUIDType") \
549+
XX(CASS_VALUE_TYPE_INET, 0x0010, "inet", "org.apache.cassandra.db.marshal.InetAddressType") \
550+
XX(CASS_VALUE_TYPE_DATE, 0x0011, "date", "org.apache.cassandra.db.marshal.SimpleDateType") \
551+
XX(CASS_VALUE_TYPE_TIME, 0x0012, "time", "org.apache.cassandra.db.marshal.TimeType") \
552+
XX(CASS_VALUE_TYPE_SMALL_INT, 0x0013, "smallint", "org.apache.cassandra.db.marshal.ShortType") \
553+
XX(CASS_VALUE_TYPE_TINY_INT, 0x0014, "tinyint", "org.apache.cassandra.db.marshal.ByteType") \
554+
XX(CASS_VALUE_TYPE_DURATION, 0x0015, "duration", "org.apache.cassandra.db.marshal.DurationType") \
555+
XX(CASS_VALUE_TYPE_LIST, 0x0020, "list", "org.apache.cassandra.db.marshal.ListType") \
556+
XX(CASS_VALUE_TYPE_MAP, 0x0021, "map", "org.apache.cassandra.db.marshal.MapType") \
557+
XX(CASS_VALUE_TYPE_SET, 0x0022, "set", "org.apache.cassandra.db.marshal.SetType") \
558+
XX(CASS_VALUE_TYPE_UDT, 0x0030, "", "") \
559+
XX(CASS_VALUE_TYPE_TUPLE, 0x0031, "tuple", "org.apache.cassandra.db.marshal.TupleType")
560560

561561
typedef enum CassValueType_ {
562562
CASS_VALUE_TYPE_UNKNOWN = 0xFFFF,
563-
#define XX_VALUE_TYPE(name, type, cql) name = type,
563+
#define XX_VALUE_TYPE(name, type, cql, klass) name = type,
564564
CASS_VALUE_TYPE_MAPPING(XX_VALUE_TYPE)
565565
#undef XX_VALUE_TYPE
566566
/* @cond IGNORE */

src/aligned_storage.hpp

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,16 @@ namespace cass {
3737
// constructor.
3838

3939
template<size_t N, size_t A>
40-
struct AlignedStorage;
40+
class AlignedStorage;
4141

42-
#define ALIGNED_STORAGE(Alignment) \
43-
template<size_t N> \
44-
struct AlignedStorage<N, Alignment> { \
45-
void* address() { return data_; } \
46-
const void* address() const { return data_; } \
47-
template<class T> \
48-
T* as() { return reinterpret_cast<T*>(data_); } \
49-
template<class T> \
50-
const T* as() const { return reinterpret_cast<const T*>(data_); } \
51-
private: \
52-
ALIGN_AS(Alignment) char data_[N]; \
42+
#define ALIGNED_STORAGE(Alignment) \
43+
template<size_t N> \
44+
class AlignedStorage<N, Alignment> { \
45+
public: \
46+
void* address() { return data_; } \
47+
const void* address() const { return data_; } \
48+
private: \
49+
ALIGN_AS(Alignment) char data_[N]; \
5350
}
5451

5552
ALIGNED_STORAGE(1);

src/connection.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1129,7 +1129,7 @@ void Connection::PendingWriteSsl::flush() {
11291129

11301130
encrypt();
11311131

1132-
FixedVector<uv_buf_t, SSL_ENCRYPTED_BUFS_COUNT> bufs;
1132+
SmallVector<uv_buf_t, SSL_ENCRYPTED_BUFS_COUNT> bufs;
11331133
encrypted_size_ = ssl_session->outgoing().peek_multiple(prev_pos, &bufs);
11341134

11351135
LOG_TRACE("Sending %u encrypted bytes", static_cast<unsigned int>(encrypted_size_));

src/data_type.cpp

Lines changed: 55 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,12 @@
1616

1717
#include "data_type.hpp"
1818

19-
#include "aligned_storage.hpp"
2019
#include "collection.hpp"
2120
#include "external.hpp"
2221
#include "tuple.hpp"
2322
#include "types.hpp"
2423
#include "utils.hpp"
2524
#include "user_type_value.hpp"
26-
#include "value_types_by_class.hpp"
27-
#include "value_types_by_cql.hpp"
2825

2926
#include <string.h>
3027

@@ -387,49 +384,74 @@ namespace cass {
387384

388385
const DataType::ConstPtr DataType::NIL;
389386

390-
SimpleDataTypeCache::SimpleDataTypeCache() {
391-
// Add a reference so that the static memory is never deleted by the heap
392-
// allocator.
393-
#define XX_VALUE_TYPE(name, type, cql) \
394-
for (int i = 0; i < COPIES_PER_SIMPLE_DATA_TYPE; ++i) { \
395-
(new(&DataType::data_types_[name][i]) DataType(name))->inc_ref(); \
396-
} \
387+
DataType::ConstPtr DataType::create_by_class(StringRef name) {
388+
CassValueType value_type = ValueTypes::by_class(name);
389+
if (value_type == CASS_VALUE_TYPE_UNKNOWN) {
390+
return DataType::NIL;
391+
}
392+
return ConstPtr(new DataType(value_type));
393+
}
394+
395+
DataType::ConstPtr DataType::create_by_cql(StringRef name) {
396+
CassValueType value_type = ValueTypes::by_cql(name);
397+
if (value_type == CASS_VALUE_TYPE_UNKNOWN) {
398+
return DataType::NIL;
399+
}
400+
return ConstPtr(new DataType(value_type));
401+
}
402+
403+
ValueTypes::HashMap ValueTypes::value_types_by_class_;
404+
ValueTypes::HashMap ValueTypes::value_types_by_cql_;
405+
406+
static ValueTypes __value_types__; // Initializer
407+
408+
ValueTypes::ValueTypes() {
409+
value_types_by_class_.set_empty_key("");
410+
value_types_by_cql_.set_empty_key("");
411+
412+
#define XX_VALUE_TYPE(name, type, cql, klass) \
413+
if (strlen(klass) > 0) value_types_by_class_[klass] = name; \
414+
if (strlen(cql) > 0) value_types_by_cql_[cql] = name;
415+
397416
CASS_VALUE_TYPE_MAPPING(XX_VALUE_TYPE)
398417
#undef XX_VALUE_TYPE
399-
400418
}
401419

402-
DataType::ConstPtr SimpleDataTypeCache::by_class(StringRef name) {
403-
ValueTypeByClassMapping* mapping =
404-
ValueTypeByClass::in_word_set(name.data(), name.length());
405-
if (mapping == NULL) return DataType::NIL;
406-
return DataType::ConstPtr(get_for_thread(mapping->value_type));
420+
CassValueType ValueTypes::by_class(StringRef name) {
421+
HashMap::const_iterator i = value_types_by_class_.find(name);
422+
if (i == value_types_by_class_.end()) {
423+
return CASS_VALUE_TYPE_UNKNOWN;
424+
}
425+
return i->second;
407426
}
408427

409-
DataType::ConstPtr SimpleDataTypeCache::by_cql(StringRef name) {
410-
ValueTypeByCqlMapping* mapping =
411-
ValueTypeByCql::in_word_set(name.data(), name.length());
412-
if (mapping == NULL) return DataType::NIL;
413-
return DataType::ConstPtr(get_for_thread(mapping->value_type));
428+
CassValueType ValueTypes::by_cql(StringRef name) {
429+
HashMap::const_iterator i = value_types_by_cql_.find(name);
430+
if (i == value_types_by_cql_.end()) {
431+
return CASS_VALUE_TYPE_UNKNOWN;
432+
}
433+
return i->second;
414434
}
415435

416-
DataType::ConstPtr SimpleDataTypeCache::by_value_type(uint16_t value_type) {
436+
const DataType::ConstPtr& SimpleDataTypeCache::by_value_type(uint16_t value_type) {
417437
if (value_type == CASS_VALUE_TYPE_UNKNOWN ||
418-
value_type <= CASS_VALUE_TYPE_LAST_ENTRY) {
438+
value_type == CASS_VALUE_TYPE_CUSTOM ||
439+
value_type == CASS_VALUE_TYPE_LIST ||
440+
value_type == CASS_VALUE_TYPE_MAP ||
441+
value_type == CASS_VALUE_TYPE_SET ||
442+
value_type == CASS_VALUE_TYPE_UDT ||
443+
value_type == CASS_VALUE_TYPE_TUPLE ||
444+
value_type >= CASS_VALUE_TYPE_LAST_ENTRY) {
419445
return DataType::NIL;
420446
}
421-
return DataType::ConstPtr(get_for_thread(value_type));
422-
}
423-
424-
DataType* SimpleDataTypeCache::get_for_thread(uint16_t value_type) {
425-
unsigned long thread_id = get_thread_id() % COPIES_PER_SIMPLE_DATA_TYPE;
426-
return data_types_[value_type][thread_id].as<DataType>();
447+
DataType::ConstPtr& data_type = cache_[value_type];
448+
if (!data_type) {
449+
data_type = DataType::ConstPtr(
450+
new DataType(static_cast<CassValueType>(value_type)));
451+
}
452+
return data_type;
427453
}
428454

429-
SimpleDataTypeCache::Storage SimpleDataTypeCache::data_types_[CASS_VALUE_TYPE_LAST_ENTRY][COPIES_PER_SIMPLE_DATA_TYPE];
430-
431-
static SimpleDataTypeCache __simple_data_type_cache__; // Initializer
432-
433455
bool cass::IsValidDataType<const Collection*>::operator()(const Collection* value,
434456
const DataType::ConstPtr& data_type) const {
435457
return value->data_type()->equals(data_type);

src/data_type.hpp

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "aligned_storage.hpp"
2121
#include "cassandra.h"
2222
#include "external.hpp"
23+
#include "small_dense_hash_map.hpp"
2324
#include "hash_table.hpp"
2425
#include "macros.hpp"
2526
#include "ref_counted.hpp"
@@ -73,6 +74,9 @@ class DataType : public RefCounted<DataType> {
7374

7475
static const DataType::ConstPtr NIL;
7576

77+
static ConstPtr create_by_class(StringRef name);
78+
static ConstPtr create_by_cql(StringRef name);
79+
7680
DataType(CassValueType value_type = CASS_VALUE_TYPE_UNKNOWN,
7781
bool is_frozen = false)
7882
: value_type_(value_type)
@@ -113,7 +117,7 @@ class DataType : public RefCounted<DataType> {
113117

114118
virtual std::string to_string() const {
115119
switch (value_type_) {
116-
#define XX_VALUE_TYPE(name, type, cql) case name: return cql;
120+
#define XX_VALUE_TYPE(name, type, cql, klass) case name: return cql;
117121
CASS_VALUE_TYPE_MAPPING(XX_VALUE_TYPE)
118122
#undef XX_VALUE_TYPE
119123
default: return "";
@@ -432,33 +436,37 @@ class UserType : public DataType {
432436
CaseInsensitiveHashTable<Field> fields_;
433437
};
434438

435-
// A small prime number to limit the amount of thrashing when multiple
436-
// threads simultaneously modify the reference count of shared simple
437-
// data type instances.
438-
#define COPIES_PER_SIMPLE_DATA_TYPE 5
439-
440-
// Allocations have been a bottleneck when handling response metadata.
441-
// This class initializes and retrieves simple data types (e.g. varchar, int
442-
// etc) so these data types don't need to be allocated/deallocated over and over.
443-
// `DataType` is reference counted so it could lead to multiple threads modifying
444-
// a shared reference count. To mitigate this sharing, thread IDs are used
445-
// to distribute multiple copies of the same data type.
446-
class SimpleDataTypeCache {
439+
class ValueTypes {
447440
public:
448-
SimpleDataTypeCache();
441+
ValueTypes();
449442

450-
static DataType::ConstPtr by_class(StringRef name);
451-
static DataType::ConstPtr by_cql(StringRef name);
452-
static DataType::ConstPtr by_value_type(uint16_t value_type);
443+
static CassValueType by_class(StringRef name);
444+
static CassValueType by_cql(StringRef name);
453445

454446
private:
455-
static DataType* get_for_thread(uint16_t value_type);
447+
typedef SmallDenseHashMap<StringRef, CassValueType,
448+
CASS_VALUE_TYPE_LAST_ENTRY, // Max size
449+
StringRefIHash,
450+
StringRefIEquals> HashMap;
456451

457-
private:
458-
// Use aligned storage to avoid initializing the same memory multiple times.
459-
typedef AlignedStorage<sizeof(DataType), ALIGN_OF(DataType)> Storage;
452+
static HashMap value_types_by_class_;
453+
static HashMap value_types_by_cql_;
454+
};
455+
456+
class SimpleDataTypeCache {
457+
public:
458+
const DataType::ConstPtr& by_class(StringRef name) {
459+
return by_value_type(ValueTypes::by_class(name));
460+
}
461+
462+
const DataType::ConstPtr& by_cql(StringRef name) {
463+
return by_value_type(ValueTypes::by_cql(name));
464+
}
465+
466+
const DataType::ConstPtr& by_value_type(uint16_t value_type);
460467

461-
static Storage data_types_[CASS_VALUE_TYPE_LAST_ENTRY][COPIES_PER_SIMPLE_DATA_TYPE];
468+
private:
469+
DataType::ConstPtr cache_[CASS_VALUE_TYPE_LAST_ENTRY];
462470
};
463471

464472
template<class T>

0 commit comments

Comments
 (0)