Skip to content
This repository has been archived by the owner on Sep 27, 2019. It is now read-only.

Commit

Permalink
Merge f98294d into 2b56468
Browse files Browse the repository at this point in the history
  • Loading branch information
rohit-agrawal10 committed Apr 23, 2017
2 parents 2b56468 + f98294d commit aedbd2a
Show file tree
Hide file tree
Showing 36 changed files with 907 additions and 205 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Expand Up @@ -313,3 +313,6 @@ third_party/gflags/

# Test output file logs to ignore
stats_log

test.txt
Vagrantfile
53 changes: 53 additions & 0 deletions CodeReview#1_README.md
@@ -0,0 +1,53 @@
## Files Added

* [compressed_tile.cpp](https://github.com/rohit-cmu/compression-peloton/blob/master/src/storage/compressed_tile.cpp)
* [compressed_tile.h](https://github.com/rohit-cmu/compression-peloton/blob/master/src/include/storage/compressed_tile.h)

### Other Existing Files Modified

Some additional functions have been added to the following existing files to support database compression:

* [tile.h](https://github.com/rohit-cmu/compression-peloton/blob/master/src/include/storage/tile.h)
* [tile.cpp](https://github.com/rohit-cmu/compression-peloton/blob/master/src/storage/tile.cpp)
* [tile_group.h](https://github.com/rohit-cmu/compression-peloton/blob/master/src/include/storage/tile_group.h)
* [tile_group.cpp](https://github.com/rohit-cmu/compression-peloton/blob/master/src/storage/tile_group.cpp)
* [data_table.cpp](https://github.com/rohit-cmu/compression-peloton/blob/master/src/storage/data_table.cpp)


## Strategy for Compressing

* Once a tile gets full (all slots occupied by tuples), the CompressTile is created.
* We scan each column and sort it.
* We compute the median, min, and max; these give us the min offset (min - median) and the max offset (max - median).
* If these offsets can be represented in a smaller data type than the original data type, we compress the entire column.
* The median is chosen as the base, and essentially only the offsets are stored in the column.
* Currently we can compress SMALLINT, INTEGER, and BIGINT. TINYINT is already 1 byte, so we do not compress it.
* We are yet to add support for decimal values.
* The median is also stored as metadata so that the original value can be retrieved later.

## Testing

* [Compression Correctness Test](https://github.com/rohit-cmu/compression-peloton/blob/master/test/sql/compression_sql_test.cpp)
* [Compression Size Test](https://github.com/rohit-cmu/compression-peloton/blob/master/test/storage/compression_test.cpp)


### Compression Correctness Test:
* This test inserts 25 tuples
* Each tuple is of the form (i, i*100) where i belongs to (0,25)
* Since each tile group contains 1 tile and 10 tuples per tile, there are 3 tile groups formed.
* The first 2 tile groups are full and get compressed.
* The third tile group still has 5 vacant slots, so it is not full and remains uncompressed.
* Thus we now have compressed and uncompressed data.
* We now perform a SELECT * on this table and expect to correctly receive the true values of both the compressed and the uncompressed data.

### Compression Size Test:
* This test inserts 100 tuples
* Each tuple is of the form [Integer, Integer, Decimal, Varchar]
* Thus the tuple length is (4+ 4+ 8+ 8) = 24 bytes.
* We currently support the compression of integers
* The integers get compressed to TINYINT, reducing the tuple size to (1 + 1 + 8 + 8) = 18 bytes.
* The test checks this decrease in size.




11 changes: 8 additions & 3 deletions src/catalog/catalog.cpp
Expand Up @@ -284,6 +284,7 @@ ResultType Catalog::CreatePrimaryIndex(oid_t database_oid, oid_t table_oid,

LOG_TRACE("Trying to create primary index for table %d", table_oid);


try {
auto database = GetDatabaseWithOid(database_oid);
try {
Expand All @@ -301,7 +302,7 @@ ResultType Catalog::CreatePrimaryIndex(oid_t database_oid, oid_t table_oid,
if (column.IsPrimary()) {
key_attrs.push_back(column_idx);
}
column_idx++;
column_idx++;
}

if (key_attrs.empty()) return ResultType::FAILURE;
Expand Down Expand Up @@ -398,7 +399,6 @@ ResultType Catalog::CreateIndex(const std::string &database_name,
ResultType success =
CreateIndex(database_oid, table_oid, index_attr, index_name, index_type,
index_constraint, unique_keys, txn);

return success;
}

Expand Down Expand Up @@ -499,6 +499,7 @@ ResultType Catalog::CreateIndex(oid_t database_oid, oid_t table_oid,
// DROP FUNCTIONS
//===----------------------------------------------------------------------===//


/*
* only for test purposes
*/
Expand Down Expand Up @@ -528,6 +529,7 @@ ResultType Catalog::DropDatabaseWithOid(oid_t database_oid,
return ResultType::FAILURE;
}


// Drop actual tables in the database
auto table_oids =
TableCatalog::GetInstance()->GetTableOids(database_oid, txn);
Expand Down Expand Up @@ -581,11 +583,13 @@ ResultType Catalog::DropTable(const std::string &database_name,
return ResultType::FAILURE;
}


// Checking if statement is valid
oid_t database_oid =
DatabaseCatalog::GetInstance()->GetDatabaseOid(database_name, txn);
if (database_oid == INVALID_OID) {
LOG_TRACE("Cannot find database %s!", database_name.c_str());

return ResultType::FAILURE;
}

Expand Down Expand Up @@ -643,6 +647,7 @@ ResultType Catalog::DropIndex(oid_t index_oid, concurrency::Transaction *txn) {
return ResultType::FAILURE;
}


// find table_oid by looking up pg_index using index_oid
// txn is nullptr, one sentence Transaction
oid_t table_oid = IndexCatalog::GetInstance()->GetTableOid(index_oid, txn);
Expand Down Expand Up @@ -714,7 +719,6 @@ storage::Database *Catalog::GetDatabaseWithName(
if (single_statement_txn) {
txn_manager.CommitTransaction(txn);
}

return GetDatabaseWithOid(database_oid);
}

Expand Down Expand Up @@ -845,6 +849,7 @@ storage::Database *Catalog::GetDatabaseWithOffset(oid_t database_offset) const {
return database;
}


//===--------------------------------------------------------------------===//
// FUNCTION
//===--------------------------------------------------------------------===//
Expand Down
18 changes: 4 additions & 14 deletions src/catalog/manager.cpp
Expand Up @@ -36,49 +36,39 @@ Manager &Manager::GetInstance() {

// Records `location` as the tile group for `oid` by calling Update on
// tile_group_locator_.
//
// @param oid       identifier under which the tile group is stored
// @param location  shared pointer to the tile group being registered
void Manager::AddTileGroup(const oid_t oid,
std::shared_ptr<storage::TileGroup> location) {

// add/update the catalog reference to the tile group
tile_group_locator_.Update(oid, location);
}

// Drops the catalog reference stored for `oid` by calling Erase on
// tile_group_locator_.
//
// @param oid  identifier of the tile group entry to drop
void Manager::DropTileGroup(const oid_t oid) {

// drop the catalog reference to the tile group
// NOTE(review): empty_tile_group_ is passed as Erase's second argument —
// presumably the placeholder value left in the slot; confirm against the
// locator's Erase implementation.
tile_group_locator_.Erase(oid, empty_tile_group_);
}

// Looks up `oid` in tile_group_locator_ and returns whatever Find yields.
//
// @param oid  identifier of the tile group to look up
// @return     the shared pointer returned by tile_group_locator_.Find(oid)
//             (NOTE(review): the value returned for an unknown oid is decided
//             by Find and is not visible here — confirm before dereferencing)
std::shared_ptr<storage::TileGroup> Manager::GetTileGroup(const oid_t oid) {
std::shared_ptr<storage::TileGroup> location;

location = tile_group_locator_.Find(oid);

return location;
}

// used for logging test
// Calls Clear on tile_group_locator_, passing empty_tile_group_ as its
// argument (presumably the value each slot is reset to — confirm against the
// locator's Clear implementation).
void Manager::ClearTileGroup() {

tile_group_locator_.Clear(empty_tile_group_);
}


void Manager::AddIndirectionArray(const oid_t oid,
std::shared_ptr<storage::IndirectionArray> location) {
void Manager::ClearTileGroup() { tile_group_locator_.Clear(empty_tile_group_); }

// Records `location` as the indirection array for `oid` by calling Update on
// indirection_array_locator_.
//
// @param oid       identifier under which the indirection array is stored
// @param location  shared pointer to the indirection array being registered
void Manager::AddIndirectionArray(
const oid_t oid, std::shared_ptr<storage::IndirectionArray> location) {
// add/update the catalog reference to the indirection array
indirection_array_locator_.Update(oid, location);
}

// Drops the catalog reference stored for `oid` by calling Erase on
// indirection_array_locator_.
//
// @param oid  identifier of the indirection array entry to drop
void Manager::DropIndirectionArray(const oid_t oid) {

// drop the catalog reference to the indirection array
// NOTE(review): empty_indirection_array_ is passed as Erase's second
// argument — presumably the placeholder value left in the slot; confirm
// against LockFreeArray::Erase.
indirection_array_locator_.Erase(oid, empty_indirection_array_);
}


// used for logging test
// Calls Clear on indirection_array_locator_, passing
// empty_indirection_array_ as its argument (presumably the value each slot is
// reset to — confirm against LockFreeArray::Clear).
void Manager::ClearIndirectionArray() {

indirection_array_locator_.Clear(empty_indirection_array_);
}

Expand Down
68 changes: 35 additions & 33 deletions src/gc/gc_manager.cpp
Expand Up @@ -22,40 +22,42 @@ namespace peloton {
namespace gc {

// Check a tuple and reclaim all varlen field
void GCManager::CheckAndReclaimVarlenColumns(storage::TileGroup *tg, oid_t tuple_id) {
oid_t tile_count = tg->tile_count;
oid_t tile_col_count;
type::Type::TypeId type_id;
char *tuple_location;
char *field_location;
char *varlen_ptr;

for (oid_t tile_itr = 0; tile_itr < tile_count; tile_itr++) {
const catalog::Schema &schema = tg->tile_schemas[tile_itr];

tile_col_count = schema.GetColumnCount();

storage::Tile *tile = tg->GetTile(tile_itr);
PL_ASSERT(tile);
for (oid_t tile_col_itr = 0; tile_col_itr < tile_col_count; ++tile_col_itr) {
type_id = schema.GetType(tile_col_itr);

if ((type_id != type::Type::TypeId::VARCHAR && type_id != type::Type::TypeId::VARBINARY)
|| (schema.IsInlined(tile_col_itr) == true)) {
// Not of varlen type, or is inlined, skip
continue;
}
// Get the raw varlen pointer
tuple_location = tile->GetTupleLocation(tuple_id);
field_location = tuple_location + schema.GetOffset(tile_col_itr);
varlen_ptr = type::Value::GetDataFromStorage(type_id, field_location);
// Call the corresponding varlen pool free
if (varlen_ptr != nullptr) {
tile->pool->Free(varlen_ptr);
}
}
// For the tuple at `tuple_id`, visits every column of every tile in `tg` and,
// for each non-inlined VARCHAR / VARBINARY column, hands the pointer read
// from the tuple's field to that tile's pool via Free().
//
// @param tg        tile group whose tiles are scanned (tile_count,
//                  tile_schemas and GetTile() are read from it)
// @param tuple_id  id passed to Tile::GetTupleLocation() to locate the tuple
//                  inside each tile
void GCManager::CheckAndReclaimVarlenColumns(storage::TileGroup *tg,
oid_t tuple_id) {
oid_t tile_count = tg->tile_count;
oid_t tile_col_count;
type::Type::TypeId type_id;
char *tuple_location;
char *field_location;
char *varlen_ptr;

// Each tile has its own schema, so column types and offsets are resolved
// per tile.
for (oid_t tile_itr = 0; tile_itr < tile_count; tile_itr++) {
const catalog::Schema &schema = tg->tile_schemas[tile_itr];

tile_col_count = schema.GetColumnCount();

storage::Tile *tile = tg->GetTile(tile_itr);
PL_ASSERT(tile);
for (oid_t tile_col_itr = 0; tile_col_itr < tile_col_count;
++tile_col_itr) {
type_id = schema.GetType(tile_col_itr);

// IsInlined() is only evaluated for VARCHAR / VARBINARY columns because of
// the short-circuiting ||.
if ((type_id != type::Type::TypeId::VARCHAR &&
type_id != type::Type::TypeId::VARBINARY) ||
(schema.IsInlined(tile_col_itr) == true)) {
// Not of varlen type, or is inlined, skip
continue;
}
// Get the raw varlen pointer
// NOTE(review): tuple_location depends only on `tile` and `tuple_id`, not on
// the column, yet it is recomputed for every varlen column.
tuple_location = tile->GetTupleLocation(tuple_id);
field_location = tuple_location + schema.GetOffset(tile_col_itr);
varlen_ptr = type::Value::GetDataFromStorage(type_id, field_location);
// Call the corresponding varlen pool free
// A null pointer is skipped rather than passed to Free().
if (varlen_ptr != nullptr) {
tile->pool->Free(varlen_ptr);
}
}
}

}
}
}
1 change: 0 additions & 1 deletion src/gc/gc_manager_factory.cpp
Expand Up @@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//


#include "gc/gc_manager_factory.h"

namespace peloton {
Expand Down
1 change: 0 additions & 1 deletion src/gc/transaction_level_gc_manager.cpp
Expand Up @@ -26,7 +26,6 @@ bool TransactionLevelGCManager::ResetTuple(const ItemPointer &location) {
auto tile_group = manager.GetTileGroup(location.block).get();

auto tile_group_header = tile_group->GetHeader();

// Reset the header
tile_group_header->SetTransactionId(location.offset, INVALID_TXN_ID);
tile_group_header->SetBeginCommitId(location.offset, MAX_CID);
Expand Down
2 changes: 0 additions & 2 deletions src/include/catalog/constraint.h
Expand Up @@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//


#pragma once

#include <string>
Expand Down Expand Up @@ -68,7 +67,6 @@ class Constraint : public Printable {
oid_t unique_index_list_offset = INVALID_OID;

std::string constraint_name;

};

} // End catalog namespace
Expand Down
1 change: 0 additions & 1 deletion src/include/catalog/foreign_key.h
Expand Up @@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//


#pragma once

#include <string>
Expand Down
4 changes: 2 additions & 2 deletions src/include/catalog/manager.h
Expand Up @@ -64,7 +64,6 @@ class Manager {

void ClearTileGroup(void);


//===--------------------------------------------------------------------===//
// INDIRECTION ARRAY ALLOCATION
//===--------------------------------------------------------------------===//
Expand Down Expand Up @@ -103,7 +102,8 @@ class Manager {
//===--------------------------------------------------------------------===//
std::atomic<oid_t> indirection_array_oid_ = ATOMIC_VAR_INIT(START_OID);

LockFreeArray<std::shared_ptr<storage::IndirectionArray>> indirection_array_locator_;
LockFreeArray<std::shared_ptr<storage::IndirectionArray>>
indirection_array_locator_;

static std::shared_ptr<storage::IndirectionArray> empty_indirection_array_;
};
Expand Down

0 comments on commit aedbd2a

Please sign in to comment.