Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,29 @@ CXXFLAGS = -std=c++17 -Wall -Wextra -O2 -I.

BUILD_DIR = build

DBMS_OBJS = $(BUILD_DIR)/main.o $(BUILD_DIR)/src/parser.o
DBMS_OBJS = $(BUILD_DIR)/main.o \
$(BUILD_DIR)/src/parser.o \
$(BUILD_DIR)/src/storage/disk_manager.o \
$(BUILD_DIR)/src/storage/buffer_pool.o \
$(BUILD_DIR)/src/storage/slotted_page.o \
$(BUILD_DIR)/src/storage/heap_file.o \
$(BUILD_DIR)/src/sql/tuple.o \
$(BUILD_DIR)/src/sql/catalog.o
TEST_OBJS = $(BUILD_DIR)/tests/test_parser.o \
$(BUILD_DIR)/tests/storage/test_disk_manager.o \
$(BUILD_DIR)/tests/storage/test_buffer_pool.o \
$(BUILD_DIR)/tests/storage/test_slotted_page.o \
$(BUILD_DIR)/tests/storage/test_heap_file.o \
$(BUILD_DIR)/tests/storage/test_integration.o \
$(BUILD_DIR)/tests/sql/test_tuple.o \
$(BUILD_DIR)/tests/sql/test_catalog.o \
$(BUILD_DIR)/src/parser.o \
$(BUILD_DIR)/src/storage/disk_manager.o \
$(BUILD_DIR)/src/storage/buffer_pool.o \
$(BUILD_DIR)/src/storage/slotted_page.o \
$(BUILD_DIR)/src/storage/heap_file.o
$(BUILD_DIR)/src/storage/heap_file.o \
$(BUILD_DIR)/src/sql/tuple.o \
$(BUILD_DIR)/src/sql/catalog.o

dbms: $(DBMS_OBJS)
$(CXX) $(CXXFLAGS) -o $@ $^
Expand Down
122 changes: 116 additions & 6 deletions main.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
#include "src/parser.h"
#include "src/sql/catalog.h"
#include "src/sql/tuple.h"
#include "src/storage/buffer_pool.h"
#include "src/storage/disk_manager.h"
#include "src/storage/heap_file.h"

#include <cstdint>
#include <filesystem>
#include <iostream>
#include <stdexcept>
#include <string>
#include <tuple>
#include <vector>

// Pretty-print a parsed SelectQuery to stdout in a stable, debuggable format.
// =============================================================================
// Parser demo — parse a handful of SQL strings and print the resulting AST.
// =============================================================================

static void printQuery(const SelectQuery& q) {
std::cout << " columns: ";
if (q.select_all) {
Expand Down Expand Up @@ -34,9 +45,8 @@ static void printQuery(const SelectQuery& q) {
}
}

// Driver: parse a handful of example queries and print the resulting AST.
// The last query is intentionally malformed to exercise the error path.
int main() {
static void runParserDemo() {
std::cout << "=== Parser demo ==========================================\n\n";
const std::vector<std::string> queries = {
"SELECT id, name FROM users WHERE age > 18",
"SELECT * FROM products",
Expand All @@ -46,19 +56,119 @@ int main() {
"SELECT * FROM a JOIN b ON a.x = b.x JOIN c ON b.y = c.y WHERE c.z > 0",
"SELECT FROM users",
};

for (const auto& sql : queries) {
std::cout << "SQL: " << sql << "\n";
try {
Parser p(sql);
SelectQuery q = p.parse();
printQuery(q);
} catch (const std::exception& e) {
// Lex or parse error — keep going so remaining examples still run.
std::cout << " error: " << e.what() << "\n";
}
std::cout << "\n";
}
}

// =============================================================================
// Storage demo — exercise the full storage + catalog stack end-to-end:
//
// 1. open a brand-new database file
// 2. create the catalog (allocates __tables and __columns at pages 0/1)
// 3. createTable("users", schema)
// 4. insert a few rows via TupleCodec → HeapFile
// 5. flush, drop the in-memory state, reopen
// 6. open the catalog with no extra information, look up "users",
// open its heap file, scan and print every row
// =============================================================================

namespace {

// Writes a fixed set of demo rows into the 'users' heap file.
//
// Every (id, name, age) triple is serialized with TupleCodec against
// info.schema and inserted into the heap file opened at info.root_page.
// Prints the number of rows written once all inserts are done.
void seedUsers(BufferPool& bp, const Catalog::TableInfo& info) {
    using UserRow = std::tuple<int32_t, std::string, int32_t>;
    const std::vector<UserRow> demo_rows = {
        {1, "alice", 30},
        {2, "bob", 25},
        {3, "carol", 40},
        {4, "dave", 19},
        {5, "eve", 33},
    };

    HeapFile heap(&bp, info.root_page);
    for (const UserRow& row : demo_rows) {
        // Value order must follow the column order of info.schema.
        const auto encoded = TupleCodec::encode(info.schema, {
            Value::Int32(std::get<0>(row)),
            Value::Text(std::get<1>(row)),
            Value::Int32(std::get<2>(row)),
        });
        heap.insert(encoded.data(), encoded.size());
    }

    std::cout << " inserted " << demo_rows.size() << " rows into 'users'\n";
}

// Scans the heap file rooted at info.root_page and prints one line per row.
//
// Each stored record is decoded with TupleCodec against info.schema; the
// first three decoded values are printed as id (i32), name (text) and
// age (i32), prefixed by the record's (page_id, slot_id).
void scanUsers(BufferPool& bp, const Catalog::TableInfo& info) {
    HeapFile heap(&bp, info.root_page);
    for (const auto& record : heap) {
        const auto& [rid, payload] = record;
        const auto fields =
            TupleCodec::decode(info.schema, payload.data(), payload.size());

        std::cout << " rid=(" << rid.page_id << "," << rid.slot_id << ")";
        std::cout << " id=" << fields[0].i32;
        std::cout << " name=" << fields[1].text;
        std::cout << " age=" << fields[2].i32 << "\n";
    }
}

} // namespace

// End-to-end storage demo.
//
// Phase 1 creates a fresh database file, bootstraps the catalog, creates the
// 'users' table, inserts the demo rows and flushes the buffer pool. All
// phase-1 objects live in an inner scope so they are destroyed before the
// file is reopened.
//
// Phase 2 reopens the same file with a new DiskManager/BufferPool/Catalog,
// looks 'users' up by name and prints every row.
//
// Throws std::runtime_error if 'users' cannot be found in the catalog in
// either phase; Catalog::getTable returns nullptr for unknown names, so the
// result must be checked before it is dereferenced.
static void runStorageDemo() {
    std::cout << "=== Storage demo =========================================\n\n";
    const std::string path = "/tmp/dbms_demo.db";

    // Start from a clean slate so the demo is reproducible. Errors (for
    // example "file does not exist") are deliberately ignored via `ec`.
    std::error_code ec;
    std::filesystem::remove(path, ec);

    // ----- Phase 1: create + populate ------------------------------------
    {
        DiskManager dm(path);
        BufferPool bp(8, &dm);
        Catalog cat = Catalog::create(&bp);

        const Schema users_schema{{
            {"id", Type::Int32, false},
            {"name", Type::Text, false},
            {"age", Type::Int32, true},
        }};
        cat.createTable("users", users_schema);

        // Look the table up once and validate it instead of dereferencing
        // the result of getTable() blindly at every use.
        const auto* created = cat.getTable("users");
        if (created == nullptr) {
            throw std::runtime_error(
                "runStorageDemo: table 'users' missing from catalog after createTable");
        }

        std::cout << "[phase 1] created catalog + table 'users'\n";
        std::cout << " __tables is at page " << Catalog::TABLES_ROOT << "\n";
        std::cout << " __columns is at page " << Catalog::COLUMNS_ROOT << "\n";
        std::cout << " users heap is at page "
                  << created->root_page << "\n";

        seedUsers(bp, *created);
        bp.flushAll();
        std::cout << " flushed; file size = "
                  << std::filesystem::file_size(path) << " bytes\n\n";
    }

    // ----- Phase 2: cold reopen + scan -----------------------------------
    DiskManager dm(path);
    BufferPool bp(8, &dm);
    Catalog cat(&bp);  // bootstrap from pages 0 + 1

    std::cout << "[phase 2] reopened database; tables in catalog:";
    for (const auto& n : cat.tableNames()) std::cout << " " << n;
    std::cout << "\n";

    const auto* info = cat.getTable("users");
    if (info == nullptr) {
        // The catalog did not survive the reopen; fail loudly rather than
        // dereferencing a null pointer in scanUsers().
        throw std::runtime_error(
            "runStorageDemo: table 'users' not found in catalog after reopen");
    }
    std::cout << " scanning 'users':\n";
    scanUsers(bp, *info);
    std::cout << "\n";

    // Tidy up so successive runs always start clean.
    std::filesystem::remove(path, ec);
}

// Program entry point: runs the storage demo (the parser demo call is left
// disabled).
//
// Exceptions derived from std::exception are caught here so that the stack
// is unwound (destructors of the demo's storage objects run) and the failure
// is reported on stderr with a non-zero exit status, instead of escaping
// main and ending in std::terminate.
//
// Returns 0 on success, 1 if the demo threw.
int main() {
    try {
        // runParserDemo();
        runStorageDemo();
    } catch (const std::exception& e) {
        std::cerr << "fatal: " << e.what() << "\n";
        return 1;
    }
    return 0;
}
161 changes: 161 additions & 0 deletions src/sql/catalog.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#include "src/sql/catalog.h"

#include <algorithm>
#include <cstdint>
#include <map>
#include <stdexcept>
#include <utility>

// Row layout of the __tables system table: the table's numeric id, its
// name, and the id of the first page of its heap file (stored as Int64).
// None of the columns is nullable.
Schema Catalog::tablesSchema() {
    Schema tables_layout{{
        {"table_id", Type::Int32, false},
        {"name", Type::Text, false},
        {"first_page_id", Type::Int64, false},
    }};
    return tables_layout;
}

// Row layout of the __columns system table. Each row describes one column
// of one table: the owning table's id, the column's position within that
// table, its name, its type as an integer code, and whether it is nullable.
// None of the columns is nullable.
Schema Catalog::columnsSchema() {
    Schema columns_layout{{
        {"table_id", Type::Int32, false},
        {"position", Type::Int32, false},
        {"name", Type::Text, false},
        {"type", Type::Int32, false},
        {"nullable", Type::Bool, false},
    }};
    return columns_layout;
}

// Opens the catalog on top of `bp`.
//
// Binds the two system heap files to their fixed root pages (TABLES_ROOT
// and COLUMNS_ROOT) and then populates the in-memory table cache by calling
// loadFromDisk().
Catalog::Catalog(BufferPool* bp)
: bp_(bp),
tables_hf_(bp, TABLES_ROOT),
columns_hf_(bp, COLUMNS_ROOT),
// Placeholder only: loadFromDisk() overwrites this with (largest stored id + 1).
next_table_id_(0) {
loadFromDisk();
}

// Bootstraps a catalog in a database that has no pages yet.
//
// Two heap files are allocated in order: the first must start at
// TABLES_ROOT and the second at COLUMNS_ROOT. If either lands elsewhere the
// fixed-root-page convention used when reopening would not hold, so a
// std::runtime_error is thrown instead of continuing.
//
// Returns a Catalog constructed over `bp` once both heaps are in place.
Catalog Catalog::create(BufferPool* bp) {
    HeapFile tables_heap = HeapFile::create(bp);
    const bool tables_in_place = (tables_heap.firstPageId() == TABLES_ROOT);
    if (!tables_in_place) {
        throw std::runtime_error(
            "Catalog::create: __tables did not land at page 0; "
            "is the database already initialized?");
    }

    // Only allocate __columns after __tables has been verified.
    HeapFile columns_heap = HeapFile::create(bp);
    const bool columns_in_place = (columns_heap.firstPageId() == COLUMNS_ROOT);
    if (!columns_in_place) {
        throw std::runtime_error(
            "Catalog::create: __columns did not land at page 1; "
            "is the database already initialized?");
    }

    return Catalog(bp);
}

void Catalog::loadFromDisk() {
const Schema tables_s = tablesSchema();
const Schema columns_s = columnsSchema();

// Pass 1: pull every row out of __tables. Schemas are filled in in pass 2.
std::vector<TableInfo> infos;
for (auto it = tables_hf_.begin(); it != tables_hf_.end(); ++it) {
const auto& bytes = it->second;
auto vals = TupleCodec::decode(tables_s, bytes.data(), bytes.size());
TableInfo info;
info.table_id = vals[0].i32;
info.name = vals[1].text;
info.root_page = static_cast<PageId>(vals[2].i64);
infos.push_back(std::move(info));
}

// Pass 2: collect column rows from __columns, group by table_id, sort by
// position. Sorting by position lets us reconstruct the user's column
// order even if the catalog was edited out of order across crashes.
struct ColRow {
int32_t position;
std::string name;
Type type;
bool nullable;
};
std::map<int32_t, std::vector<ColRow>> by_table;
for (auto it = columns_hf_.begin(); it != columns_hf_.end(); ++it) {
const auto& bytes = it->second;
auto vals = TupleCodec::decode(columns_s, bytes.data(), bytes.size());
const int32_t tid = vals[0].i32;
ColRow c{vals[1].i32, vals[2].text, typeFromCode(vals[3].i32), vals[4].b};
by_table[tid].push_back(std::move(c));
}

// Pass 3: stitch each table's columns into its Schema, install in cache,
// and track the next free table_id.
int32_t max_id = -1;
for (auto& info : infos) {
auto cols_it = by_table.find(info.table_id);
if (cols_it != by_table.end()) {
auto& cols = cols_it->second;
std::sort(cols.begin(), cols.end(),
[](const ColRow& a, const ColRow& b) {
return a.position < b.position;
});
info.schema.columns.reserve(cols.size());
for (auto& c : cols) {
info.schema.columns.push_back({std::move(c.name), c.type, c.nullable});
}
}
max_id = std::max(max_id, info.table_id);
tables_.emplace(info.name, std::move(info));
}
next_table_id_ = max_id + 1;
}

void Catalog::createTable(const std::string& name, Schema schema) {
if (hasTable(name)) {
throw std::runtime_error("Catalog: table '" + name + "' already exists");
}

// Allocate the user heap file first. If we crash before recording in
// the catalog, the worst case is one orphan page; the alternative
// (record-first, allocate-after) leaves dangling rows pointing at
// nothing.
HeapFile new_hf = HeapFile::create(bp_);
const PageId root = new_hf.firstPageId();
const int32_t table_id = next_table_id_++;

// Append to __tables.
{
const auto bytes = TupleCodec::encode(tablesSchema(), {
Value::Int32(table_id),
Value::Text(name),
Value::Int64(static_cast<int64_t>(root)),
});
tables_hf_.insert(bytes.data(), bytes.size());
}

// Append one row per column to __columns. Position is the column's
// index in the user's schema and is what we sort by on reload.
const Schema cs = columnsSchema();
for (size_t i = 0; i < schema.columns.size(); ++i) {
const auto& col = schema.columns[i];
const auto bytes = TupleCodec::encode(cs, {
Value::Int32(table_id),
Value::Int32(static_cast<int32_t>(i)),
Value::Text(col.name),
Value::Int32(typeToCode(col.type)),
Value::Bool(col.nullable),
});
columns_hf_.insert(bytes.data(), bytes.size());
}

tables_.emplace(name, TableInfo{table_id, name, std::move(schema), root});
}

// Looks up a table by name in the in-memory cache.
//
// Returns a pointer to the cached TableInfo, or nullptr when no table with
// that name is registered. Callers must check for nullptr before use.
const Catalog::TableInfo* Catalog::getTable(const std::string& name) const {
    const auto found = tables_.find(name);
    if (found == tables_.end()) {
        return nullptr;
    }
    return &found->second;
}

// Returns the names of all cached tables, in the iteration order of the
// tables_ container.
std::vector<std::string> Catalog::tableNames() const {
    std::vector<std::string> names;
    names.reserve(tables_.size());
    for (const auto& [table_name, table_info] : tables_) {
        names.push_back(table_name);
    }
    return names;
}
Loading
Loading