Skip to content

Commit

Permalink
Merge pull request #777 from cwida/codecleanup
Browse files Browse the repository at this point in the history
Code Cleanup and re-add Unity Builds
  • Loading branch information
Mytherin committed Jul 22, 2020
2 parents 989c1b3 + bc4c6c8 commit 981e7cd
Show file tree
Hide file tree
Showing 275 changed files with 666 additions and 393 deletions.
72 changes: 72 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,78 @@ include_directories(third_party/utf8proc/include)
include_directories(third_party/miniparquet)
include_directories(third_party/concurrentqueue)

# TODO: only regenerate the unity-build (ub) file if one of the input files changed — hack alert
# Generates a "unity build" translation unit (ub_<suffix>.cpp) that #includes
# every file in the given source list, and marks the individual sources as
# HEADER_FILE_ONLY so that only the generated unit is compiled.
#
# Arguments:
#   UB_SUFFIX            - unique suffix for the generated ub_<suffix>.cpp
#   SOURCE_VARIABLE_NAME - name of the caller's list variable holding the
#                          sources; the generated file is appended to it
#                          (via PARENT_SCOPE) on return.
function(enable_unity_build UB_SUFFIX SOURCE_VARIABLE_NAME)
  set(files ${${SOURCE_VARIABLE_NAME}})

  # Unique filename for the unity build translation unit, plus a scratch file
  # used to detect whether the generated content actually changed.
  set(unit_build_file ${CMAKE_CURRENT_BINARY_DIR}/ub_${UB_SUFFIX}.cpp)
  set(temp_unit_build_file ${CMAKE_CURRENT_BINARY_DIR}/ub_${UB_SUFFIX}.cpp.tmp)

  # Exclude the individual translation units from compilation; they are only
  # compiled through the generated unity file.
  set_source_files_properties(${files}
                              PROPERTIES
                              HEADER_FILE_ONLY
                              true)

  # Rebuild if any source file is newer than the existing unity file, so a
  # touched source bumps the unity file's timestamp even when its text is
  # unchanged.
  set(rebuild FALSE)
  foreach(source_file ${files})
    if(${CMAKE_CURRENT_SOURCE_DIR}/${source_file} IS_NEWER_THAN
       ${unit_build_file})
      set(rebuild TRUE)
    endif()
  endforeach()

  # Generate the unity content into the scratch file first.
  # NOTE(review): "#line 0" is out of range per the C/C++ standard (line
  # numbers start at 1); compilers accept it but may warn under -pedantic.
  file(WRITE ${temp_unit_build_file} "// Unity Build generated by CMake\n")
  foreach(source_file ${files})
    file(
      APPEND ${temp_unit_build_file}
      "#line 0 \"${source_file}\"\n#include <${CMAKE_CURRENT_SOURCE_DIR}/${source_file}>\n"
    )
  endforeach()

  # compare_files returns 0 when the files are byte-identical; any other
  # result (different content, or the unity file does not exist yet) means
  # we must rebuild.
  execute_process(COMMAND ${CMAKE_COMMAND}
                          -E
                          compare_files
                          ${unit_build_file}
                          ${temp_unit_build_file}
                  RESULT_VARIABLE compare_result
                  OUTPUT_QUIET
                  ERROR_QUIET)
  if(NOT compare_result EQUAL 0)
    set(rebuild TRUE)
  endif()

  if(rebuild)
    # Replace the unity file with the freshly generated content instead of
    # writing the identical content a second time (as the old code did).
    file(RENAME ${temp_unit_build_file} ${unit_build_file})
  else()
    # Content unchanged and no source is newer: keep the old file (and its
    # timestamp) and clean up the scratch file instead of leaving it behind.
    file(REMOVE ${temp_unit_build_file})
  endif()

  # Append the generated unity file to the caller's source list.
  set(${SOURCE_VARIABLE_NAME}
      ${${SOURCE_VARIABLE_NAME}} ${unit_build_file}
      PARENT_SCOPE)
endfunction()

# Adds a library whose sources are optionally funnelled through a unity build
# (see enable_unity_build) unless DISABLE_UNITY is set.
#
# Arguments:
#   NAME - target name
#   MODE - library mode (callers pass OBJECT); previously this parameter was
#          accepted but ignored and OBJECT was hard-coded — now it is honored
#   ARGN - source files
function(add_library_unity NAME MODE)
  set(SRCS ${ARGN})
  if(NOT DISABLE_UNITY)
    # Rewrites SRCS: sources become header-only, the generated ub file is added.
    enable_unity_build(${NAME} SRCS)
  endif()
  add_library(${NAME} ${MODE} ${SRCS})
endfunction()

function(disable_target_warnings NAME)
if(MSVC)
target_compile_options(${NAME} PRIVATE "/W0")
Expand Down
22 changes: 12 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,32 @@ imdb: third_party/imdb/data
GENERATOR=
FORCE_COLOR=
WARNINGS_AS_ERRORS=
DISABLE_UNITY_FLAG=
ifeq ($(GEN),ninja)
GENERATOR=-G "Ninja"
FORCE_COLOR=-DFORCE_COLORED_OUTPUT=1
endif
ifeq (${TREAT_WARNINGS_AS_ERRORS}, 1)
WARNINGS_AS_ERRORS=-DTREAT_WARNINGS_AS_ERRORS=1
endif

EXTENSIONS="-DBUILD_PARQUET_EXTENSION=TRUE -DBUILD_ICU_EXTENSION=TRUE"

ifeq (${DISABLE_UNITY}, 1)
DISABLE_UNITY_FLAG=-DDISABLE_UNITY=1
endif
EXTENSIONS=-DBUILD_PARQUET_EXTENSION=TRUE -DBUILD_ICU_EXTENSION=TRUE

clean:
rm -rf build

debug:
mkdir -p build/debug && \
cd build/debug && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Debug ../.. && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${DISABLE_UNITY_FLAG} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Debug ../.. && \
cmake --build .

release_expanded:
mkdir -p build/release_expanded && \
cd build/release_expanded && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Release ../.. && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${DISABLE_UNITY_FLAG} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Release ../.. && \
cmake --build .

unittest: debug
Expand All @@ -52,34 +54,34 @@ release:
mkdir -p build/release && \
python scripts/amalgamation.py && \
cd build/release && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Release -DAMALGAMATION_BUILD=1 ../.. && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${DISABLE_UNITY_FLAG} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Release -DAMALGAMATION_BUILD=1 ../.. && \
cmake --build .

reldebug:
mkdir -p build/reldebug && \
cd build/reldebug && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=RelWithDebInfo ../.. && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${DISABLE_UNITY_FLAG} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=RelWithDebInfo ../.. && \
cmake --build .

amaldebug:
mkdir -p build/amaldebug && \
python scripts/amalgamation.py && \
cd build/amaldebug && \
cmake $(GENERATOR) $(FORCE_COLOR) -DAMALGAMATION_BUILD=1 ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Debug ../.. && \
cmake $(GENERATOR) $(FORCE_COLOR) ${EXTENSIONS} -DAMALGAMATION_BUILD=1 -DCMAKE_BUILD_TYPE=Debug ../.. && \
cmake --build .

jdbc:
mkdir -p build/release && \
python scripts/amalgamation.py && \
cd build/release && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Release -DAMALGAMATION_BUILD=1 -DJDBC_DRIVER=1 ../.. && \
cmake $(GENERATOR) $(FORCE_COLOR) ${WARNINGS_AS_ERRORS} ${DISABLE_UNITY_FLAG} ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Release -DAMALGAMATION_BUILD=1 -DJDBC_DRIVER=1 ../.. && \
cmake --build .


jdbcdebug:
mkdir -p build/jdbcdebug && \
cd build/jdbcdebug && \
cmake $(GENERATOR) $(FORCE_COLOR) -DJDBC_DRIVER=1 -DENABLE_SANITIZER=FALSE ${EXTENSIONS} -DCMAKE_BUILD_TYPE=Debug ../.. && \
cmake $(GENERATOR) $(FORCE_COLOR) ${EXTENSIONS} -DJDBC_DRIVER=1 -DENABLE_SANITIZER=FALSE -DCMAKE_BUILD_TYPE=Debug ../.. && \
cmake --build .

test_compile: # test compilation of individual cpp files
Expand Down
9 changes: 5 additions & 4 deletions extension/parquet/parquet-extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <fstream>
#include <cstring>
#include <iostream>
#include <sstream>

#include "parquet-extension.hpp"

Expand Down Expand Up @@ -422,7 +423,7 @@ class ParquetScanFunction : public TableFunction {
for (idx_t i = 0; i < count; i++) {
if (col_data.defined_buf.ptr[i]) {
auto offset = col_data.offset_buf.read<int32_t>();
if (offset > col_data.dict_size) {
if ((idx_t) offset > col_data.dict_size) {
throw runtime_error("Offset " + to_string(offset) + " greater than dictionary size " +
to_string(col_data.dict_size) + " at " + to_string(i + target_offset) +
". Corrupt file?");
Expand Down Expand Up @@ -685,7 +686,7 @@ class ParquetScanFunction : public TableFunction {
data.current_group++;
data.group_offset = 0;

if (data.current_group == data.file_meta_data.row_groups.size()) {
if ((idx_t) data.current_group == data.file_meta_data.row_groups.size()) {
data.finished = true;
return;
}
Expand Down Expand Up @@ -789,7 +790,7 @@ class ParquetScanFunction : public TableFunction {
for (idx_t i = 0; i < current_batch_size; i++) {
if (col_data.defined_buf.ptr[i]) {
auto offset = col_data.offset_buf.read<int32_t>();
if (offset >= col_data.string_collection->count) {
if ((idx_t) offset >= col_data.string_collection->count) {
throw runtime_error("string dictionary offset out of bounds");
}
auto &chunk = col_data.string_collection->chunks[offset / STANDARD_VECTOR_SIZE];
Expand All @@ -815,7 +816,7 @@ class ParquetScanFunction : public TableFunction {
// bit packed this
auto target_ptr = FlatVector::GetData<bool>(output.data[out_col_idx]);
int byte_pos = 0;
for (int32_t i = 0; i < current_batch_size; i++) {
for (idx_t i = 0; i < current_batch_size; i++) {
if (!col_data.defined_buf.ptr[i]) {
FlatVector::SetNull(output.data[out_col_idx], i + output_offset, true);
continue;
Expand Down
5 changes: 4 additions & 1 deletion scripts/amalgamation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# this script creates a single header + source file combination out of the DuckDB sources
import os, re, sys, shutil
import os
import re
import sys
import shutil
amal_dir = os.path.join('src', 'amalgamation')
header_file = os.path.join(amal_dir, "duckdb.hpp")
source_file = os.path.join(amal_dir, "duckdb.cpp")
Expand Down
7 changes: 6 additions & 1 deletion scripts/asset-upload.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import json, os, sys, glob, mimetypes, urllib.request
import json
import os
import sys
import glob
import mimetypes
import urllib.request

api_url = 'https://api.github.com/repos/cwida/duckdb/'

Expand Down
4 changes: 3 additions & 1 deletion scripts/generate_grammar.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# use bison to generate the parser files
# the following version of bison is used:
# bison (GNU Bison) 2.3
import os, subprocess, re
import os
import subprocess
import re

bison_location = "bison"
base_dir = 'third_party/libpg_query/grammar'
Expand Down
75 changes: 75 additions & 0 deletions scripts/include_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import amalgamation
import os
import re
import sys
import shutil

include_counts = {}
include_chains = {}
cached_includes = {}

def analyze_include_file(fpath, already_included_files, prev_include = ""):
    # Recursively tally include statistics for one file.
    #   fpath                  - path of the file being analyzed
    #   already_included_files - files already visited in this traversal
    #   prev_include           - header that pulled this file in ("" = reached
    #                            directly from a translation unit)
    # Skip files already seen on this traversal, and files the amalgamation
    # always excludes.
    if fpath in already_included_files:
        return
    if fpath in amalgamation.always_excluded:
        return

    # Resolve this file's include list, parsing each file at most once.
    includes = cached_includes.get(fpath)
    if includes is None:
        with open(fpath, 'r') as f:
            text = f.read()
        (statements, includes) = amalgamation.get_includes(fpath, text)
        cached_includes[fpath] = includes

    # Count how many times this file is (transitively) reached.
    include_counts[fpath] = include_counts.get(fpath, 0) + 1

    # Record which header included us, keyed by the includer's path.
    chain = include_chains.setdefault(fpath, {})
    chain[prev_include] = chain.get(prev_include, 0) + 1

    already_included_files.append(fpath)
    # Headers become the "previous include" for everything they pull in;
    # non-headers keep whatever includer was passed down.
    if fpath.endswith('.h') or fpath.endswith('.hpp'):
        prev_include = fpath
    for include in includes:
        analyze_include_file(include, already_included_files, prev_include)

def analyze_includes(dir):
    # Walk the directory tree rooted at `dir`, feeding every C/C++ translation
    # unit (.cpp/.c/.cc) to analyze_include_file; excluded files are skipped.
    for fname in sorted(os.listdir(dir)):
        if fname in amalgamation.excluded_files:
            continue
        fpath = os.path.join(dir, fname)
        if os.path.isdir(fpath):
            analyze_includes(fpath)
        elif fname.endswith('.cpp') or fname.endswith('.c') or fname.endswith('.cc'):
            analyze_include_file(fpath, [])

# Analyze every compile directory, then report the most frequently included
# files together with their most common direct includers.
for compile_dir in amalgamation.compile_directories:
    analyze_includes(compile_dir)

# Rank files by how often they were (transitively) included, most first.
ranked = sorted(include_counts.items(), key=lambda item: -item[1])

for include_file, include_count in ranked:
    print("------------------------------------------------------------")
    print(include_file + " (" + str(include_count) + ")")
    print("------------------------------------------------------------")
    print("FILE INCLUDED FROM:")
    # Show at most the five most common direct includers of this file.
    chains = sorted(include_chains[include_file].items(),
                    key=lambda item: -item[1])
    for includer, count in chains[:5]:
        print([includer, count])

2 changes: 1 addition & 1 deletion scripts/test_vector_sizes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def replace_in_file(fname, regex, replace):

for vector_size in vector_sizes:
print("TESTING STANDARD_VECTOR_SIZE=%d" % (vector_size,))
replace_in_file('src/include/duckdb/common/constants.hpp', '#define STANDARD_VECTOR_SIZE \d+', '#define STANDARD_VECTOR_SIZE %d' % (vector_size,))
replace_in_file('src/include/duckdb/common/vector_size.hpp', '#define STANDARD_VECTOR_SIZE \d+', '#define STANDARD_VECTOR_SIZE %d' % (vector_size,))
execute_system_command('rm -rf build')
execute_system_command('mkdir -p build/release')
os.chdir(build_dir)
Expand Down
2 changes: 1 addition & 1 deletion src/catalog/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
add_subdirectory(catalog_entry)

add_library(duckdb_catalog
add_library_unity(duckdb_catalog
OBJECT
catalog_entry.cpp
catalog.cpp
Expand Down
11 changes: 7 additions & 4 deletions src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "duckdb/catalog/catalog.hpp"
#include "duckdb/catalog/catalog_set.hpp"

#include "duckdb/catalog/catalog_entry/list.hpp"
#include "duckdb/common/exception.hpp"
Expand All @@ -17,12 +18,14 @@
#include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
#include "duckdb/storage/storage_manager.hpp"
#include "duckdb/main/database.hpp"
#include "duckdb/catalog/dependency_manager.hpp"

using namespace duckdb;
using namespace std;

Catalog::Catalog(StorageManager &storage) : storage(storage), schemas(*this), dependency_manager(*this) {
Catalog::Catalog(StorageManager &storage) : storage(storage), schemas(make_unique<CatalogSet>(*this)), dependency_manager(make_unique<DependencyManager>(*this)) {
}
Catalog::~Catalog(){}

Catalog &Catalog::GetCatalog(ClientContext &context) {
return context.catalog;
Expand Down Expand Up @@ -69,7 +72,7 @@ CatalogEntry *Catalog::CreateSchema(ClientContext &context, CreateSchemaInfo *in
unordered_set<CatalogEntry *> dependencies;
auto entry = make_unique<SchemaCatalogEntry>(this, info->schema);
auto result = entry.get();
if (!schemas.CreateEntry(context.ActiveTransaction(), info->schema, move(entry), dependencies)) {
if (!schemas->CreateEntry(context.ActiveTransaction(), info->schema, move(entry), dependencies)) {
if (info->on_conflict == OnCreateConflict::ERROR) {
throw CatalogException("Schema with name %s already exists!", info->schema.c_str());
} else {
Expand All @@ -89,7 +92,7 @@ void Catalog::DropSchema(ClientContext &context, DropInfo *info) {
info->name.c_str());
}

if (!schemas.DropEntry(context.ActiveTransaction(), info->name, info->cascade)) {
if (!schemas->DropEntry(context.ActiveTransaction(), info->name, info->cascade)) {
if (!info->if_exists) {
throw CatalogException("Schema with name \"%s\" does not exist!", info->name.c_str());
}
Expand Down Expand Up @@ -118,7 +121,7 @@ SchemaCatalogEntry *Catalog::GetSchema(ClientContext &context, const string &sch
if (schema_name == TEMP_SCHEMA) {
return context.temporary_objects.get();
}
auto entry = schemas.GetEntry(context.ActiveTransaction(), schema_name);
auto entry = schemas->GetEntry(context.ActiveTransaction(), schema_name);
if (!entry) {
throw CatalogException("Schema with name %s does not exist!", schema_name.c_str());
}
Expand Down
2 changes: 1 addition & 1 deletion src/catalog/catalog_entry/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
add_library(duckdb_catalog_entries
add_library_unity(duckdb_catalog_entries
OBJECT
index_catalog_entry.cpp
schema_catalog_entry.cpp
Expand Down
Loading

0 comments on commit 981e7cd

Please sign in to comment.