Merge branch 'main' into feature
carlopi committed Nov 6, 2023
2 parents 70dd2f3 + 60ddc31 commit 7661c7a
Showing 30 changed files with 710 additions and 42 deletions.
2 changes: 1 addition & 1 deletion .github/actions/build_extensions/action.yml
@@ -91,7 +91,7 @@ runs:
- name: Setup vcpkg
if: inputs.vcpkg_build == 1
- uses: lukka/run-vcpkg@v11
+ uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1

12 changes: 10 additions & 2 deletions .github/actions/manylinux_2014_setup/action.yml
@@ -28,6 +28,9 @@ inputs:
glibc32:
description: 'Setup 32bit glibc'
default: 0
+ nodejs:
+ description: 'Setup NodeJS'
+ default: 0
gcc_4_8:
description: 'Setup GCC 4.8 (installs to /usr/bin/g++, default will still be GCC 10)'
default: 0
@@ -67,6 +70,11 @@ runs:
shell: bash
run: scripts/setup_manylinux2014.sh ssh

+ - name: Setup NodeJS
+ if: ${{ inputs.nodejs == 1 }}
+ shell: bash
+ run: scripts/setup_manylinux2014.sh nodejs

- name: Setup 32bit compiler
if: ${{ inputs.glibc32 == 1 }}
shell: bash
@@ -83,10 +91,10 @@ runs:
run: scripts/setup_manylinux2014.sh gcc_4_8

# Note instead of using scripts/setup_manylinux2014.sh vcpkg, we prefer to use
- # lukka/run-vcpkg@v11 here as it configures vcpkg to cache to GH actions.
+ # lukka/run-vcpkg@v11.1 here as it configures vcpkg to cache to GH actions.
- name: Setup vcpkg
if: ${{ inputs.vcpkg == 1 }}
- uses: lukka/run-vcpkg@v11
+ uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1

18 changes: 9 additions & 9 deletions .github/actions/ubuntu_18_setup/action.yml
@@ -57,32 +57,32 @@ runs:
./cmake-3.21.3-linux-x86_64.sh --skip-license --prefix=/usr/local
cmake --version
- - name: Install Python 3.7
+ - name: Install Python 3.8
if: ${{ inputs.python }} == 1
shell: bash
run: |
- wget https://www.python.org/ftp/python/3.7.12/Python-3.7.12.tgz
- tar xvf Python-3.7.12.tgz
- cd Python-3.7.12
+ wget https://www.python.org/ftp/python/3.8.17/Python-3.8.17.tgz
+ tar xvf Python-3.8.17.tgz
+ cd Python-3.8.17
mkdir -p pythonbin
./configure --with-ensurepip=install
make -j
make install
- python3.7 --version
- python3.7 -m pip install pip
- python3.7 -m pip install requests awscli
+ python3.8 --version
+ python3.8 -m pip install pip
+ python3.8 -m pip install requests awscli
- name: Version Check
shell: bash
run: |
ldd --version ldd
- python3.7 --version
+ python3 --version
git --version
git log -1 --format=%h
- name: Setup vcpkg
if: ${{ inputs.vcpkg == 1 }}
- uses: lukka/run-vcpkg@v11
+ uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1

2 changes: 1 addition & 1 deletion .github/workflows/ExtensionRebuild.yml
@@ -71,7 +71,7 @@ jobs:
s3_id: ${{ secrets.S3_ID }}
s3_key: ${{ secrets.S3_KEY }}
signing_pk: ${{ secrets.DUCKDB_EXTENSION_SIGNING_PK }}
- python_name: python3.7
+ python_name: python3
vcpkg_target_triplet: x64-linux

xcode-extensions-rebuild:
4 changes: 2 additions & 2 deletions .github/workflows/Java.yml
@@ -78,7 +78,7 @@ jobs:
- name: Deploy
shell: bash
run: >
- python3.7 scripts/asset-upload-gha.py
+ python3.8 scripts/asset-upload-gha.py
duckdb_jdbc-linux-amd64.jar=build/release/tools/jdbc/duckdb_jdbc.jar
- uses: actions/upload-artifact@v3
with:
@@ -120,7 +120,7 @@ jobs:
- name: Deploy
shell: bash
run: >
- python3.7 scripts/asset-upload-gha.py
+ python3.8 scripts/asset-upload-gha.py
duckdb_jdbc-linux-aarch64.jar=build/release/tools/jdbc/duckdb_jdbc.jar
- uses: actions/upload-artifact@v3
18 changes: 10 additions & 8 deletions .github/workflows/LinuxRelease.yml
@@ -66,7 +66,7 @@ jobs:

- name: Install pytest
run: |
- python3.7 -m pip install pytest
+ python3 -m pip install pytest
- name: Build
shell: bash
@@ -79,13 +79,15 @@
- name: Test
shell: bash
if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
- run: make allunit
+ run: |
+ make
+ python3 scripts/run_tests_one_by_one.py build/release/test/unittest "*"
- name: Tools Tests
shell: bash
if: ${{ !startsWith(github.ref, 'refs/tags/v') }}
run: |
- python3.7 -m pytest tools/shell/tests --shell-binary build/release/duckdb
+ python3 -m pytest tools/shell/tests --shell-binary build/release/duckdb
- name: Examples
shell: bash
@@ -99,12 +101,12 @@
- name: Deploy
shell: bash
run: |
- python3.7 scripts/amalgamation.py
+ python3 scripts/amalgamation.py
zip -j duckdb_cli-linux-amd64.zip build/release/duckdb
zip -j libduckdb-linux-amd64.zip build/release/src/libduckdb*.* src/amalgamation/duckdb.hpp src/include/duckdb.h
zip -j libduckdb-src.zip src/amalgamation/duckdb.hpp src/amalgamation/duckdb.cpp src/include/duckdb.h
zip -j duckdb_odbc-linux-amd64.zip build/release/tools/odbc/libduckdb_odbc.so tools/odbc/linux_setup/unixodbc_setup.sh
- python3.7 scripts/asset-upload-gha.py libduckdb-src.zip libduckdb-linux-amd64.zip duckdb_cli-linux-amd64.zip duckdb_odbc-linux-amd64.zip
+ python3 scripts/asset-upload-gha.py libduckdb-src.zip libduckdb-linux-amd64.zip duckdb_cli-linux-amd64.zip duckdb_odbc-linux-amd64.zip
- uses: actions/upload-artifact@v3
with:
@@ -158,11 +160,11 @@ jobs:
- name: Deploy
shell: bash
run: |
- python3.7 scripts/amalgamation.py
+ python3 scripts/amalgamation.py
zip -j duckdb_cli-linux-aarch64.zip build/release/duckdb
zip -j duckdb_odbc-linux-aarch64.zip build/release/tools/odbc/libduckdb_odbc.so
zip -j libduckdb-linux-aarch64.zip build/release/src/libduckdb*.* src/amalgamation/duckdb.hpp src/include/duckdb.h
- python3.7 scripts/asset-upload-gha.py libduckdb-linux-aarch64.zip duckdb_cli-linux-aarch64.zip duckdb_odbc-linux-aarch64.zip
+ python3 scripts/asset-upload-gha.py libduckdb-linux-aarch64.zip duckdb_cli-linux-aarch64.zip duckdb_odbc-linux-aarch64.zip
- uses: actions/upload-artifact@v3
with:
@@ -326,7 +328,7 @@ jobs:

- name: Symbol Leakage Test
shell: bash
- run: python3.7 scripts/exported_symbols_check.py build/release/src/libduckdb*.so
+ run: python3 scripts/exported_symbols_check.py build/release/src/libduckdb*.so

linux-httpfs:
name: Linux HTTPFS
3 changes: 2 additions & 1 deletion .github/workflows/Python.yml
@@ -86,7 +86,7 @@ jobs:
manylinux-extensions-x64:
# Builds extensions for linux_amd64_gcc4
- name: Linux Extensions (ggc4)
+ name: Linux Extensions (gcc4)
runs-on: ubuntu-latest
container: quay.io/pypa/manylinux2014_x86_64
needs: linux-python3-9
@@ -103,6 +103,7 @@
aws-cli: 1
ninja-build: 1
ccache: 1
+ nodejs: 1
ssh: 1
python_alias: 1
openssl: 1
2 changes: 1 addition & 1 deletion .github/workflows/Wasm.yml
@@ -36,7 +36,7 @@ jobs:
with:
version: 'latest'
- name: Setup vcpkg
- uses: lukka/run-vcpkg@v11
+ uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: 501db0f17ef6df184fcdbfbe0f87cde2313b6ab1

6 changes: 3 additions & 3 deletions .github/workflows/_extension_distribution.yml
@@ -150,7 +150,7 @@ jobs:
aarch64_cross_compile: ${{ matrix.duckdb_arch == 'linux_arm64' && 1 }}

- name: Setup vcpkg
- uses: lukka/run-vcpkg@v11
+ uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: ${{ inputs.vcpkg_commit }}

@@ -212,7 +212,7 @@ jobs:
git checkout ${{ inputs.duckdb_version }}
- name: Setup vcpkg
- uses: lukka/run-vcpkg@v11
+ uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: ${{ inputs.vcpkg_commit }}

@@ -267,7 +267,7 @@ jobs:
key: ${{ github.job }}-${{ matrix.duckdb_arch }}

- name: Setup vcpkg
- uses: lukka/run-vcpkg@v11
+ uses: lukka/run-vcpkg@v11.1
with:
vcpkgGitCommitId: ${{ inputs.vcpkg_commit }}

32 changes: 32 additions & 0 deletions benchmark/micro/join/delim_join_no_blowup.benchmark
@@ -0,0 +1,32 @@
# name: benchmark/micro/join/delim_join_no_blowup.benchmark
# description: Delim joins don't result in a blow-up (which would make this benchmark take forever)
# group: [join]

name High Cardinality Duplicate elimination join
group join

load
create table big_table (id integer);
insert into big_table select range from range(10000000);
create table medium_1 (id integer, fk_to_big integer, fk_to_medium_2 integer);
insert into medium_1 (select
range,
CASE WHEN range<10 THEN 0 ELSE range END,
range + 9999,
from range(10000));
create table medium_2 (id integer);
insert into medium_2 (select range from range(10000));
pragma disabled_optimizers='statistics_propagation';

run
SELECT *
FROM big_table as bt
WHERE
exists(
SELECT *
FROM medium_2
INNER JOIN medium_1
ON ((medium_2.id = medium_1.fk_to_medium_2))
WHERE
(medium_1.fk_to_big % 7 = bt.id % 7)
)
3 changes: 3 additions & 0 deletions scripts/setup_manylinux2014.sh
@@ -67,6 +67,9 @@ install_deps() {
elif [ "$1" = "gcc_4_8" ]; then
yum install -y gcc-c++

+ elif [ "$1" = "nodejs" ]; then
+ yum install -y nodejs

else
>&2 echo "unknown input for setup_manylinux2014.sh: '$1'"
exit $exit_code
7 changes: 3 additions & 4 deletions src/catalog/catalog_set.cpp
@@ -199,6 +199,8 @@ bool CatalogSet::AlterOwnership(CatalogTransaction transaction, ChangeOwnershipI
bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name, AlterInfo &alter_info) {
// lock the catalog for writing
lock_guard<mutex> write_lock(catalog.GetWriteLock());
+ // lock this catalog set to disallow reading
+ lock_guard<mutex> read_lock(catalog_lock);

// first check if the entry exists in the unordered set
EntryIndex entry_index;
@@ -210,9 +212,6 @@ bool CatalogSet::AlterEntry(CatalogTransaction transaction, const string &name,
throw CatalogException("Cannot alter entry \"%s\" because it is an internal system entry", entry->name);
}

- // lock this catalog set to disallow reading
- lock_guard<mutex> read_lock(catalog_lock);

// create a new entry and replace the currently stored one
// set the timestamp to the timestamp of the current transaction
// and point it to the updated table node
@@ -316,6 +315,7 @@ void CatalogSet::DropEntryInternal(CatalogTransaction transaction, EntryIndex en
bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, bool cascade, bool allow_drop_internal) {
// lock the catalog for writing
lock_guard<mutex> write_lock(catalog.GetWriteLock());
+ lock_guard<mutex> read_lock(catalog_lock);
// we can only delete an entry that exists
EntryIndex entry_index;
auto entry = GetEntryInternal(transaction, name, &entry_index);
@@ -326,7 +326,6 @@ bool CatalogSet::DropEntry(CatalogTransaction transaction, const string &name, b
throw CatalogException("Cannot drop entry \"%s\" because it is an internal system entry", entry->name);
}

- lock_guard<mutex> read_lock(catalog_lock);
DropEntryInternal(transaction, std::move(entry_index), *entry, cascade);
return true;
}
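These hunks move the acquisition of catalog_lock up so that it is taken together with the catalog write lock, before the entry is looked up, in both AlterEntry and DropEntry. A minimal sketch, using plain std::mutex and hypothetical names in place of DuckDB's Catalog/CatalogSet types, shows why the early acquisition matters: once both locks are held before the lookup, no reader can observe the entry between the existence check and the mutation.

    // Minimal sketch, not DuckDB's implementation: EntrySet stands in for
    // CatalogSet, and an int stands in for the catalog entry payload.
    #include <mutex>
    #include <string>
    #include <unordered_map>

    struct EntrySet {
        std::mutex write_lock; // catalog-level write lock
        std::mutex set_lock;   // set-level lock that readers take
        std::unordered_map<std::string, int> entries;

        bool AlterEntry(const std::string &name, int new_value) {
            // take both locks up front, before the lookup
            std::lock_guard<std::mutex> w(write_lock);
            std::lock_guard<std::mutex> r(set_lock);
            // a concurrent reader holding set_lock can no longer interleave
            // between the existence check and the in-place modification
            auto it = entries.find(name);
            if (it == entries.end()) {
                return false;
            }
            it->second = new_value;
            return true;
        }
    };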
4 changes: 3 additions & 1 deletion src/common/multi_file_reader.cpp
@@ -111,7 +111,9 @@ bool MultiFileReader::ComplexFilterPushdown(ClientContext &context, vector<strin

unordered_map<string, column_t> column_map;
for (idx_t i = 0; i < get.column_ids.size(); i++) {
- column_map.insert({get.names[get.column_ids[i]], i});
+ if (!IsRowIdColumnId(get.column_ids[i])) {
+ column_map.insert({get.names[get.column_ids[i]], i});
+ }
}

auto start_files = files.size();
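The added guard matters because get.column_ids can contain the virtual row-id column, which has no corresponding entry in get.names; indexing get.names with it would read out of bounds. A minimal sketch of the same guard, assuming the row-id id is a max-value sentinel (the real code uses the IsRowIdColumnId helper shown above):

    // Minimal sketch with assumed names: COLUMN_IDENTIFIER_ROW_ID stands in
    // for DuckDB's row-id sentinel.
    #include <cstdint>
    #include <limits>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using column_t = std::uint64_t;
    constexpr column_t COLUMN_IDENTIFIER_ROW_ID = std::numeric_limits<column_t>::max();

    std::unordered_map<std::string, column_t>
    BuildColumnMap(const std::vector<std::string> &names, const std::vector<column_t> &column_ids) {
        std::unordered_map<std::string, column_t> column_map;
        for (column_t i = 0; i < column_ids.size(); i++) {
            // the row-id sentinel has no entry in names, so indexing names
            // with it would be out of bounds; skip it
            if (column_ids[i] == COLUMN_IDENTIFIER_ROW_ID) {
                continue;
            }
            column_map.insert({names[column_ids[i]], i});
        }
        return column_map;
    }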
@@ -122,6 +122,7 @@ SinkCombineResultType PhysicalCopyToFile::Combine(ExecutionContext &context, Ope
{
// create directories
lock_guard<mutex> global_lock(g.lock);
+ lock_guard<mutex> global_lock_on_partition_state(g.partition_state->lock);
const auto &global_partitions = g.partition_state->partitions;
// global_partitions have partitions added only at the back, so it's fine to only traverse the last part
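The extra guard reflects that the partition list lives behind its own lock: other threads append partitions while holding only partition_state->lock, so reading the list under the global sink lock alone is not enough. A minimal sketch of the shape, with GlobalState and PartitionState as stand-ins for the operator's real sink state:

    // Minimal sketch, not the operator's real state: both locks are held
    // while the shared partition vector is read.
    #include <cstddef>
    #include <memory>
    #include <mutex>
    #include <string>
    #include <vector>

    struct PartitionState {
        std::mutex lock;                     // guards partitions
        std::vector<std::string> partitions; // only ever appended to at the back
    };

    struct GlobalState {
        std::mutex lock; // operator-wide sink lock
        std::unique_ptr<PartitionState> partition_state;
    };

    void CreatePartitionDirectories(GlobalState &g, std::size_t already_created) {
        std::lock_guard<std::mutex> global_lock(g.lock);
        std::lock_guard<std::mutex> partition_lock(g.partition_state->lock);
        auto &parts = g.partition_state->partitions;
        // partitions are append-only, so only the tail that appeared since
        // the last call needs to be visited
        for (std::size_t i = already_created; i < parts.size(); i++) {
            // ... create the directory for parts[i] ...
        }
    }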

4 changes: 4 additions & 0 deletions src/include/duckdb/catalog/catalog_set.hpp
@@ -122,6 +122,10 @@ class CatalogSet {

void UpdateTimestamp(CatalogEntry &entry, transaction_t timestamp);

+ mutex &GetCatalogLock() {
+ return catalog_lock;
+ }

void Verify(Catalog &catalog);

private:
9 changes: 5 additions & 4 deletions src/optimizer/join_order/relation_manager.cpp
@@ -231,10 +231,11 @@ bool RelationManager::ExtractJoinRelations(LogicalOperator &input_op,
return true;
}
case LogicalOperatorType::LOGICAL_DELIM_GET: {
- auto &delim_get = op->Cast<LogicalDelimGet>();
- auto stats = RelationStatisticsHelper::ExtractDelimGetStats(delim_get, context);
- AddRelation(input_op, parent, stats);
- return true;
+ // Removed until we can extract better stats from delim gets. See #596
+ // auto &delim_get = op->Cast<LogicalDelimGet>();
+ // auto stats = RelationStatisticsHelper::ExtractDelimGetStats(delim_get, context);
+ // AddRelation(input_op, parent, stats);
+ return false;
}
case LogicalOperatorType::LOGICAL_PROJECTION: {
auto child_stats = RelationStats();
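Returning false here tells the caller that the subtree rooted at the delim get cannot be modeled as a reorderable relation, so the join-order optimizer leaves it in place rather than planning around statistics it cannot trust; the delim_join_no_blowup benchmark added above exercises exactly this case. A compact sketch of the convention, with a hypothetical operator enum standing in for DuckDB's LogicalOperatorType:

    // Minimal sketch of the extraction convention: true means "registered as
    // a relation for join reordering", false means "leave this subtree alone".
    enum class OperatorType { GET, DELIM_GET, OTHER };

    bool ExtractJoinRelations(OperatorType type) {
        switch (type) {
        case OperatorType::GET:
            // base table scan: usable statistics, register it as a relation
            return true;
        case OperatorType::DELIM_GET:
            // no reliable cardinality estimate: do not register, so the
            // optimizer cannot build a blown-up plan around bad stats
            return false;
        default:
            return false;
        }
    }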
8 changes: 7 additions & 1 deletion src/storage/data_table.cpp
@@ -1317,8 +1317,14 @@ idx_t DataTable::GetTotalRows() {
}

void DataTable::CommitDropTable() {
- // commit a drop of this table: mark all blocks as modified so they can be reclaimed later on
+ // commit a drop of this table: mark all blocks as modified, so they can be reclaimed later on
row_groups->CommitDropTable();
+
+ // propagate dropping this table to its indexes: frees all index memory
+ info->indexes.Scan([&](Index &index) {
+ index.CommitDrop();
+ return false;
+ });
}

//===--------------------------------------------------------------------===//
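The lambda handed to indexes.Scan returns false on every call. In this scan-callback convention a true return requests early termination, so returning false visits every index; a minimal sketch of that convention, with a simplified index list standing in for the real one:

    // Minimal sketch, not DuckDB's TableIndexList: the callback's return
    // value means "stop scanning", so returning false visits every index.
    #include <functional>
    #include <vector>

    struct Index {
        void CommitDrop() { /* release the index's memory */ }
    };

    struct IndexList {
        std::vector<Index> indexes;

        void Scan(const std::function<bool(Index &)> &callback) {
            for (auto &index : indexes) {
                if (callback(index)) {
                    break; // callback asked to stop early
                }
            }
        }
    };

    void CommitDropAllIndexes(IndexList &list) {
        list.Scan([](Index &index) {
            index.CommitDrop();
            return false; // never stop early: drop every index
        });
    }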
1 change: 1 addition & 0 deletions src/transaction/commit_state.cpp
@@ -254,6 +254,7 @@ void CommitState::CommitEntry(UndoFlags type, data_ptr_t data) {
// Grab a write lock on the catalog
auto &duck_catalog = catalog.Cast<DuckCatalog>();
lock_guard<mutex> write_lock(duck_catalog.GetWriteLock());
+ lock_guard<mutex> read_lock(catalog_entry->set->GetCatalogLock());
catalog_entry->set->UpdateTimestamp(*catalog_entry->parent, commit_id);
if (catalog_entry->name != catalog_entry->parent->name) {
catalog_entry->set->UpdateTimestamp(*catalog_entry, commit_id);
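This is the consumer of the new CatalogSet::GetCatalogLock() accessor added in catalog_set.hpp above: commit now takes the catalog write lock and then the set's own lock, in the same order as AlterEntry and DropEntry, before updating timestamps. Keeping one acquisition order across all three paths is what keeps the two-lock scheme deadlock-free; a compact sketch, with the DuckDB specifics elided:

    // Minimal sketch: two locks are deadlock-free as long as every path
    // acquires them in the same order (write lock first, then set lock).
    #include <mutex>

    std::mutex write_lock; // catalog-level
    std::mutex set_lock;   // per catalog set

    void CommitEntry() {
        std::lock_guard<std::mutex> w(write_lock);
        std::lock_guard<std::mutex> r(set_lock);
        // safe to update timestamps: AlterEntry/DropEntry cannot interleave,
        // because they take the same locks in the same order
    }

    void AlterEntry() {
        std::lock_guard<std::mutex> w(write_lock); // same order everywhere
        std::lock_guard<std::mutex> r(set_lock);
        // ... look up and replace the entry ...
    }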
