
Commit
Merge branch 'main' into feature
Mytherin committed Nov 13, 2023
2 parents 754991b + 702d22f commit d198fee
Showing 19 changed files with 100 additions and 10 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/LinuxRelease.yml
@@ -385,8 +385,8 @@ jobs:
       - name: Test
         shell: bash
         run: |
-          python3 scripts/get_test_list.py --file-contains 'require-env S3_TEST_SERVER_AVAILABLE 1' --list '"*"' > test.list
-          build/release/test/unittest -f test.list
+          python3 scripts/get_test_list.py --file-contains 'require httpfs' --list '"*"' > test.list
+          python3 scripts/run_tests_one_by_one.py ./build/release/test/unittest '-f test.list'
 
   amalgamation-tests:
     name: Amalgamation Tests
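The new Test step selects tests that `require httpfs` and drives the unittest binary through scripts/run_tests_one_by_one.py instead of a single batch invocation, presumably so one crashing or hanging test no longer takes down the whole run. A rough sketch of what such a one-process-per-test runner does, written in C++ for illustration (the real script is Python and its behavior may differ):

// Hypothetical one-process-per-test runner; not the actual
// scripts/run_tests_one_by_one.py, which is a Python script.
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <string>

int main(int argc, char **argv) {
	if (argc < 3) {
		std::fprintf(stderr, "usage: %s <unittest-binary> <test.list>\n", argv[0]);
		return 1;
	}
	std::ifstream list(argv[2]);
	std::string test;
	int failures = 0;
	while (std::getline(list, test)) {
		if (test.empty()) {
			continue;
		}
		// One child process per test: a crash aborts only this test,
		// not the remainder of the run.
		std::string command = std::string(argv[1]) + " \"" + test + "\"";
		if (std::system(command.c_str()) != 0) {
			std::fprintf(stderr, "FAILED: %s\n", test.c_str());
			failures++;
		}
	}
	return failures > 0 ? 1 : 0;
}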
2 changes: 1 addition & 1 deletion extension/json/buffered_json_reader.cpp
@@ -23,7 +23,7 @@ bool JSONFileHandle::IsOpen() const {
 }
 
 void JSONFileHandle::Close() {
-	if (IsOpen() && file_handle->OnDiskFile()) {
+	if (IsOpen() && !file_handle->IsPipe()) {
 		file_handle->Close();
 		file_handle = nullptr;
 	}
6 changes: 5 additions & 1 deletion src/common/file_system.cpp
@@ -344,7 +344,7 @@ bool FileSystem::FileExists(const string &filename) {
 }
 
 bool FileSystem::IsPipe(const string &filename) {
-	throw NotImplementedException("%s: IsPipe is not implemented!", GetName());
+	return false;
 }
 
 void FileSystem::RemoveFile(const string &filename) {
@@ -500,6 +500,10 @@ bool FileHandle::CanSeek() {
 	return file_system.CanSeek();
 }
 
+bool FileHandle::IsPipe() {
+	return file_system.IsPipe(path);
+}
+
 string FileHandle::ReadLine() {
 	string result;
 	char buffer[1];
1 change: 1 addition & 0 deletions src/include/duckdb/common/file_system.hpp
@@ -66,6 +66,7 @@ struct FileHandle {
 	DUCKDB_API string ReadLine();
 
 	DUCKDB_API bool CanSeek();
+	DUCKDB_API bool IsPipe();
 	DUCKDB_API bool OnDiskFile();
 	DUCKDB_API idx_t GetFileSize();
 	DUCKDB_API FileType GetType();
3 changes: 3 additions & 0 deletions src/include/duckdb/common/pipe_file_system.hpp
@@ -28,6 +28,9 @@ class PipeFileSystem : public FileSystem {
 	bool CanSeek() override {
 		return false;
 	}
+	bool IsPipe(const string &filename) override {
+		return true;
+	}
 	void FileSync(FileHandle &handle) override;
 
 	std::string GetName() const override {
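Taken together, the four source changes above implement one small feature: FileSystem::IsPipe now defaults to false instead of throwing, PipeFileSystem overrides it to return true, FileHandle::IsPipe() forwards the question to whichever filesystem backs the handle, and JSONFileHandle::Close() uses it to leave pipe-backed handles open, since a pipe cannot be reopened once closed. A condensed, self-contained sketch of the pattern (simplified stand-in types, not DuckDB's actual headers):

#include <iostream>
#include <string>
#include <utility>

class FileSystem {
public:
	virtual ~FileSystem() = default;
	// New default: a regular filesystem serves no pipes (was: throw).
	virtual bool IsPipe(const std::string &filename) {
		return false;
	}
};

class PipeFileSystem : public FileSystem {
public:
	// Everything served by the pipe filesystem is a pipe.
	bool IsPipe(const std::string &filename) override {
		return true;
	}
};

class FileHandle {
public:
	FileHandle(FileSystem &fs, std::string path_p) : file_system(fs), path(std::move(path_p)) {
	}
	// Forwards to the owning filesystem, as FileHandle::IsPipe does above.
	bool IsPipe() {
		return file_system.IsPipe(path);
	}
	void Close() {
		// Mirrors the JSONFileHandle::Close change: a pipe cannot be
		// reopened, so only close non-pipe handles.
		if (!IsPipe()) {
			std::cout << path << ": closed\n";
		}
	}

private:
	FileSystem &file_system;
	std::string path;
};

int main() {
	FileSystem disk;
	PipeFileSystem pipes;
	FileHandle file(disk, "data.json");
	FileHandle stream(pipes, "/dev/stdin");
	file.Close();   // prints "data.json: closed"
	stream.Close(); // left open
	return 0;
}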
4 changes: 0 additions & 4 deletions src/optimizer/statistics/operator/propagate_join.cpp
@@ -108,10 +108,6 @@ void StatisticsPropagator::PropagateStatistics(LogicalComparisonJoin &join, uniq
 		*node_ptr = std::move(cross_product);
 		return;
 	}
-	case JoinType::ANTI:
-		// anti join on true: empty result
-		ReplaceWithEmptyResult(*node_ptr);
-		return;
 	default:
 		// we don't handle mark/single join here yet
 		break;
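The deleted JoinType::ANTI case rewrote an anti join whose condition is statically known to be true into an empty result. That rewrite is only sound when the right-hand side is guaranteed non-empty: if the right side has no rows, nothing matches, and the anti join must return every left row. Since NOT EXISTS compiles to an anti join, this produced the wrong result in issue #9308 (see the new test near the end of this commit). A standalone sketch of the semantics:

// Anti join keeps the left rows that have no matching right row. With a
// condition that is always true, a left row "matches" exactly when the
// right side has at least one row.
#include <iostream>
#include <vector>

std::vector<int> AntiJoinOnTrue(const std::vector<int> &left, const std::vector<int> &right) {
	if (!right.empty()) {
		// every left row finds a match -> empty result (the removed rewrite)
		return {};
	}
	// empty right side: no left row matches -> keep all of them
	return left;
}

int main() {
	std::vector<int> left = {1};
	std::cout << AntiJoinOnTrue(left, {42}).size() << "\n"; // 0: rewrite would be correct
	std::cout << AntiJoinOnTrue(left, {}).size() << "\n";   // 1: rewrite loses this row
	return 0;
}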
7 changes: 5 additions & 2 deletions test/sql/copy/csv/parallel/csv_parallel_clickbench.test_slow
@@ -9,6 +9,9 @@ require httpfs
 statement ok
 pragma threads=4
 
+#FIXME
+mode skip
+
 statement ok
 CREATE TABLE hits_og
 (
@@ -122,7 +125,7 @@ CREATE TABLE hits_og
 
 
 statement ok
-INSERT INTO hits_og SELECT * FROM read_parquet('https://github.com/duckdb/duckdb-data/releases/download/v1.0/hits.parquet');
+INSERT INTO hits_og SELECT * FROM read_parquet('./hits.parquet');
 
 statement ok
 COPY hits_og TO '__TEST_DIR__/hits.csv';
@@ -131,7 +134,7 @@ statement ok
 create table hits as select * from hits_og limit 0;
 
 statement ok
-copy hits from '__TEST_DIR__/hits.csv';
+copy hits from '__TEST_DIR__/hits.csv' (HEADER 1);
 
 #Q 01
 query I
3 changes: 3 additions & 0 deletions test/sql/copy/csv/test_csv_httpfs.test
@@ -4,6 +4,9 @@
 
 require httpfs
 
+#FIXME this test fails: file is nonexistent
+mode skip
+
 query IIIIII rowsort
 SELECT * from read_csv_auto('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c');
 ----
3 changes: 3 additions & 0 deletions test/sql/copy/csv/test_csv_httpfs.test_slow
@@ -6,6 +6,9 @@ require httpfs
 
 require parquet
 
+#FIXME: remote changed?
+mode skip
+
 # Add test for 3731
 query I
 SELECT count(*) FROM read_csv_auto('https://datasets.imdbws.com/name.basics.tsv.gz', delim='\t', quote='',header=True)
3 changes: 3 additions & 0 deletions test/sql/copy/csv/test_csv_httpfs_prepared.test
@@ -32,6 +32,9 @@ EXECUTE boaz_bug
 
 # Recreate prepared statement with different file
 
+#FIXME: FILE changed?
+mode skip
+
 statement ok
 PREPARE boaz_bug AS SELECT * from read_csv_auto('https://www.data.gouv.fr/fr/datasets/r/6d186965-f41b-41f3-9b23-88241cc6890c') order by all limit 5;
 
7 changes: 7 additions & 0 deletions test/sql/copy/s3/glob_s3_paging.test_slow
@@ -23,6 +23,13 @@ require-env DUCKDB_S3_USE_SSL
 # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues
 set ignore_error_messages
 
+statement ok
+set http_timeout=120000;
+
+# More retries (longest wait will be 25600ms)
+statement ok
+set http_retries=6;
+
 # Test should be a bit faster using the metadata cache
 statement ok
 SET enable_http_metadata_cache=true;
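The same timeout/retry stanza is added to each of the S3 tests below. The 25600ms figure is consistent with exponential backoff under what appear to be httpfs defaults of a 100 ms initial wait and a 4x backoff factor with the first retry immediate; those parameters are an assumption here, not stated in the diff. A quick sketch of the arithmetic:

#include <cstdio>

int main() {
	// Assumed parameters (not part of the diff): 100 ms initial wait,
	// 4x backoff, first retry immediate.
	const int retries = 6;
	long wait_ms = 100;
	for (int attempt = 1; attempt <= retries; attempt++) {
		if (attempt == 1) {
			std::printf("retry %d: immediate\n", attempt);
			continue;
		}
		std::printf("retry %d: wait %ld ms\n", attempt, wait_ms);
		wait_ms *= 4;
	}
	// Final wait: 100 * 4^4 = 25600 ms, matching the comment in the tests.
	return 0;
}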
7 changes: 7 additions & 0 deletions test/sql/copy/s3/hive_partitioned_write_s3.test_slow
@@ -26,6 +26,13 @@ set ignore_error_messages
 statement ok
 pragma memory_limit='100mb'
 
+statement ok
+set http_timeout=120000;
+
+# More retries (longest wait will be 25600ms)
+statement ok
+set http_retries=6;
+
 # around 200MB worth of data, will require the PartitionedColumnData to spill to disk
 statement ok
 COPY (SELECT i%2::INT32 as part_col, i::INT32 FROM range(0,25000000) tbl(i)) TO 's3://test-bucket/partitioned_memory_spill' (FORMAT parquet, PARTITION_BY part_col, overwrite_or_ignore TRUE);
7 changes: 7 additions & 0 deletions test/sql/copy/s3/parquet_s3_tpcds.test_slow
@@ -33,6 +33,13 @@ PRAGMA default_null_order='NULLS LAST'
 statement ok
 SET enable_http_metadata_cache=true;
 
+statement ok
+set http_timeout=120000;
+
+# More retries (longest wait will be 25600ms)
+statement ok
+set http_retries=6;
+
 statement ok
 CREATE SCHEMA tpcds;
 
7 changes: 7 additions & 0 deletions test/sql/copy/s3/parquet_s3_tpch.test_slow
@@ -28,6 +28,13 @@ set ignore_error_messages
 statement ok
 SET enable_http_metadata_cache=true;
 
+statement ok
+set http_timeout=120000;
+
+# More retries (longest wait will be 25600ms)
+statement ok
+set http_retries=6;
+
 # Copy files to S3 before beginning tests
 statement ok
 CALL DBGEN(sf=0.01);
6 changes: 6 additions & 0 deletions test/sql/copy/s3/s3_presigned_read.test_slow
@@ -26,6 +26,12 @@ require-env S3_LARGE_PARQUET_PRESIGNED_URL
 # override the default behaviour of skipping HTTP errors and connection failures: this test fails on connection issues
 set ignore_error_messages
 
+statement ok
+set http_timeout=120000;
+
+# More retries (longest wait will be 25600ms)
+statement ok
+set http_retries=6;
 
 query I
 SELECT
7 changes: 7 additions & 0 deletions test/sql/copy/s3/upload_file_parallel.test_slow
@@ -28,6 +28,13 @@ set ignore_error_messages
 statement ok
 CALL DBGEN(sf=1)
 
+statement ok
+set http_timeout=120000;
+
+# More retries (longest wait will be 25600ms)
+statement ok
+set http_retries=6;
+
 query I
 SELECT
     sum(l_extendedprice * l_discount) AS revenue
7 changes: 7 additions & 0 deletions test/sql/copy/s3/upload_large_file.test_slow
@@ -29,6 +29,13 @@ set ignore_error_messages
 statement ok
 SET memory_limit='2.2GB';
 
+statement ok
+set http_timeout=120000;
+
+# More retries (longest wait will be 25600ms)
+statement ok
+set http_retries=6;
+
 # disable tmp dir to force OOM if we exceed our set limit
 statement ok
 PRAGMA temp_directory=''
25 changes: 25 additions & 0 deletions test/sql/subquery/exists/test_issue_9308.test
@@ -0,0 +1,25 @@
+# name: test/sql/subquery/exists/test_issue_9308.test
+# description: Issue #9308: wrong result: NOT EXISTS predicate with correlated non-equality comparison
+# group: [exists]
+
+statement ok
+PRAGMA enable_verification
+
+statement ok
+create or replace table t1(c1 int64);
+
+statement ok
+insert into t1 values (1);
+
+statement ok
+create or replace table t2(c1 int64);
+
+query II
+select c1, not exists (select 1 from t2 where t1.c1 <= t2.c1) from t1;
+----
+1	true
+
+query I
+select c1 from t1 where not exists (select 1 from t2 where t1.c1 <= t2.c1);
+----
+1
1 change: 1 addition & 0 deletions tools/shell/shell.c
@@ -17996,6 +17996,7 @@ static int do_meta_command(char *zLine, ShellState *p){
     session_close_all(p);
     close_db(p->db);
     p->db = 0;
+    globalDb = 0;
     p->zDbFilename = 0;
     sqlite3_free(p->zFreeOnClose);
     p->zFreeOnClose = 0;
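close_db(p->db) releases the shell's database handle, but the shell also keeps a global alias (globalDb) pointing at the same object; clearing only p->db would leave that alias dangling. A generic illustration of the hazard with hypothetical names (not the shell's actual code):

#include <cstdlib>

struct Database {
	int id;
};

static Database *global_db = nullptr; // global alias, like globalDb in shell.c

static void CloseDatabase(Database *&db) {
	std::free(db);
	db = nullptr;        // the owning pointer is cleared...
	global_db = nullptr; // ...and so must every alias, or it dangles
}

int main() {
	Database *db = static_cast<Database *>(std::malloc(sizeof(Database)));
	global_db = db;
	CloseDatabase(db);
	// Without the global_db reset above, dereferencing global_db here
	// would be a use-after-free.
	return global_db == nullptr ? 0 : 1;
}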
