Skip to content

Commit

Permalink
Merge pull request #1226 from Mytherin/pandasscanfix
Browse files Browse the repository at this point in the history
Fix for parallel pandas scans (fixes #1220)
  • Loading branch information
Mytherin committed Dec 14, 2020
2 parents 2eff215 + 2bc0dfc commit 5eff218
Show file tree
Hide file tree
Showing 8 changed files with 261 additions and 141 deletions.
1 change: 1 addition & 0 deletions scripts/package_build.py
Expand Up @@ -49,6 +49,7 @@ def includes(extensions):
# Build the list of include directories, all resolved relative to scripts_dir.
includes = []
# <scripts_dir>/../src/include
includes.append(os.path.join(scripts_dir, '..', 'src', 'include'))
# The parent directory of scripts_dir itself.
includes.append(os.path.join(scripts_dir, '..'))
# Include directory of the bundled utf8proc third-party library.
includes.append(os.path.join(scripts_dir, '..', 'third_party', 'utf8proc', 'include'))
# One additional include directory per requested extension.
for ext in extensions:
includes.append(os.path.join(scripts_dir, '..', 'extension', ext, 'include'))
return includes
Expand Down
1 change: 0 additions & 1 deletion src/include/duckdb/common/types/string_type.hpp
Expand Up @@ -25,7 +25,6 @@ struct string_t {
string_t() = default;
//! Constructs a string_t of the given length: the inline character buffer is
//! zero-filled and the length field is set to "len".
string_t(uint32_t len) {
	// Clear all INLINE_LENGTH bytes of the inline buffer.
	memset(value.inlined.inlined, 0, INLINE_LENGTH);
	// Record the requested length.
	value.inlined.length = len;
}
string_t(const char *data, uint32_t len) {
value.inlined.length = len;
Expand Down
5 changes: 5 additions & 0 deletions third_party/utf8proc/include/utf8proc_wrapper.hpp
Expand Up @@ -21,6 +21,11 @@ class Utf8Proc {
static size_t NextGraphemeCluster(const char *s, size_t len, size_t pos);
//! Returns the position (in bytes) of the previous grapheme cluster
static size_t PreviousGraphemeCluster(const char *s, size_t len, size_t pos);

//! Transforms a codepoint to UTF-8 and writes it to "c"; sets "sz" to the size of the codepoint
static bool CodepointToUtf8(int cp, int &sz, char *c);
//! Returns the codepoint length in bytes when encoded in UTF8
static int CodepointLength(int cp);
};

}
8 changes: 8 additions & 0 deletions third_party/utf8proc/utf8proc_wrapper.cpp
Expand Up @@ -81,6 +81,14 @@ size_t Utf8Proc::PreviousGraphemeCluster(const char *s, size_t len, size_t cpos)
}
}

//! Thin wrapper that forwards to the free function utf8proc_codepoint_to_utf8.
//! "cp" is the codepoint, "sz" is passed by reference and "c" is the output buffer;
//! the callee's result is returned unchanged.
//! NOTE(review): the meaning of the boolean result is not visible here — presumably
//! it signals whether the conversion succeeded; confirm against the callee.
bool Utf8Proc::CodepointToUtf8(int cp, int &sz, char *c) {
	const bool result = utf8proc_codepoint_to_utf8(cp, sz, c);
	return result;
}

//! Thin wrapper that forwards "cp" to the free function utf8proc_codepoint_length
//! and returns its result unchanged.
int Utf8Proc::CodepointLength(int cp) {
	const int length = utf8proc_codepoint_length(cp);
	return length;
}

}

size_t utf8proc_next_grapheme_cluster(const char *s, size_t len, size_t pos) {
Expand Down

0 comments on commit 5eff218

Please sign in to comment.