diff --git a/hpat/_meminfo.h b/hpat/_meminfo.h
index 050c80a5c..e2fc7576b 100644
--- a/hpat/_meminfo.h
+++ b/hpat/_meminfo.h
@@ -41,7 +41,7 @@ struct MemInfo
 
 typedef struct MemInfo NRT_MemInfo;
 
-void nrt_debug_print(const char* fmt, ...)
+void nrt_debug_print(char* fmt, ...)
 {
     va_list args;
diff --git a/hpat/_str_decode.cpp b/hpat/_str_decode.cpp
index 15833aaeb..d6c8afadc 100644
--- a/hpat/_str_decode.cpp
+++ b/hpat/_str_decode.cpp
@@ -329,8 +329,11 @@ static Py_ssize_t ascii_decode(const char* start, const char* end, Py_UCS1* dest
 
 void decode_utf8(const char* s, Py_ssize_t size, int* kind, int* is_ascii, int* length, NRT_MemInfo** meminfo)
 {
     _C_UnicodeWriter writer;
+    const char* starts = s;
     const char* end = s + size;
+    Py_ssize_t startinpos;
+    Py_ssize_t endinpos;
     const char* errmsg = "";
 
     *is_ascii = 0;
@@ -392,9 +395,13 @@ void decode_utf8(const char* s, Py_ssize_t size, int* kind, int* is_ascii, int*
             if (s == end)
                 goto End;
             errmsg = "unexpected end of data";
+            startinpos = s - starts;
+            endinpos = end - starts;
             break;
         case 1:
             errmsg = "invalid start byte";
+            startinpos = s - starts;
+            endinpos = startinpos + 1;
             break;
         case 2:
         case 3:
@@ -404,6 +411,8 @@ void decode_utf8(const char* s, Py_ssize_t size, int* kind, int* is_ascii, int*
                 goto End;
             }
             errmsg = "invalid continuation byte";
+            startinpos = s - starts;
+            endinpos = startinpos + ch - 1;
             break;
         default:
             if (_C_UnicodeWriter_WriteCharInline(&writer, ch) < 0)
diff --git a/hpat/_str_ext.cpp b/hpat/_str_ext.cpp
index 20bca9141..790349e8d 100644
--- a/hpat/_str_ext.cpp
+++ b/hpat/_str_ext.cpp
@@ -8,9 +8,15 @@
 
 #include "_str_decode.cpp"
 
+#ifdef USE_BOOST_REGEX
+#include <boost/regex.hpp>
+using boost::regex;
+using boost::regex_search;
+#else
 #include <regex>
 using std::regex;
 using std::regex_search;
+#endif
 
 #include
diff --git a/hpat/io/_csv.cpp b/hpat/io/_csv.cpp
index fcd20cc6a..e9fb8d339 100644
--- a/hpat/io/_csv.cpp
+++ b/hpat/io/_csv.cpp
@@ -309,14 +309,14 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel
     std::vector<size_t> line_offset = count_lines(f, hpat_dist_get_node_portion(fsz, nranks, rank));
     size_t no_lines = line_offset.size();
     // get total number of lines using allreduce
-    int64_t tot_no_lines = 0;
+    size_t tot_no_lines(0);
 
     hpat_dist_reduce(reinterpret_cast<char*>(&no_lines), reinterpret_cast<char*>(&tot_no_lines), HPAT_ReduceOps::SUM, HPAT_CTypes::UINT64);
 
     // Now we need to communicate the distribution as we really want it
     // First determine which is our first line (which is the sum of previous lines)
-    int64_t byte_first_line = hpat_dist_exscan_i8(no_lines);
-    int64_t byte_last_line = byte_first_line + no_lines;
+    size_t byte_first_line = hpat_dist_exscan_i8(no_lines);
+    size_t byte_last_line = byte_first_line + no_lines;
 
     // We now determine the chunks of lines that begin and end in our byte-chunk
 
@@ -351,8 +351,8 @@ static PyObject* csv_chunk_reader(std::istream * f, size_t fsz, bool is_parallel
     // We iterate through chunk boundaries (defined by line-numbers)
     // we start with boundary 1 as 0 is the beginning of file
-    for(size_t i=1; i byte_first_line && i_bndry <= byte_last_line) {
diff --git a/hpat/stringlib/codecs.h b/hpat/stringlib/codecs.h
index d0a402f5e..25596bca1 100644
--- a/hpat/stringlib/codecs.h
+++ b/hpat/stringlib/codecs.h
@@ -293,7 +293,7 @@ int64_t STRINGLIB(utf8_encoder)(char* out_data, STRINGLIB_CHAR* data, Py_ssize_t
     p = (char*)_C_BytesWriter_Alloc(&writer, size * max_char_size);
     if (p == NULL)
-        return 0;
+        return NULL;
 
     for (i = 0; i < size;) {
@@ -316,7 +316,8 @@ int64_t STRINGLIB(utf8_encoder)(char* out_data, STRINGLIB_CHAR* data, Py_ssize_t
 #if STRINGLIB_SIZEOF_CHAR > 1
         else if (Py_UNICODE_IS_SURROGATE(ch))
         {
-            Py_ssize_t startpos, endpos;
+            Py_ssize_t startpos, endpos, newpos;
+            Py_ssize_t k;
 
             startpos = i - 1;
             endpos = startpos + 1;
diff --git a/parquet_reader/hpat_parquet_reader.cpp b/parquet_reader/hpat_parquet_reader.cpp
index 665e1429e..cb235db76 100644
--- a/parquet_reader/hpat_parquet_reader.cpp
+++ b/parquet_reader/hpat_parquet_reader.cpp
@@ -518,7 +518,7 @@ int pq_read_string_parallel_single_file(std::shared_ptr<FileReader> arrow_reader
     }
     std::shared_ptr<::arrow::Array> arr = chunked_arr->chunk(0);
     // std::cout << arr->ToString() << std::endl;
-
+
     int64_t num_values = arr->length();
     auto buffers = arr->data()->buffers;
     // std::cout<<"num buffs: "<< buffers.size()<<std::endl;