Skip to content

Commit

Permalink
Adding ability to parse float from number with leading decimal (#831) (#962)
Browse files Browse the repository at this point in the history

* Adding ability to parse a float from a number with a leading decimal point (e.g. `.2`)

* Replacing the deprecated `std::usize::MAX` constant with `usize::MAX` per https://doc.rust-lang.org/core/usize/constant.MAX.html, making it consistent with other usages

* Add test case for `2.` and issue link

Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>

Co-authored-by: Brian Rackle <brianrackle@hotmail.com>
  • Loading branch information
alamb and brianrackle committed Nov 22, 2021
1 parent 59f96e8 commit f7deba4
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions arrow/src/csv/reader.rs
Expand Up @@ -60,7 +60,7 @@ use crate::record_batch::RecordBatch;
use csv_crate::{ByteRecord, StringRecord};

lazy_static! {
static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d+\.\d+)$").unwrap();
static ref DECIMAL_RE: Regex = Regex::new(r"^-?(\d*\.\d+)$").unwrap();
static ref INTEGER_RE: Regex = Regex::new(r"^-?(\d+)$").unwrap();
static ref BOOLEAN_RE: Regex = RegexBuilder::new(r"^(true)$|^(false)$")
.case_insensitive(true)
Expand Down Expand Up @@ -271,7 +271,7 @@ pub fn infer_schema_from_files(
has_header: bool,
) -> Result<Schema> {
let mut schemas = vec![];
let mut records_to_read = max_read_records.unwrap_or(std::usize::MAX);
let mut records_to_read = max_read_records.unwrap_or(usize::MAX);

for fname in files.iter() {
let (schema, records_read) = infer_file_schema(
Expand Down Expand Up @@ -1342,6 +1342,9 @@ mod tests {
assert_eq!(infer_field_schema("\"123\""), DataType::Utf8);
assert_eq!(infer_field_schema("10"), DataType::Int64);
assert_eq!(infer_field_schema("10.2"), DataType::Float64);
assert_eq!(infer_field_schema(".2"), DataType::Float64);
// "2." is currently inferred as Utf8, but should be parsed as Float or Int. See https://github.com/apache/arrow-rs/issues/929
assert_eq!(infer_field_schema("2."), DataType::Utf8);
assert_eq!(infer_field_schema("true"), DataType::Boolean);
assert_eq!(infer_field_schema("false"), DataType::Boolean);
assert_eq!(infer_field_schema("2020-11-08"), DataType::Date32);
Expand Down

0 comments on commit f7deba4

Please sign in to comment.