diff --git a/.travis.yml b/.travis.yml index f040e14..a838cdc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ notifications: # uncomment the following lines to override the default test script script: - if [[ -a .git/shallow ]]; then git fetch --unshallow; fi - - julia -e 'Pkg.clone(pwd()); Pkg.build("DateTimeParser"); Pkg.test("DateTimeParser"; coverage=true)' + - julia -e 'Pkg.clone(pwd()); Pkg.build("DateParser"); Pkg.test("DateParser"; coverage=true)' after_success: - - julia -e 'cd(Pkg.dir("DateTimeParser")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'; - - julia -e 'cd(Pkg.dir("DateTimeParser")); Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())' + - julia -e 'cd(Pkg.dir("DateParser")); Pkg.add("Coverage"); using Coverage; Coveralls.submit(Coveralls.process_folder())'; + - julia -e 'cd(Pkg.dir("DateParser")); Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())' diff --git a/LICENSE.md b/LICENSE.md index 03160ab..f9015ed 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,4 +1,4 @@ -The DateTimeParser.jl package is licensed under the Mozilla Public License, Version 2.0: +The DateParser.jl package is licensed under the Mozilla Public License, Version 2.0: > Copyright (c) 2015: Invenia Technical Computing Corporation. 
> diff --git a/README.md b/README.md index 52fc31b..936b8c2 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,12 @@ -# DateTimeParser +# DateParser [![Build Status](https://travis-ci.org/invenia/DateTimeParser.jl.svg?branch=master)](https://travis-ci.org/invenia/DateTimeParser.jl) +[![Build status](https://ci.appveyor.com/api/projects/status/xbyk0v7m9p369ier/branch/master?svg=true)](https://ci.appveyor.com/project/Michael-Klassen/dateparser-jl/branch/master) [![Coverage Status](https://coveralls.io/repos/invenia/DateTimeParser.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/invenia/DateTimeParser.jl?branch=master) [![codecov.io](http://codecov.io/github/invenia/DateTimeParser.jl/coverage.svg?branch=master)](http://codecov.io/github/invenia/DateTimeParser.jl?branch=master) -Automatic parsing of DateTime strings +Automatic parsing of date strings ## Usage diff --git a/appveyor.yml b/appveyor.yml index 5dc95fc..30ca29f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -28,7 +28,7 @@ build_script: # Need to convert from shallow to complete for Pkg.clone to work - IF EXIST .git\shallow (git fetch --unshallow) - C:\projects\julia\bin\julia -e "versioninfo(); - Pkg.clone(pwd(), \"DateTimeParser\"); Pkg.build(\"DateTimeParser\")" + Pkg.clone(pwd(), \"DateParser\"); Pkg.build(\"DateParser\")" test_script: - - C:\projects\julia\bin\julia --check-bounds=yes -e "Pkg.test(\"DateTimeParser\")" + - C:\projects\julia\bin\julia --check-bounds=yes -e "Pkg.test(\"DateParser\")" diff --git a/src/DateParser.jl b/src/DateParser.jl new file mode 100644 index 0000000..93c1bae --- /dev/null +++ b/src/DateParser.jl @@ -0,0 +1,639 @@ +module DateParser + +using Base.Dates +using TimeZones + +import Base.Dates: VALUETODAYOFWEEK, VALUETODAYOFWEEKABBR, VALUETOMONTH, VALUETOMONTHABBR +import TimeZones: localtime + +# Re-export from Base with ZonedDateTime, DateTime, and Date +export parse, tryparse + +# Automatic parsing of DateTime strings. 
Based upon Python's dateutil parser +# https://labix.org/python-dateutil#head-a23e8ae0a661d77b89dfb3476f85b26f0b30349c + +# Some pointers: +# http://www.cl.cam.ac.uk/~mgk25/iso-time.html +# http://www.w3.org/TR/NOTE-datetime +# http://new-pds-rings-2.seti.org/tools/time_formats.html +# http://search.cpan.org/~muir/Time-modules-2003.0211/lib/Time/ParseDate.pm + +immutable DayOfWeek <: DatePeriod + value::Int64 + DayOfWeek(v::Number) = new(v) +end + +const english_hms = Dict( + "h" => :hour, "hour" => :hour, "hours" => :hour, + "m" => :minute, "minute" => :minute, "minutes" => :minute, + "s" => :second, "second" => :second, "seconds" => :second, +) +const HMS = Dict{UTF8String,Dict{UTF8String,Symbol}}("english"=>english_hms) +const english_ampm = Dict( + "am" => :am, "a" => :am, + "pm" => :pm, "p" => :pm, +) +const AMPM = Dict{UTF8String,Dict{UTF8String,Symbol}}("english"=>english_ampm) + +# Name to value translations +for name in ("DAYOFWEEK", "DAYOFWEEKABBR", "MONTH", "MONTHABBR") + valueto = Symbol("VALUETO" * name) + tovalue = Symbol(name * "TOVALUE") + @eval begin + const $tovalue = [locale => Dict{UTF8String,Int}( + zip(map(lowercase, values(d)), keys(d))) for (locale, d) in $valueto] + end +end + +const JUMP = [ + " ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st", + "nd", "rd", "th", "the", +] +const PERTAIN = ["of",] +const UTCZONE = ["utc", "gmt", "z",] + +function Base.tryparse{T<:TimeType}(::Type{T}, s::AbstractString; args...) + try + return Nullable{T}(parse(T, s; args...)) + catch + return Nullable{T}() + end +end + +function Base.parse(::Type{ZonedDateTime}, zdt::AbstractString; + default::ZonedDateTime=ZonedDateTime(DateTime(year(today())), FixedTimeZone("UTC", 0)), + args... +) + res = _parsedate(zdt; args...) 
+ + return ZonedDateTime( + DateTime( + get(res.year, Year(default)), + get(res.month, Month(default)), + get(res.day, Day(default)), + get(res.hour, Hour(default)), + get(res.minute, Minute(default)), + get(res.second, Second(default)), + get(res.millisecond, Millisecond(default)) + ), + get(res.timezone, default.timezone) + ) +end + +function Base.parse(::Type{DateTime}, dt::AbstractString; + default::DateTime=DateTime(year(today())), args... +) + res = _parsedate(dt; args...) + + return DateTime( + get(res.year, Year(default)), + get(res.month, Month(default)), + get(res.day, Day(default)), + get(res.hour, Hour(default)), + get(res.minute, Minute(default)), + get(res.second, Second(default)), + get(res.millisecond, Millisecond(default)) + ) +end + +function Base.parse(::Type{Date}, d::AbstractString; + default::Date=Date(year(today())), args... +) + res = _parsedate(d; args...) + + return Date( + get(res.year, Year(default)), + get(res.month, Month(default)), + get(res.day, Day(default)) + ) +end + +type Parts + year::Nullable{Year} + month::Nullable{Month} + day::Nullable{Day} + hour::Nullable{Hour} + minute::Nullable{Minute} + second::Nullable{Second} + millisecond::Nullable{Millisecond} + timezone::Nullable{TimeZone} + dayofweek::Nullable{DayOfWeek} + tzoffset::Nullable{Int} + tzname::Nullable{AbstractString} + Parts() = new(nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, + nothing, nothing, nothing) +end +Base.convert{T}(::Type{Nullable{T}}, x::Any) = Nullable{T}(T(x)) + +function _parsedate(s::AbstractString; fuzzy::Bool=false, + timezone_infos::Dict{AbstractString, TimeZone}=Dict{AbstractString, TimeZone}(), # Specify what a timezone is + dayfirst::Bool=false, # MM-DD-YY vs DD-MM-YY + yearfirst::Bool=false, # MM-DD-YY vs YY-MM-DD + locale::AbstractString="english", # Locale in Dates.VALUETOMONTH and VALUETODAYOFWEEK +) + res = Parts() + + ymd = sizehint!(Int[], 3) # year/month/day list + monthindex = -1 # Index of a month string in 
ymd + + tokens = tokenize(s) + len = length(tokens) + + i = 1 + while i <= len + token = tokens[i] + tokenlength = length(token) + if isdigit(token) + # Token is a number + i += 1 # We want to look at what comes after the number + if tokenlength == 6 + # YYMMDD or HHMMSS[.ss] + if length(ymd) != 0 || + (i+1 <= len && tokens[i] == "." && isdigit(tokens[i+1])) + # 19990101T235959[.59] + res.hour = token[1:2] + res.minute = token[3:4] + res.second = token[5:6] + if i+1 <= len && tokens[i] == "." && isdigit(tokens[i+1]) + temp = round(Int, 1000 * parsefractional(tokens[i+1])) + res.millisecond = temp + i += 2 + end + else + push!(ymd, parse(Int, token[1:2])) + push!(ymd, parse(Int, token[3:4])) + push!(ymd, parse(Int, token[5:end])) + end + elseif tokenlength in (8, 12, 14) + # YYYYMMDD[hhmm[ss]] + push!(ymd, parse(Int, token[1:4])) + push!(ymd, parse(Int, token[5:6])) + push!(ymd, parse(Int, token[7:8])) + if tokenlength > 8 + res.hour = token[9:10] + res.minute = token[11:12] + if tokenlength > 12 + res.second = token[13:14] + end + end + elseif tokenlength == 9 + # HHMMSS[mil] + res.hour = token[1:2] + res.minute = token[3:4] + res.second = token[5:6] + res.millisecond = token[7:9] + elseif (i <= len && haskey(HMS[locale], lowercase(tokens[i]))) || + (i+2 <= len && tokens[i] == "." && isdigit(tokens[i+1]) && + haskey(HMS[locale], lowercase(tokens[i+2]))) + # HH[ ]h or MM[ ]m or SS[.ss][ ]s + + value = parse(Int, token) + decimal = 0.0 + if tokens[i] == "." 
+ decimal = parsefractional(tokens[i+1]) + i += 2 + end + idx = HMS[locale][lowercase(tokens[i])] + while true + if idx == :hour + res.hour = value + if decimal != 0 + res.minute = get(res.minute, 0) + Minute(round(Int, 60 * decimal)) + end + elseif idx == :minute + res.minute = value + if decimal != 0 + res.second = get(res.second, 0) + Second(round(Int, 60 * decimal)) + end + elseif idx == :second + res.second = value + if decimal != 0 + res.millisecond = get(res.millisecond, 0) + Millisecond(round(Int, 1000 * decimal)) + end + end + i += 1 + if i > len || idx == :second + break + end + # 12h00 + token = tokens[i] + if !isdigit(token) + break + else + i += 1 + value = parse(Int, token) + decimal = 0.0 + if i+1 <= len tokens[i] == "." && isdigit(tokens[i+1]) + decimal = parsefractional(tokens[i+1]) + i += 2 + end + if i <= len && haskey(HMS[locale], lowercase(tokens[i])) + idx = HMS[locale][lowercase(tokens[i])] + elseif idx == :hour + idx = :minute + else + idx = :second + end + end + end + elseif i+1 <= len && tokens[i] == ":" + # HH:MM[:SS[.ss]] + res.hour = token + res.minute = tokens[i+1] + i += 2 + if i+1 <= len && tokens[i] == "." && isdigit(tokens[i+1]) + temp = 60 * parsefractional(tokens[i+1]) + res.second = round(Int, temp) + i += 2 + elseif i < len && tokens[i] == ":" + res.second = tokens[i+1] + i += 2 + if i+1 <= len && tokens[i] == "." 
&& isdigit(tokens[i+1]) + temp = 1000 * parsefractional(tokens[i+1]) + res.millisecond = round(Int, temp) + i += 2 + end + end + elseif i <= len && tokens[i] in ("-","/",".") + sep = tokens[i] + push!(ymd, parse(Int, token)) + i += 1 + if i <= len && !(lowercase(tokens[i]) in JUMP) + if isdigit(tokens[i]) + push!(ymd, parse(Int, tokens[i])) + else + m = _tryparse(Month, tokens[i], locale=locale) + if !isnull(m) + push!(ymd, get(m)) + monthindex = length(ymd) + end + end + i += 1 + if i <= len && tokens[i] == sep + # We have three members + i += 1 + m = _tryparse(Month, tokens[i], locale=locale) + if !isnull(m) + push!(ymd, get(m)) + monthindex = length(ymd) + else + push!(ymd, parse(Int, tokens[i])) + end + i += 1 + end + end + elseif i <= len && haskey(AMPM[locale], lowercase(tokens[i])) + # 12am + res.hour = token + res.hour = converthour(get(res.hour).value, AMPM[locale][lowercase(tokens[i])]) + i += 1 + else + if length(ymd) < 3 + push!(ymd, parse(Int, token)) + elseif tokenlength <= 2 + if isnull(res.hour) + res.hour = token + elseif isnull(res.minute) + res.minute = token + elseif isnull(res.second) + res.second = token + elseif isnull(res.millisecond) + res.millisecond = token + elseif !fuzzy + error("Failed to parse date") + end + elseif tokenlength == 3 && isnull(res.millisecond) + res.millisecond = token + elseif tokenlength == 4 && isnull(res.hour) && isnull(res.minute) + res.hour = token[1:2] + res.minute = token[3:4] + elseif !fuzzy + error("Failed to parse date") + end + end + else + # Token is not a number + w = _tryparse(DayOfWeek, lowercase(token), locale=locale) + m = _tryparse(Month, lowercase(token), locale=locale) + if !isnull(w) + # Weekday + res.dayofweek = get(w) + i += 1 + elseif !isnull(m) + # Month name + push!(ymd, get(m)) + monthindex = length(ymd) + i += 1 + if i <= len + if tokens[i] in ("-", "/", ".") + # Jan-01[-99] + sep = tokens[i] + i += 1 + push!(ymd, parse(Int, tokens[i])) + i += 1 + if i <= len && tokens[i] == sep + # 
Jan-01-99 + i += 1 + push!(ymd, parse(Int, tokens[i])) + i += 1 + end + elseif i+1 <= len && tokens[i] in PERTAIN && isdigit(tokens[i+1]) + # Jan of 01 + # In this case, 01 is clearly the year + value = parse(Int, tokens[i+1]) + # Convert it here to become unambiguous + push!(ymd, convertyear(value)) + i += 2 + end + end + elseif haskey(AMPM[locale], lowercase(tokens[i])) + # am/pm + if isnull(res.hour) + error("Failed to parse date") + end + res.hour = converthour(get(res.hour).value, AMPM[locale][lowercase(tokens[i])]) + i += 1 + elseif i+1 <= len && tokens[i] in ("+", "-") && + isnull(res.tzoffset) && isdigit(tokens[i+1]) + i = _parsetimezone_offset!(res, tokens, i) + else + newindex = _tryparsetimezone!(res, tokens, i, timezone_infos) + if i != newindex + # We found a timezone + i = newindex + elseif !(lowercase(tokens[i]) in JUMP) && !fuzzy + error("Failed to parse date") + else + i += 1 + end + end + end + end + + processymd!(res, ymd, monthindex=monthindex, yearfirst=yearfirst, dayfirst=dayfirst) + + if isnull(res.timezone) && !isnull(res.tzoffset) + res.tzname = get(res.tzname, "local") + res.timezone = FixedTimeZone(get(res.tzname), get(res.tzoffset)) + end + + return res +end + +function _parsetimezone_offset!(res::Parts, tokens::Array{ASCIIString}, i::Int) + # Numbered timezone + signal = tokens[i] == "+" ? 
1 : -1 + + i += 1 + tokenlength = length(tokens[i]) + h = mi = 0 + if tokenlength == 4 + # -0300 + h, mi = parse(Int, tokens[i][1:2]), parse(Int, tokens[i][3:end]) + elseif i+2 <= length(tokens) && tokens[i+1] == ":" && isdigit(tokens[i+2]) + # -03:00 + h, mi = parse(Int, tokens[i]), parse(Int, tokens[i+2]) + i += 2 + elseif tokenlength <= 2 + # -[0]3 + h = parse(Int, tokens[i]) + else + error("Failed to read timezone offset") + end + h < 24 || error("Hour: $h out of range (0:23)") + mi < 60 || error("Minute: $mi out of range (0:59)") + res.tzoffset = signal * (h * 3600 + mi * 60) + i += 1 + + return i +end + +function _tryparsetimezone!(res::Parts, tokens::Array{ASCIIString}, i::Int, + timezone_infos::Dict{AbstractString,TimeZone} +) + len = length(tokens) + oldindex = i + inbrackets = false + + if i <= len && tokens[i] == "(" + inbrackets = true + i += 1 + end + + if i <= len && ismatch(r"^\w+$", tokens[i]) + res.tzname = tokens[i] + while i+2 <= len && ismatch(r"^\w+$", tokens[i]) && + (tokens[i+1] in ("/", "-", "_") || ismatch(r"^\d+$", tokens[i+1])) + res.tzname = string(get(res.tzname), tokens[i+1], tokens[i+2]) + i += 2 + end + i += 1 + end + + # Check for something like GMT+3, or BRST+3 + if i+1 <= len && tokens[i] in ("+", "-") && isdigit(tokens[i+1]) + newindex = _parsetimezone_offset!(res, tokens, i) + while i < newindex + res.tzname = string(get(res.tzname, ""), tokens[i]) + i += 1 + end + end + + if inbrackets && i <= len && tokens[i] == ")" + i += 1 + end + + value = _tryparse(TimeZone, get(res.tzname, ""), translation=timezone_infos) + if !isnull(value) + res.timezone = get(value) + elseif !inbrackets || isnull(res.tzoffset) + res.tzname = Nullable{AbstractString}() + i = oldindex + end + + return i +end + +function processymd!(res::Parts, ymd::Array{Int}; + monthindex=-1, yearfirst=false, dayfirst=false +) + # Process year/month/day + len_ymd = length(ymd) + + if len_ymd > 3 + # More than three members!? 
+ error("Failed to parse date") + elseif len_ymd == 1 || (monthindex != -1 && len_ymd == 2) + # One member, or two members with a month string + if monthindex != -1 + res.month = ymd[monthindex] + deleteat!(ymd, monthindex) + end + if len_ymd > 1 || monthindex == -1 + if ymd[1] > 31 + res.year = ymd[1] + else + res.day = ymd[1] + end + end + elseif len_ymd == 2 + # Two members with numbers + if ymd[1] > 31 + # 99-01 + res.year, res.month = ymd + elseif ymd[2] > 31 + # 01-99 + res.month, res.year = ymd + elseif dayfirst && ymd[2] <= 12 + # 13-01 + res.day, res.month = ymd + else + # 01-13 + res.month, res.day = ymd + end + elseif len_ymd == 3 + # Three members + if monthindex == 1 + res.month, res.day, res.year = ymd + elseif monthindex == 2 + if ymd[1] > 31 || (yearfirst && ymd[3] <= 31) + # 99-Jan-01 + res.year, res.month, res.day = ymd + else + # 01-Jan-01 + # Give precedence to day-first, since + # two-digit years are usually hand-written. + res.day, res.month, res.year = ymd + end + elseif monthindex == 3 + # Month name given last (unusual ordering) + if ymd[2] > 31 + # 01-99-Jan + res.day, res.year, res.month = ymd + else + res.year, res.day, res.month = ymd + end + else + if ymd[1] > 31 || (yearfirst && ymd[2] <= 12 && ymd[3] <= 31) + # 99-01-01 + res.year, res.month, res.day = ymd + elseif ymd[1] > 12 || (dayfirst && ymd[2] <= 12) + # 13-01-01 + res.day, res.month, res.year = ymd + else + # 01-13-01 + res.month, res.day, res.year = ymd + end + end + end + if !isnull(res.year) + res.year = convertyear(get(res.year).value) + end +end + +"Helper function. Parses a `String` containing an `Int` into the fraction part of a +`Float64`. 
e.g. \"5\" becomes `0.5` and \"450\" becomes `0.450`" +function parsefractional(s::AbstractString) + parse(Float64, string(".", s)) +end + +function _tryparse(::Type{Month}, s::AbstractString; locale::AbstractString="english") + name = lowercase(s) + temp = Nullable{Int}(get(MONTHTOVALUE[locale], name, + get(MONTHABBRTOVALUE[locale], name, nothing))) + if isnull(temp) + Nullable{Month}() + else + Nullable{Month}(Month(get(temp))) + end +end + +function _tryparse(::Type{DayOfWeek}, s::AbstractString; locale::AbstractString="english") + name = lowercase(s) + temp = Nullable{Int}(get(DAYOFWEEKTOVALUE[locale], name, + get(DAYOFWEEKABBRTOVALUE[locale], name, nothing))) + if isnull(temp) + Nullable{DayOfWeek}() + else + Nullable{DayOfWeek}(DayOfWeek(get(temp))) + end +end + +function _tryparse(::Type{TimeZone}, name::AbstractString; + translation::Dict{AbstractString,TimeZone}=Dict{AbstractString,TimeZone}() +) + if haskey(translation, name) + return Nullable{TimeZone}(translation[name]) + elseif name in TimeZones.timezone_names() + return Nullable{TimeZone}(TimeZone(name)) + elseif lowercase(name) in UTCZONE + return Nullable{TimeZone}(FixedTimeZone("UTC", 0)) + else + return Nullable{TimeZone}() + end +end + +"Converts a 2 digit year to a 4 digit one within 50 years of convert_year. 
At the moment + convert_year defaults to 2000, if people are still using 2 digit years after year 2049 + (hopefully not) then we can change the default to year(today())" +function convertyear(year::Int, convert_year=2000) + if year <= 99 + century = convert_year - (convert_year % 100) + year += century + if abs(year - convert_year) >= 50 + if year < convert_year + year += 100 + else + year -= 100 + end + end + end + return year +end + +function converthour(h::Int, ampm::Symbol) + if h < 12 && ampm == :pm + h = h + 12 + elseif h == 12 && ampm == :am + h = 0 + end + return h +end + +function tokenize{Str<:AbstractString}(input::Str) + tokens = Str[] + token = sizehint!(Char[], 10) + + # Note: A regular expression can handle almost all of this task + # with the exception of identifying Unicode punctuation. + state = last_state = :none + for c in input + if isspace(c) + state = :none + elseif isdigit(c) + state = :number + elseif isalpha(c) + state = :word + else + state = :other + end + + if state != :none + if state != last_state && !isempty(token) + push!(tokens, Str(token)) + empty!(token) + end + + push!(token, c) + end + + last_state = state + end + + # Token will only be empty here if the entire input was whitespace + !isempty(token) && push!(tokens, Str(token)) + + return tokens +end + +end # module diff --git a/src/DateTimeParser.jl b/src/DateTimeParser.jl deleted file mode 100644 index 011227d..0000000 --- a/src/DateTimeParser.jl +++ /dev/null @@ -1,588 +0,0 @@ -module DateTimeParser - -using Base.Dates -using TimeZones - -import Base.Dates: VALUETODAYOFWEEK, VALUETODAYOFWEEKABBR, VALUETOMONTH, VALUETOMONTHABBR -import TimeZones: localtime - -export parse, tryparse - -# Automatic parsing of DateTime strings. 
Based upon Python's dateutil parser -# https://labix.org/python-dateutil#head-a23e8ae0a661d77b89dfb3476f85b26f0b30349c - -# Some pointers: -# http://www.cl.cam.ac.uk/~mgk25/iso-time.html -# http://www.w3.org/TR/NOTE-datetime -# http://new-pds-rings-2.seti.org/tools/time_formats.html -# http://search.cpan.org/~muir/Time-modules-2003.0211/lib/Time/ParseDate.pm - -const HMS = Dict{AbstractString, Symbol}( - "h" => :hour, "hour" => :hour, "hours" => :hour, - "m" => :minute, "minute" => :minute, "minutes" => :minute, - "s" => :second, "second" => :second, "seconds" => :second, -) -const AMPM = Dict{AbstractString, Symbol}( - "am" => :am, "a" => :am, - "pm" => :pm, "p" => :pm, -) -const JUMP = ( - " ", ".", ",", ";", "-", "/", "'", "at", "on", "and", "ad", "m", "t", "of", "st", - "nd", "rd", "th", "the", -) -const PERTAIN = ("of",) -const UTCZONE = ("utc", "gmt", "z",) - -function Base.tryparse{T<:TimeType}(::Type{T}, str::AbstractString; args...) - try - return Nullable{T}(parse(T, str; args...)) - catch - return Nullable{T}() - end -end - -function Base.parse(::Type{ZonedDateTime}, datetimestring::AbstractString; fuzzy::Bool=false, - default::ZonedDateTime=ZonedDateTime(DateTime(year(today())), FixedTimeZone("UTC", 0)), - timezone_infos::Dict{AbstractString, TimeZone}=Dict{AbstractString, TimeZone}(), # Specify what a timezone is - dayfirst::Bool=false, # MM-DD-YY vs DD-MM-YY - yearfirst::Bool=false, # MM-DD-YY vs YY-MM-DD - locale::AbstractString="english", # Locale in Dates.VALUETOMONTH and VALUETODAYOFWEEK -) - datetimestring = strip(datetimestring) - - if isempty(datetimestring) - return default - end - - res = _parsedate(datetimestring, fuzzy=fuzzy, timezone_infos=timezone_infos, - dayfirst=dayfirst, yearfirst=yearfirst, locale=locale) - - # Fill in default values if none exits - res["year"] = convertyear(get(res, "year", year(default))) - get!(res, "month", month(default)) - get!(res, "day", day(default)) - get!(res, "hour", hour(default)) - get!(res, "minute", 
minute(default)) - get!(res, "second", second(default)) - get!(res, "millisecond", millisecond(default)) - if !haskey(res, "timezone") - if haskey(res, "tzoffset") - tzname = get(res, "tzname", "local") - res["timezone"] = FixedTimeZone(tzname, res["tzoffset"]) - else - res["timezone"] = default.timezone - end - end - - return ZonedDateTime(DateTime(res["year"], res["month"], res["day"], res["hour"], - res["minute"], res["second"], res["millisecond"]), res["timezone"]) -end - -function Base.parse(::Type{DateTime}, datetimestring::AbstractString; fuzzy::Bool=false, - default::DateTime=DateTime(year(today())), - dayfirst::Bool=false, # MM-DD-YY vs DD-MM-YY - yearfirst::Bool=false, # MM-DD-YY vs YY-MM-DD - locale::AbstractString="english", # Locale in Dates.VALUETOMONTH and VALUETODAYOFWEEK -) - datetimestring = strip(datetimestring) - - if isempty(datetimestring) - return default - end - - res = _parsedate(datetimestring, fuzzy=fuzzy, - dayfirst=dayfirst, yearfirst=yearfirst, locale=locale) - - # Fill in default values if none exits - res["year"] = convertyear(get(res, "year", year(default))) - get!(res, "month", month(default)) - get!(res, "day", day(default)) - get!(res, "hour", hour(default)) - get!(res, "minute", minute(default)) - get!(res, "second", second(default)) - get!(res, "millisecond", millisecond(default)) - - return DateTime(res["year"], res["month"], res["day"], res["hour"], - res["minute"], res["second"], res["millisecond"]) -end - -function Base.parse(::Type{Date}, datetimestring::AbstractString; fuzzy::Bool=false, - default::Date=Date(year(today())), - dayfirst::Bool=false, # MM-DD-YY vs DD-MM-YY - yearfirst::Bool=false, # MM-DD-YY vs YY-MM-DD - locale::AbstractString="english", # Locale in Dates.VALUETOMONTH and VALUETODAYOFWEEK -) - datetimestring = strip(datetimestring) - - if isempty(datetimestring) - return default - end - - res = _parsedate(datetimestring, fuzzy=fuzzy, - dayfirst=dayfirst, yearfirst=yearfirst, locale=locale) - - # Fill in 
default values if none exits - res["year"] = convertyear(get(res, "year", year(default))) - get!(res, "month", month(default)) - get!(res, "day", day(default)) - - return Date(res["year"], res["month"], res["day"]) -end - -function _parsedate(datetimestring::AbstractString; fuzzy::Bool=false, - timezone_infos::Dict{AbstractString, TimeZone}=Dict{AbstractString, TimeZone}(), - dayfirst::Bool=false, - yearfirst::Bool=false, - locale::AbstractString="english", -) - month = monthtovalue(locale) - weekday = weekdaytovalue(locale) - - ymd = sizehint!(Int[], 3) # year/month/day list - mstridx = -1 # Index of the month string in ymd - tokens = tokenize(datetimestring) - len = length(tokens) - - res = Dict() - i = 1 - while i <= len - token = tokens[i] - tokenlength = length(token) - if isdigit(token) - # Token is a number - i += 1 # We want to look at what comes after the number - if length(ymd) == 3 && tokenlength in (2,4) && - (i>=len || (tokens[i] != ":" && !haskey(HMS, lowercase(tokens[i])))) - # 19990101T23[59] - res["hour"] = parse(Int, token[1:2]) - if tokenlength == 4 - res["minute"] = parse(Int, token[3:4]) - end - elseif tokenlength == 6 - # YYMMDD or HHMMSS[.ss] - if length(ymd) != 0 || (i+1 <= len && tokens[i] == "." && isdigit(tokens[i+1])) - # 19990101T235959[.59] - res["hour"] = parse(Int, token[1:2]) - res["minute"] = parse(Int, token[3:4]) - res["second"] = parse(Int, token[5:6]) - if i+1 <= len && tokens[i] == "." 
&& isdigit(tokens[i+1]) - res["millisecond"] = round(Int, 1000 * parse(Float64, string(tokens[i], tokens[i+1]))) - i += 2 - end - else - push!(ymd, convertyear(parse(Int, token[1:2]))) - push!(ymd, parse(Int, token[3:4])) - push!(ymd, parse(Int, token[5:end])) - end - elseif tokenlength in (8, 12, 14) - # YYYYMMDD[hhmm[ss]] - push!(ymd, parse(Int, token[1:4])) - push!(ymd, parse(Int, token[5:6])) - push!(ymd, parse(Int, token[7:8])) - if tokenlength > 8 - res["hour"] = parse(Int, token[9:10]) - res["minute"] = parse(Int, token[11:12]) - if tokenlength > 12 - res["second"] = parse(Int, token[13:14]) - end - end - elseif (i <= len && haskey(HMS, lowercase(tokens[i]))) || - (i+2 <= len && tokens[i] == "." && isdigit(tokens[i+1]) && - haskey(HMS, lowercase(tokens[i+2]))) - # HH[ ]h or MM[ ]m or SS[.ss][ ]s - - value = parse(Int, token) - decimal = 0.0 - if tokens[i] == "." - decimal = parse(Float64, string(".", tokens[i+1])) - i += 2 - end - idx = HMS[lowercase(tokens[i])] - while true - if idx == :hour - res["hour"] = value - if decimal != 0 - res["minute"] = round(Int, 60 * decimal) - end - elseif idx == :minute - res["minute"] = value - if decimal != 0 - res["second"] = round(Int, 60 * decimal) - end - elseif idx == :second - res["second"] = value - if decimal != 0 - res["millisecond"] = round(Int, 1000 * decimal) - end - end - i += 1 - if i > len || idx == :second - break - end - # 12h00 - token = tokens[i] - if !isdigit(token) - break - else - i += 1 - value = parse(Int, token) - decimal = 0.0 - if i+1 <= len tokens[i] == "." && isdigit(tokens[i+1]) - decimal = parse(Float64, string(".", tokens[i+1])) - i += 2 - end - if i <= len && haskey(HMS, lowercase(tokens[i])) - idx = HMS[lowercase(tokens[i])] - elseif idx == :hour - idx = :minute - else - idx = :second - end - end - end - elseif i+1 <= len && tokens[i] == ":" - # HH:MM[:SS[.ss]] - res["hour"] = parse(Int, token) - res["minute"] = parse(Int, tokens[i+1]) - i += 2 - if i+1 <= len && tokens[i] == "." 
&& isdigit(tokens[i+1]) - res["second"] = round(Int, 60 * parse(Float64, string(".", tokens[i+1]))) - i += 2 - elseif i < len && tokens[i] == ":" - res["second"] = parse(Int, tokens[i+1]) - i += 2 - if i+1 <= len && tokens[i] == "." && isdigit(tokens[i+1]) - res["millisecond"] = round(Int, 1000 * parse(Float64, string(".", tokens[i+1]))) - i += 2 - end - end - elseif i <= len && tokens[i] in ("-","/",".") - sep = tokens[i] - push!(ymd, parse(Int, token)) - i += 1 - if i <= len && !(lowercase(tokens[i]) in JUMP) - if isdigit(tokens[i]) - push!(ymd, parse(Int, tokens[i])) - else - if haskey(month, lowercase(tokens[i])) - push!(ymd, month[lowercase(tokens[i])]) - mstridx = length(ymd) - end - end - i += 1 - if i <= len && tokens[i] == sep - # We have three members - i += 1 - if haskey(month, lowercase(tokens[i])) - push!(ymd, month[lowercase(tokens[i])]) - mstridx = length(ymd) - else - push!(ymd, parse(Int, tokens[i])) - end - i += 1 - end - end - elseif i <= len && haskey(AMPM, lowercase(tokens[i])) - # 12am - res["hour"] = parse(Int, token) - res["hour"] = converthour(res["hour"], AMPM[lowercase(tokens[i])]) - i += 1 - else - push!(ymd, parse(Int, token)) - end - else - # Token is not a number - if haskey(weekday, lowercase(token)) - # Weekday - res["weekday"] = weekday[lowercase(token)] - i += 1 - elseif haskey(month, lowercase(token)) - # Month name - push!(ymd, round(Int, month[lowercase(token)])) - mstridx = length(ymd) - i += 1 - if i <= len - if tokens[i] in ("-", "/", ".") - # Jan-01[-99] - sep = tokens[i] - i += 1 - push!(ymd, parse(Int, tokens[i])) - i += 1 - if i <= len && tokens[i] == sep - # Jan-01-99 - i += 1 - push!(ymd, parse(Int, tokens[i])) - i += 1 - end - elseif i+1 <= len && tokens[i] in PERTAIN && isdigit(tokens[i+1]) - # Jan of 01 - # In this case, 01 is clearly year - value = parse(Int, tokens[i+1]) - # Convert it here to become unambiguous - push!(ymd, convertyear(value)) - i += 2 - end - end - elseif haskey(AMPM, lowercase(tokens[i])) - # 
am/pm - res["hour"] = converthour(res["hour"], AMPM[lowercase(tokens[i])]) - i += 1 - elseif tokens[i] in ("+", "-") && !haskey(res, "tzoffset") && i+1 <= len && isdigit(tokens[i+1]) - # Numbered timzone - signal = tokens[i] == "+" ? 1 : -1 - - i += 1 - tokenlength = length(tokens[i]) - hour = minute = 0 - if tokenlength == 4 - # -0300 - hour, minute = parse(Int, tokens[i][1:2]), parse(Int, tokens[i][3:end]) - elseif i+2 <= len && tokens[i+1] == ":" && isdigit(tokens[i+2]) - # -03:00 - hour, minute = parse(Int, tokens[i]), parse(Int, tokens[i+2]) - i += 2 - elseif tokenlength <= 2 - # -[0]3 - hour = parse(Int, tokens[i]) - else - error("Faild to read timezone offset after +/-") - end - res["tzoffset"] = hour * 3600 + minute * 60 - - i += 1 - res["tzoffset"] *= signal - elseif !haskey(res, "tzname") && i+2 <= len && tokens[i] == "(" && - ismatch(r"^\w+$", tokens[i+1]) - # Look for a timezone name between parenthesis - oldindex = i - res["tzname"] = tokens[i+1] - i += 2 - while tokens[i] != ")" - # -0300 (BRST) - if i+2 <= len && tokens[i] == "/" - res["tzname"] = string(res["tzname"], "/", tokens[i+1]) - i += 2 - elseif fuzzy == true - delete!(res, "tzname") - i = oldindex - break - else - error("Faild to parse date") - end - end - - if haskey(res, "tzname") - value = trytimezone(res["tzname"], timezone_infos) - if !isnull(value) - res["timezone"] = get(value) - end - end - - i += 1 - elseif !haskey(res, "tzname") && - ismatch(r"^\w+$", tokens[i]) && !(lowercase(tokens[i]) in JUMP) - # Timezone name? 
- oldindex = i - - res["tzname"] = tokens[i] - while i+2 <= len && tokens[i+1] == "/" - res["tzname"] = string(res["tzname"], "/", tokens[i+2]) - i += 2 - end - i += 1 - # Check for something like GMT+3, or BRST+3 - if i+1 <= len && tokens[i] in ("+", "-") && - isdigit(tokens[i+1]) && length(tokens[i+1]) in (1,2) && - (i+2 > len || tokens[i+2] != ":") - res["tzname"] = string(res["tzname"], tokens[i], tokens[i+1]) - i += 2 - end - - value = trytimezone(res["tzname"], timezone_infos) - if !isnull(value) - res["timezone"] = get(value) - elseif fuzzy == true - delete!(res, "tzname") - i = oldindex+1 - else - error("Faild to parse date") - end - elseif !(lowercase(tokens[i]) in JUMP) && !fuzzy - error("Failed to parse date") - else - i += 1 - end - end - end - - # Process year/month/day - len_ymd = length(ymd) - - if len_ymd > 3 - # More than three members!? - error("Failed to parse date") - elseif len_ymd == 1 || (mstridx != -1 && len_ymd == 2) - # One member, or two members with a month string - if mstridx != -1 - res["month"] = ymd[mstridx] - deleteat!(ymd, mstridx) - end - if len_ymd > 1 || mstridx == -1 - if ymd[1] > 31 - res["year"] = ymd[1] - else - res["day"] = ymd[1] - end - end - elseif len_ymd == 2 - # Two members with numbers - if ymd[1] > 31 - # 99-01 - res["year"], res["month"] = ymd - elseif ymd[2] > 31 - # 01-99 - res["month"], res["year"] = ymd - elseif dayfirst && ymd[2] <= 12 - # 13-01 - res["day"], res["month"] = ymd - else - # 01-13 - res["month"], res["day"] = ymd - end - elseif len_ymd == 3 - # Three members - if mstridx == 1 - res["month"], res["day"], res["year"] = ymd - elseif mstridx == 2 - if ymd[1] > 31 || (yearfirst && ymd[3] <= 31) - # 99-Jan-01 - res["year"], res["month"], res["day"] = ymd - else - # 01-Jan-01 - # Give precendence to day-first, since - # two-digit years is usually hand-written. 
- res["day"], res["month"], res["year"] = ymd - end - elseif mstridx == 3 - # WTF - if ymd[2] > 31 - # 01-99-Jan - res["day"], res["year"], res["month"] = ymd - else - res["year"], res["day"], res["month"] = ymd - end - else - if ymd[1] > 31 || (yearfirst && ymd[2] <= 12 && ymd[3] <= 31) - # 99-01-01 - res["year"], res["month"], res["day"] = ymd - elseif ymd[1] > 12 || (dayfirst && ymd[2] <= 12) - # 13-01-01 - res["day"], res["month"], res["year"] = ymd - else - # 01-13-01 - res["month"], res["day"], res["year"] = ymd - end - end - end - - return res -end - -function monthtovalue(locale::AbstractString="english") - monthtovalue = Dict{UTF8String, Int}() - for (value, name) in VALUETOMONTH[locale] - monthtovalue[lowercase(name)] = value - end - for (value, name) in VALUETOMONTHABBR[locale] - monthtovalue[lowercase(name)] = value - end - return monthtovalue -end - -function weekdaytovalue(locale::AbstractString="english") - weekdaytovalue = Dict{UTF8String, Int}() - for (value, name) in VALUETODAYOFWEEK[locale] - weekdaytovalue[lowercase(name)] = value - end - for (value, name) in VALUETODAYOFWEEKABBR[locale] - weekdaytovalue[lowercase(name)] = value - end - return weekdaytovalue -end - -function trytimezone(tzname::AbstractString, timezone_infos::Dict{AbstractString,TimeZone}) - if haskey(timezone_infos, tzname) - return Nullable{TimeZone}(timezone_infos[tzname]) - elseif tzname in TimeZones.timezone_names() - return Nullable{TimeZone}(TimeZone(tzname)) - elseif lowercase(tzname) in UTCZONE - return Nullable{TimeZone}(FixedTimeZone("UTC", 0)) - else - return Nullable{TimeZone}() - end -end - -"Converts a 2 digit year to a 4 digit one within 50 years of convert_year. 
At the momment - convert_year defaults to 2000, if people are still using 2 digit years after year 2049 - (hopefully not) then we can change the default to year(today())" -function convertyear(year::Int, convert_year=2000) - if year <= 99 - century = convert_year - convert_year % 100 - year += century - if abs(year - convert_year) >= 50 - if year < convert_year - year += 100 - else - year -= 100 - end - end - end - return year -end - -function converthour(hour::Int, ampm::Symbol) - if hour < 12 && ampm == :pm - hour += 12 - elseif hour == 12 && ampm == :am - hour = 0 - end - return hour -end - -function tokenize{Str<:AbstractString}(input::Str) - tokens = Str[] - token = sizehint!(Char[], 10) - - # Note: A regular expression can handle almost all of this task - # with the exception of identifying Unicode punctuation. - state = last_state = :none - for c in input - if isspace(c) - state = :none - elseif isdigit(c) - state = :number - elseif isalpha(c) - state = :word - else - state = :other - end - - if state != :none - if state != last_state && !isempty(token) - push!(tokens, Str(token)) - empty!(token) - end - - push!(token, c) - end - - last_state = state - end - - # Token will only be empty here if the entire input was whitespace - !isempty(token) && push!(tokens, Str(token)) - - return tokens -end - -end # module diff --git a/test/runtests.jl b/test/runtests.jl index 7d4b82c..363a689 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,12 +1,8 @@ -using DateTimeParser +using DateParser using Base.Test using TimeZones -@test DateTimeParser.tokenize("⁇.éAû2") == ["⁇.", "éAû", "2"] -@test DateTimeParser.tokenize("1999 Feb 3 12:20:30.5") == ["1999", "Feb", "3", "12", ":", "20", ":", "30", ".", "5"] -@test DateTimeParser.tokenize("GMT+3") == ["GMT", "+", "3"] # Note: ispunct('+') is false - timezone = TimeZone("Europe/Warsaw") default_d = Date(1976, 7, 4) default_dt = DateTime(default_d) @@ -18,7 +14,68 @@ timezone_infos = Dict{AbstractString, TimeZone}( 
"Etc/GMT+3" => FixedTimeZone("GMT+3", -10800), ) -# Test all code paths +# Weird things +@test parse(ZonedDateTime, "1999 2:30 America / Winnipeg", default=default_zdt).timezone.name == Symbol("America/Winnipeg") +@test parse(ZonedDateTime, "1999 2:30 MST 7 MDT", default=default_zdt).timezone.name == Symbol("MST7MDT") + +# Unsupported formats +@test isnull(tryparse(ZonedDateTime, "1999 2:30 (FOO) +1:00", default=default_zdt)) +@test isnull(tryparse(ZonedDateTime, "1999 2:30 +1:00 FOO", default=default_zdt)) +# MMYYYY is not supported because it will parse as 3 date tokens +@test parse(Date, "102015", default=default_d) == Date(2015, 10, 20) + +# tryparse +@test get(tryparse(ZonedDateTime, "Oct 13, 1994 12:10:14 UTC", default=default_zdt, timezone_infos=timezone_infos)) == ZonedDateTime(DateTime(1994, 10, 13, 12, 10, 14), FixedTimeZone("UTC", 0)) +@test isnull(tryparse(ZonedDateTime, "garbage", default=default_zdt)) +@test get(tryparse(DateTime, "Oct 13, 1994 12:10:14 UTC", default=default_dt)) == DateTime(1994, 10, 13, 12, 10, 14) +@test isnull(tryparse(DateTime, "garbage", default=default_dt)) +@test get(tryparse(Date, "Oct 13, 1994 12:10:14 UTC", default=default_d)) == Date(1994, 10, 13) +@test isnull(tryparse(Date, "garbage", default=default_d)) + +# tokenize +@test DateParser.tokenize("⁇.éAû2") == ["⁇.", "éAû", "2"] +@test DateParser.tokenize("1999 Feb 3 12:20:30.5") == ["1999", "Feb", "3", "12", ":", "20", ":", "30", ".", "5"] +@test DateParser.tokenize("GMT+3") == ["GMT", "+", "3"] # Note: ispunct('+') is false + +# convertyear +@test DateParser.convertyear(10) == 2010 +@test DateParser.convertyear(95) == 1995 +@test DateParser.convertyear(49) == 2049 +@test DateParser.convertyear(50) == 1950 +@test DateParser.convertyear(10, 2075) == 2110 + +# converthour +@test DateParser.converthour(1, :am) == 1 +@test DateParser.converthour(1, :pm) == 13 +@test DateParser.converthour(12, :am) == 0 +@test DateParser.converthour(12, :pm) == 12 + +# _tryparse {TimeZone} 
+@test get(DateParser._tryparse(TimeZone, "Etc/GMT+3", translation=timezone_infos)).name == Symbol("GMT+3") +@test get(DateParser._tryparse(TimeZone, "America/Winnipeg")).name == Symbol("America/Winnipeg") +@test get(DateParser._tryparse(TimeZone, "MST7MDT")).name == Symbol("MST7MDT") +@test get(DateParser._tryparse(TimeZone, "Asia/Ho_Chi_Minh")).name == Symbol("Asia/Ho_Chi_Minh") +@test get(DateParser._tryparse(TimeZone, "America/North_Dakota/New_Salem")).name == Symbol("America/North_Dakota/New_Salem") +@test get(DateParser._tryparse(TimeZone, "America/Port-au-Prince")).name == Symbol("America/Port-au-Prince") +@test get(DateParser._tryparse(TimeZone, "z")).name == Symbol("UTC") +@test isnull(DateParser._tryparse(TimeZone, "badzone")) + +# _tryparse {Month} +@test get(DateParser._tryparse(Dates.Month, "january")).value == 1 +@test get(DateParser._tryparse(Dates.Month, "oct")).value == 10 +@test isnull(DateParser._tryparse(Dates.Month, "garbage")) + +# _tryparse {DayOfWeek} +@test get(DateParser._tryparse(DateParser.DayOfWeek, "monday")).value == 1 +@test get(DateParser._tryparse(DateParser.DayOfWeek, "wed")).value == 3 +@test isnull(DateParser._tryparse(DateParser.DayOfWeek, "garbage")) + +# parsefractional +@test DateParser.parsefractional("5") == 0.5 +@test DateParser.parsefractional("50") == 0.5 +@test DateParser.parsefractional("999") == 0.999 + +# All code paths @test parse(ZonedDateTime, "", default=default_zdt) == default_zdt @test parse(DateTime, "", default=default_dt) == default_dt @test parse(Date, "", default=default_d) == default_d @@ -66,7 +123,16 @@ timezone_infos = Dict{AbstractString, TimeZone}( @test parse(DateTime, "february the 3rd 1999", default=default_dt) == DateTime(1999, 2, 3) @test isnull(tryparse(DateTime, "hi it's 99 february the 3rd", default=default_dt)) @test parse(DateTime, "hi it's 99 february the 3rd", fuzzy=true, default=default_dt) == DateTime(1999, 2, 3) -@test isnull(tryparse(DateTime, "1, 2, 3, 4", default=default_dt)) 
+@test parse(DateTime, "1, 2, 3, 4", default=default_dt) == DateTime(2003, 1, 2, 4) +@test parse(DateTime, "1999 04 05 13 59 59 99") == DateTime(1999, 04, 05, 13, 59, 59, 99) +@test isnull(tryparse(DateTime, "1999 04 05 13 59 59 99 92")) +@test parse(DateTime, "1999 04 05 13 59 59 999") == DateTime(1999, 04, 05, 13, 59, 59, 999) +@test parse(DateTime, "1999 04 05 1359") == DateTime(1999, 04, 05, 13, 59) +@test parse(DateTime, "19990405 135959") == DateTime(1999, 04, 05, 13, 59, 59) +@test isnull(tryparse(DateTime, "19990405 1359599")) +@test isnull(tryparse(DateTime, "1999 04 05 13595999")) +@test parse(DateTime, "1999 04 05 135959999") == DateTime(1999, 04, 05, 13, 59, 59, 999) +@test parse(DateTime, "19990405 135959999") == DateTime(1999, 04, 05, 13, 59, 59, 999) @test parse(DateTime, "feb 3", default=default_dt) == DateTime(1976, 2, 3) @test parse(DateTime, "feb 1999", default=default_dt) == DateTime(1999, 2, 4) @test parse(DateTime, "1999", default=default_dt) == DateTime(1999, 7, 4) @@ -101,28 +167,64 @@ timezone_infos = Dict{AbstractString, TimeZone}( @test isnull(tryparse(ZonedDateTime, "1999 2:30 FAIL", default=default_zdt)) @test parse(ZonedDateTime, "1999 2:30 +01:00", default=default_zdt).timezone.name == :local @test parse(ZonedDateTime, "1999 2:30 +01:00", default=default_zdt).timezone.offset.utc == Dates.Second(3600) -@test parse(ZonedDateTime, "1999 2:30 -01:00 (TEST)", default=default_zdt).timezone.name == :TEST -@test parse(ZonedDateTime, "1999 2:30 -01:00 (TEST)", default=default_zdt).timezone.offset.utc == Dates.Second(-3600) -@test parse(ZonedDateTime, "1999 2:30 America/Winnipeg", default=default_zdt).timezone.name == symbol("America/Winnipeg") +@test parse(ZonedDateTime, "1999 2:30 -01:00 (TEST)", timezone_infos=timezone_infos, default=default_zdt).timezone.name == :TEST +# If both a timezone in timezone_infos and a timezone offset exist use the timezone in timezone_infos +@test parse(ZonedDateTime, "1999 2:30 -01:00 (TEST)", 
timezone_infos=timezone_infos, default=default_zdt).timezone.offset.utc == Dates.Second(3600) + +@test parse(ZonedDateTime, "1999 2:30 America/Winnipeg", default=default_zdt).timezone.name == Symbol("America/Winnipeg") +@test parse(ZonedDateTime, "1999 2:30 MST7MDT", default=default_zdt).timezone.name == Symbol("MST7MDT") +@test parse(ZonedDateTime, "1999 2:30 Asia/Ho_Chi_Minh", default=default_zdt).timezone.name == Symbol("Asia/Ho_Chi_Minh") +@test parse(ZonedDateTime, "1999 2:30 America/North_Dakota/New_Salem", default=default_zdt).timezone.name == Symbol("America/North_Dakota/New_Salem") +@test parse(ZonedDateTime, "1999 2:30 America/Port-au-Prince", default=default_zdt).timezone.name == Symbol("America/Port-au-Prince") -@test parse(ZonedDateTime, "1999 2:30 (America/Winnipeg)", default=default_zdt).timezone.name == symbol("America/Winnipeg") +@test parse(ZonedDateTime, "1999 2:30 (America/Winnipeg)", default=default_zdt).timezone.name == Symbol("America/Winnipeg") @test isnull(tryparse(ZonedDateTime, "1999 2:30 (BAD-)", default=default_zdt)) -@test parse(ZonedDateTime, "1999 2:30 (BAD-)", fuzzy=true, default=default_zdt).timezone.name == symbol("Europe/Warsaw") +@test parse(ZonedDateTime, "1999 2:30 (BAD-)", fuzzy=true, default=default_zdt).timezone.name == Symbol("Europe/Warsaw") @test parse(DateTime, "21:38, 30 May 2006 (UTC)", default=default_dt) == DateTime(2006, 5, 30, 21, 38) @test parse(DateTime, "2015.10.02 10:21:59.45", default=default_dt) == DateTime(2015, 10, 2, 10, 21, 59, 450) -# Test tryparse -@test get(tryparse(ZonedDateTime, "Oct 13, 1994 12:10:14 UTC", default=default_zdt, timezone_infos=timezone_infos)) == ZonedDateTime(DateTime(1994, 10, 13, 12, 10, 14), FixedTimeZone("UTC", 0)) -@test isnull(tryparse(ZonedDateTime, "garbage", default=default_zdt)) -@test get(tryparse(DateTime, "Oct 13, 1994 12:10:14 UTC", default=default_dt)) == DateTime(1994, 10, 13, 12, 10, 14) -@test isnull(tryparse(DateTime, "garbage", default=default_dt)) -@test 
get(tryparse(Date, "Oct 13, 1994 12:10:14 UTC", default=default_d)) == Date(1994, 10, 13) -@test isnull(tryparse(Date, "garbage", default=default_d)) +@test parse(Date, "301213", yearfirst=true, default=default_d) == Date(2030, 12, 13) +@test parse(Date, "301213", dayfirst=true, default=default_d) == Date(2013, 12, 30) + +@test isnull(tryparse(DateTime, "1999-10-13 pm", default=default_dt)) + +temp = parse(ZonedDateTime, "1999 2:30 (UTC+1:00)", default=default_zdt) +@test temp.timezone.name == Symbol("UTC+1:00") +@test temp.timezone.offset.utc == Dates.Second(3600) +temp = parse(ZonedDateTime, "1999 2:30 +1:00 (FOO)", default=default_zdt) +@test temp.timezone.name == Symbol("FOO") +@test temp.timezone.offset.utc == Dates.Second(3600) + +temp = parse(ZonedDateTime, "19991212 0259+1:00") +@test temp.timezone.offset.utc == Dates.Second(3600) +@test TimeZones.localtime(temp) == DateTime(1999, 12, 12, 2, 59) +# Out of range +@test isnull(tryparse(ZonedDateTime, "1999 2:30 +25:00", default=default_zdt)) +@test isnull(tryparse(ZonedDateTime, "1999 2:30 +00:62", default=default_zdt)) + +# locale +DateParser.DAYOFWEEKTOVALUE["french"] = Dict("lundi" => 1, "mardi" => 2, + "mercredi" => 3, "jeudi" => 4, "vendredi" => 5, "samedi" => 6, "dimanche" => 7) +DateParser.DAYOFWEEKABBRTOVALUE["french"] = Dict("lun" => 1, "mar" => 2, + "mer" => 3, "jeu" => 4, "ven" => 5, "sam" => 6, "dim" => 7) +DateParser.MONTHTOVALUE["french"] = Dict("janvier" => 1, "février" => 2, + "mars" => 3, "avril" => 4, "mai" => 5, "juin" => 6, "juillet" => 7, "août" => 8, + "septembre" => 9, "octobre" => 10, "novembre" => 11, "décembre" => 12) +DateParser.MONTHABBRTOVALUE["french"] = Dict("janv" => 1, "févr" => 2, + "mars" => 3, "avril" => 4, "mai" => 5, "juin" => 6, "juil" => 7, "août" => 8, + "sept" => 9, "oct" => 10, "nov" => 11, "déc" => 12) +DateParser.HMS["french"] = DateParser.HMS["english"] +DateParser.AMPM["french"] = DateParser.AMPM["english"] -# Test convertyear -@test 
DateTimeParser.convertyear(10, 2075) == 2110 +@test parse(DateTime, "28 mai 2014", locale="french", default=default_dt) == DateTime(2014, 5, 28) +@test parse(DateTime, "28 févr 2014", locale="french", default=default_dt) == DateTime(2014, 2, 28) +@test parse(DateTime, "jeu 28 août 2014", locale="french", default=default_dt) == DateTime(2014, 8, 28) +@test parse(DateTime, "lundi 28 avril 2014", locale="french", default=default_dt) == DateTime(2014, 4, 28) +@test parse(DateTime, "28 févr 2014", locale="french", default=default_dt) == DateTime(2014, 2, 28) +@test parse(DateTime, "12 am", locale="french", default=default_dt) == DateTime(1976, 7, 4, 0) +@test parse(DateTime, "1 pm", locale="french", default=default_dt) == DateTime(1976, 7, 4, 13) # Examples I found in Python's dateutil's pointers links date = ZonedDateTime(DateTime(1995, 2, 4), timezone)