From 24d3f199768be970149412c95b1c1bf5d76f8240 Mon Sep 17 00:00:00 2001 From: dubovinszky Date: Wed, 9 Aug 2017 17:37:55 -0700 Subject: [PATCH] HU Setup + Numeral Summary: - Setup Hungarian (HU) language - Added Numeral Dimension Closes https://github.com/facebookincubator/duckling/pull/79 Reviewed By: blandinw Differential Revision: D5595812 Pulled By: patapizza fbshipit-source-id: 5959938 --- Duckling/Dimensions.hs | 2 + Duckling/Dimensions/HU.hs | 18 +++ Duckling/Lang.hs | 1 + Duckling/Numeral/HU/Corpus.hs | 98 +++++++++++++++++ Duckling/Numeral/HU/Rules.hs | 169 +++++++++++++++++++++++++++++ Duckling/Ranking/Classifiers.hs | 2 + Duckling/Ranking/Classifiers/HU.hs | 22 ++++ Duckling/Rules.hs | 2 + Duckling/Rules/HU.hs | 34 ++++++ duckling.cabal | 6 + exe/Duckling/Ranking/Generate.hs | 1 + tests/Duckling/Numeral/HU/Tests.hs | 22 ++++ tests/Duckling/Numeral/Tests.hs | 2 + 13 files changed, 379 insertions(+) create mode 100644 Duckling/Dimensions/HU.hs create mode 100644 Duckling/Numeral/HU/Corpus.hs create mode 100644 Duckling/Numeral/HU/Rules.hs create mode 100644 Duckling/Ranking/Classifiers/HU.hs create mode 100644 Duckling/Rules/HU.hs create mode 100644 tests/Duckling/Numeral/HU/Tests.hs diff --git a/Duckling/Dimensions.hs b/Duckling/Dimensions.hs index 289510817..cef8d7da8 100644 --- a/Duckling/Dimensions.hs +++ b/Duckling/Dimensions.hs @@ -32,6 +32,7 @@ import qualified Duckling.Dimensions.FR as FRDimensions import qualified Duckling.Dimensions.GA as GADimensions import qualified Duckling.Dimensions.HE as HEDimensions import qualified Duckling.Dimensions.HR as HRDimensions +import qualified Duckling.Dimensions.HU as HUDimensions import qualified Duckling.Dimensions.ID as IDDimensions import qualified Duckling.Dimensions.IT as ITDimensions import qualified Duckling.Dimensions.JA as JADimensions @@ -90,6 +91,7 @@ langDimensions FR = FRDimensions.allDimensions langDimensions GA = GADimensions.allDimensions langDimensions HE = HEDimensions.allDimensions langDimensions HR = HRDimensions.allDimensions +langDimensions HU = HUDimensions.allDimensions langDimensions ID = IDDimensions.allDimensions langDimensions IT = ITDimensions.allDimensions langDimensions JA = JADimensions.allDimensions diff --git a/Duckling/Dimensions/HU.hs b/Duckling/Dimensions/HU.hs new file mode 100644 index 000000000..784706ade --- /dev/null +++ b/Duckling/Dimensions/HU.hs @@ -0,0 +1,18 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + + +module Duckling.Dimensions.HU + ( allDimensions + ) where + +import Duckling.Dimensions.Types + +allDimensions :: [Some Dimension] +allDimensions = + [ This Numeral + ] diff --git a/Duckling/Lang.hs b/Duckling/Lang.hs index a7e08c9c4..4d1e8955c 100644 --- a/Duckling/Lang.hs +++ b/Duckling/Lang.hs @@ -34,6 +34,7 @@ data Lang | GA | HE | HR + | HU | ID | IT | JA diff --git a/Duckling/Numeral/HU/Corpus.hs b/Duckling/Numeral/HU/Corpus.hs new file mode 100644 index 000000000..c6fd407e5 --- /dev/null +++ b/Duckling/Numeral/HU/Corpus.hs @@ -0,0 +1,98 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + + +{-# LANGUAGE OverloadedStrings #-} + +module Duckling.Numeral.HU.Corpus + ( corpus ) where + +import Data.String +import Prelude + +import Duckling.Lang +import Duckling.Numeral.Types +import Duckling.Resolve +import Duckling.Testing.Types + +corpus :: Corpus +corpus = (testContext {lang = HU}, allExamples) + +allExamples :: [Example] +allExamples = concat + [ examples (NumeralValue 0) + [ "0" + , "nulla" + , "zéró" + ] + , examples (NumeralValue 1) + [ "1" + , "egy" + ] + , examples (NumeralValue 2) + [ "kettő" + ] + , examples (NumeralValue 3) + [ "három" + ] + , examples (NumeralValue 4) + [ "négy" + ] + , examples (NumeralValue 5) + [ "öt" + ] + , examples (NumeralValue 6) + [ "hat" + ] + , examples (NumeralValue 7) + [ "hét" + ] + , examples (NumeralValue 8) + [ "nyolc" + ] + , examples (NumeralValue 9) + [ "kilenc" + ] + , examples (NumeralValue 11) + [ "tizenegy" + ] + , examples (NumeralValue 15) + [ "tizenöt" + ] + , examples (NumeralValue 17) + [ "tizenhét" + ] + , examples (NumeralValue 20) + [ "20" + , "húsz" + ] + , examples (NumeralValue 22) + [ "huszonkettő" + ] + , examples (NumeralValue 24) + [ "24" + , "huszonnégy" + ] + , examples (NumeralValue 26) + [ "huszonhat" + ] + , examples (NumeralValue 28) + [ "huszonnyolc" + ] + , examples (NumeralValue 10) + [ "tíz" + ] + , examples (NumeralValue 20) + [ "húsz" + ] + , examples (NumeralValue 50) + [ "ötven" + ] + , examples (NumeralValue 34) + [ "harmincnégy" + ] + ] diff --git a/Duckling/Numeral/HU/Rules.hs b/Duckling/Numeral/HU/Rules.hs new file mode 100644 index 000000000..fb13efc83 --- /dev/null +++ b/Duckling/Numeral/HU/Rules.hs @@ -0,0 +1,169 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + + +{-# LANGUAGE GADTs #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NoRebindableSyntax #-} + +module Duckling.Numeral.HU.Rules + ( rules ) where + +import Data.HashMap.Strict (HashMap) +import Data.Maybe +import Data.String +import Data.Text (Text) +import Prelude +import qualified Data.HashMap.Strict as HashMap +import qualified Data.Text as Text + +import Duckling.Dimensions.Types +import Duckling.Numeral.Helpers +import Duckling.Numeral.Types (NumeralData (..)) +import Duckling.Regex.Types +import Duckling.Types +import qualified Duckling.Numeral.Types as TNumeral + +ruleIntegerNumeric :: Rule +ruleIntegerNumeric = Rule + { name = "integer (numeric)" + , pattern = + [ regex "(\\d{1,18})" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (match:_)): + _) -> do + v <- parseInt match + integer $ toInteger v + _ -> Nothing + } + +ruleNumeralMap :: HashMap Text Integer +ruleNumeralMap = HashMap.fromList + [ ( "nulla", 0 ) + , ( "z\x00E9r\x00F3", 0 ) + , ( "egy", 1 ) + , ( "kett\x0151", 2 ) + , ( "h\x00E1rom", 3 ) + , ( "n\x00E9gy", 4 ) + , ( "\x00F6t", 5) + , ( "hat", 6) + , ( "h\x00E9t", 7) + , ( "nyolc", 8) + , ( "kilenc", 9) + , ( "t\x00EDz", 10) + ] + +ruleNumeral :: Rule +ruleNumeral = Rule + { name = "number (0..10)" + , pattern = + [ regex "(nulla|z\x00E9r\x00F3|egy|kett\x0151|h\x00E1rom|n\x00E9gy|\x00F6t|hat|h\x00E9t|nyolc|kilenc|t\x00EDz)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (match:_)):_) -> + HashMap.lookup (Text.toLower match) ruleNumeralMap >>= integer + _ -> Nothing + } + +elevenToNineteenMap :: HashMap Text Integer +elevenToNineteenMap = HashMap.fromList + [ ( "tizenegy", 11 ) + , ( "tizenkett\x0151", 12 ) + , ( "tizenh\x00E1rom", 13 ) + , ( "tizenn\x00E9gy", 14 ) + , ( "tizen\x00F6t", 15 ) + , ( "tizenhat", 16 ) + , ( "tizenh\x00E9t", 17 ) + , ( "tizennyolc", 18 ) + , ( "tizenkilenc", 19 ) + ] + +ruleElevenToNineteen :: Rule +ruleElevenToNineteen = Rule + { name = "number (11..19)" + , pattern = + [ regex "(tizenegy|tizenkett\x0151|tizenh\x00E1rom|tizenn\x00E9gy|tizen\x00F6t|tizenhat|tizenh\x00E9t|tizennyolc|tizenkilenc)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (match:_)):_) -> + HashMap.lookup (Text.toLower match) elevenToNineteenMap >>= integer + _ -> Nothing + } + +twentyoneToTwentynineMap :: HashMap Text Integer +twentyoneToTwentynineMap = HashMap.fromList + [ ( "huszonegy", 21 ) + , ( "huszonkett\x0151", 22 ) + , ( "huszonh\x00E1rom", 23 ) + , ( "huszonn\x00E9gy", 24 ) + , ( "huszon\x00F6t", 25 ) + , ( "huszonhat", 26 ) + , ( "huszonh\x00E9t", 27 ) + , ( "huszonnyolc", 28 ) + , ( "huszonkilenc", 29 ) + ] + +ruleTwentyoneToTwentynine :: Rule +ruleTwentyoneToTwentynine = Rule + { name = "number (21..29)" + , pattern = + [ regex "(huszonegy|huszonkett\x0151|huszonh\x00E1rom|huszonn\x00E9gy|huszon\x00F6t|huszonhat|huszonh\x00E9t|huszonnyolc|huszonkilenc)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (match:_)):_) -> + HashMap.lookup (Text.toLower match) twentyoneToTwentynineMap >>= integer + _ -> Nothing + } + +dozensMap :: HashMap Text Integer +dozensMap = HashMap.fromList + [ ( "h\x00FAsz", 20 ) + , ( "harminc", 30 ) + , ( "negyven", 40 ) + , ( "\x00F6tven", 50 ) + , ( "hatvan", 60 ) + , ( "hetven", 70 ) + , ( "nyolcvan", 80 ) + , ( "kilencven", 90 ) + ] + +ruleTens :: Rule +ruleTens = Rule + { name = "integer (20,30..90)" + , pattern = + [ regex "(h\x00FAsz|harminc|negyven|\x00f6tven|hatvan|hetven|nyolcvan|kilencven)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (match:_)):_) -> + HashMap.lookup (Text.toLower match) dozensMap >>= integer + _ -> Nothing + } + +ruleCompositeTens :: Rule +ruleCompositeTens = Rule + { name = "integer ([3-9][1-9])" + , pattern = + [ regex "(harminc|negyven|\x00F6tven|hatvan|hetven|nyolcvan|kilencven)(egy|kett\x0151|h\x00E1rom|n\x00E9gy|\x00F6t|hat|h\x00E9t|nyolc|kilenc)" + ] + , prod = \tokens -> case tokens of + (Token RegexMatch (GroupMatch (m1:m2:_)):_) -> do + v1 <- HashMap.lookup (Text.toLower m1) dozensMap + v2 <- HashMap.lookup (Text.toLower m2) ruleNumeralMap + integer $ v1 + v2 + _ -> Nothing + } + +rules :: [Rule] +rules = + [ ruleIntegerNumeric + , ruleNumeral + , ruleElevenToNineteen + , ruleTwentyoneToTwentynine + , ruleTens + , ruleCompositeTens + ] diff --git a/Duckling/Ranking/Classifiers.hs b/Duckling/Ranking/Classifiers.hs index 213aa04bd..9b648e7bb 100644 --- a/Duckling/Ranking/Classifiers.hs +++ b/Duckling/Ranking/Classifiers.hs @@ -23,6 +23,7 @@ import qualified Duckling.Ranking.Classifiers.FR as FRClassifiers import qualified Duckling.Ranking.Classifiers.GA as GAClassifiers import qualified Duckling.Ranking.Classifiers.HE as HEClassifiers import qualified Duckling.Ranking.Classifiers.HR as HRClassifiers +import qualified Duckling.Ranking.Classifiers.HU as HUClassifiers import qualified Duckling.Ranking.Classifiers.ID as IDClassifiers import qualified Duckling.Ranking.Classifiers.IT as ITClassifiers import qualified Duckling.Ranking.Classifiers.JA as JAClassifiers @@ -54,6 +55,7 @@ classifiers FR = FRClassifiers.classifiers classifiers GA = GAClassifiers.classifiers classifiers HE = HEClassifiers.classifiers classifiers HR = HRClassifiers.classifiers +classifiers HU = HUClassifiers.classifiers classifiers ID = IDClassifiers.classifiers classifiers IT = ITClassifiers.classifiers classifiers JA = JAClassifiers.classifiers diff --git a/Duckling/Ranking/Classifiers/HU.hs b/Duckling/Ranking/Classifiers/HU.hs new file mode 100644 index 000000000..86d4832a5 --- /dev/null +++ b/Duckling/Ranking/Classifiers/HU.hs @@ -0,0 +1,22 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + +----------------------------------------------------------------- +-- Auto-generated by regenClassifiers +-- +-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING +-- @generated +----------------------------------------------------------------- +{-# LANGUAGE OverloadedStrings #-} +module Duckling.Ranking.Classifiers.HU (classifiers) where +import Prelude +import Duckling.Ranking.Types +import qualified Data.HashMap.Strict as HashMap +import Data.String + +classifiers :: Classifiers +classifiers = HashMap.fromList [] \ No newline at end of file diff --git a/Duckling/Rules.hs b/Duckling/Rules.hs index 3c6a02968..d9239b408 100644 --- a/Duckling/Rules.hs +++ b/Duckling/Rules.hs @@ -34,6 +34,7 @@ import qualified Duckling.Rules.FR as FRRules import qualified Duckling.Rules.GA as GARules import qualified Duckling.Rules.HE as HERules import qualified Duckling.Rules.HR as HRRules +import qualified Duckling.Rules.HU as HURules import qualified Duckling.Rules.ID as IDRules import qualified Duckling.Rules.IT as ITRules import qualified Duckling.Rules.JA as JARules @@ -81,6 +82,7 @@ langRules FR = FRRules.rules langRules GA = GARules.rules langRules HE = HERules.rules langRules HR = HRRules.rules +langRules HU = HURules.rules langRules ID = IDRules.rules langRules IT = ITRules.rules langRules JA = JARules.rules diff --git a/Duckling/Rules/HU.hs b/Duckling/Rules/HU.hs new file mode 100644 index 000000000..6209e48d5 --- /dev/null +++ b/Duckling/Rules/HU.hs @@ -0,0 +1,34 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + + +{-# LANGUAGE GADTs #-} +{-# LANGUAGE OverloadedStrings #-} + +module Duckling.Rules.HU + ( rules + ) where + +import Duckling.Dimensions.Types +import Duckling.Types +import qualified Duckling.Numeral.HU.Rules as Numeral + +rules :: Some Dimension -> [Rule] +rules (This Distance) = [] +rules (This Duration) = [] +rules (This Numeral) = Numeral.rules +rules (This Email) = [] +rules (This AmountOfMoney) = [] +rules (This Ordinal) = [] +rules (This PhoneNumber) = [] +rules (This Quantity) = [] +rules (This RegexMatch) = [] +rules (This Temperature) = [] +rules (This Time) = [] +rules (This TimeGrain) = [] +rules (This Url) = [] +rules (This Volume) = [] diff --git a/duckling.cabal b/duckling.cabal index d614716a9..dd3dd6988 100644 --- a/duckling.cabal +++ b/duckling.cabal @@ -52,6 +52,7 @@ library , Duckling.Rules.GA , Duckling.Rules.HE , Duckling.Rules.HR + , Duckling.Rules.HU , Duckling.Rules.ID , Duckling.Rules.IT , Duckling.Rules.JA @@ -87,6 +88,7 @@ library , Duckling.Ranking.Classifiers.GA , Duckling.Ranking.Classifiers.HE , Duckling.Ranking.Classifiers.HR + , Duckling.Ranking.Classifiers.HU , Duckling.Ranking.Classifiers.ID , Duckling.Ranking.Classifiers.IT , Duckling.Ranking.Classifiers.JA @@ -121,6 +123,7 @@ library , Duckling.Dimensions.GA , Duckling.Dimensions.HE , Duckling.Dimensions.HR + , Duckling.Dimensions.HU , Duckling.Dimensions.ID , Duckling.Dimensions.IT , Duckling.Dimensions.JA @@ -261,6 +264,8 @@ library , Duckling.Numeral.HE.Rules , Duckling.Numeral.HR.Corpus , Duckling.Numeral.HR.Rules + , Duckling.Numeral.HU.Corpus + , Duckling.Numeral.HU.Rules , Duckling.Numeral.ID.Corpus , Duckling.Numeral.ID.Rules , Duckling.Numeral.IT.Corpus @@ -604,6 +609,7 @@ test-suite duckling-test , Duckling.Numeral.GA.Tests , Duckling.Numeral.HE.Tests , Duckling.Numeral.HR.Tests + , Duckling.Numeral.HU.Tests , Duckling.Numeral.ID.Tests , Duckling.Numeral.IT.Tests , Duckling.Numeral.JA.Tests diff --git a/exe/Duckling/Ranking/Generate.hs b/exe/Duckling/Ranking/Generate.hs index 02d7cc6e7..ac6027d80 100644 --- a/exe/Duckling/Ranking/Generate.hs +++ b/exe/Duckling/Ranking/Generate.hs @@ -80,6 +80,7 @@ regenClassifiers lang = do GA -> GATime.corpus HR -> HRTime.corpus HE -> HETime.corpus + HU -> (testContext, []) ID -> (testContext, []) IT -> ITTime.corpus JA -> (testContext, []) diff --git a/tests/Duckling/Numeral/HU/Tests.hs b/tests/Duckling/Numeral/HU/Tests.hs new file mode 100644 index 000000000..0422033a6 --- /dev/null +++ b/tests/Duckling/Numeral/HU/Tests.hs @@ -0,0 +1,22 @@ +-- Copyright (c) 2016-present, Facebook, Inc. +-- All rights reserved. +-- +-- This source code is licensed under the BSD-style license found in the +-- LICENSE file in the root directory of this source tree. An additional grant +-- of patent rights can be found in the PATENTS file in the same directory. + +module Duckling.Numeral.HU.Tests + ( tests ) where + +import Data.String +import Prelude +import Test.Tasty + +import Duckling.Dimensions.Types +import Duckling.Numeral.HU.Corpus +import Duckling.Testing.Asserts + +tests :: TestTree +tests = testGroup "HU Tests" + [ makeCorpusTest [This Numeral] corpus + ] diff --git a/tests/Duckling/Numeral/Tests.hs b/tests/Duckling/Numeral/Tests.hs index f5db59cd8..192b804b7 100644 --- a/tests/Duckling/Numeral/Tests.hs +++ b/tests/Duckling/Numeral/Tests.hs @@ -24,6 +24,7 @@ import qualified Duckling.Numeral.FR.Tests as FR import qualified Duckling.Numeral.GA.Tests as GA import qualified Duckling.Numeral.HE.Tests as HE import qualified Duckling.Numeral.HR.Tests as HR +import qualified Duckling.Numeral.HU.Tests as HU import qualified Duckling.Numeral.ID.Tests as ID import qualified Duckling.Numeral.IT.Tests as IT import qualified Duckling.Numeral.JA.Tests as JA @@ -55,6 +56,7 @@ tests = testGroup "Numeral Tests" , GA.tests , HE.tests , HR.tests + , HU.tests , ID.tests , IT.tests , JA.tests