Skip to content

Commit

Permalink
HU Setup + Numeral
Browse files Browse the repository at this point in the history
Summary:
- Set up Hungarian (HU) language
- Add Numeral dimension
Closes #79

Reviewed By: blandinw

Differential Revision: D5595812

Pulled By: patapizza

fbshipit-source-id: 5959938
  • Loading branch information
dubovinszky authored and facebook-github-bot committed Aug 10, 2017
1 parent 5d03b45 commit 24d3f19
Show file tree
Hide file tree
Showing 13 changed files with 379 additions and 0 deletions.
2 changes: 2 additions & 0 deletions Duckling/Dimensions.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import qualified Duckling.Dimensions.FR as FRDimensions
import qualified Duckling.Dimensions.GA as GADimensions
import qualified Duckling.Dimensions.HE as HEDimensions
import qualified Duckling.Dimensions.HR as HRDimensions
import qualified Duckling.Dimensions.HU as HUDimensions
import qualified Duckling.Dimensions.ID as IDDimensions
import qualified Duckling.Dimensions.IT as ITDimensions
import qualified Duckling.Dimensions.JA as JADimensions
Expand Down Expand Up @@ -90,6 +91,7 @@ langDimensions FR = FRDimensions.allDimensions
langDimensions GA = GADimensions.allDimensions
langDimensions HE = HEDimensions.allDimensions
langDimensions HR = HRDimensions.allDimensions
langDimensions HU = HUDimensions.allDimensions
langDimensions ID = IDDimensions.allDimensions
langDimensions IT = ITDimensions.allDimensions
langDimensions JA = JADimensions.allDimensions
Expand Down
18 changes: 18 additions & 0 deletions Duckling/Dimensions/HU.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


module Duckling.Dimensions.HU
( allDimensions
) where

import Duckling.Dimensions.Types

-- | Dimensions listed for Hungarian (HU). Only 'Numeral' is present.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]
1 change: 1 addition & 0 deletions Duckling/Lang.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ data Lang
| GA
| HE
| HR
| HU
| ID
| IT
| JA
Expand Down
98 changes: 98 additions & 0 deletions Duckling/Numeral/HU/Corpus.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


{-# LANGUAGE OverloadedStrings #-}

module Duckling.Numeral.HU.Corpus
( corpus ) where

import Data.String
import Prelude

import Duckling.Lang
import Duckling.Numeral.Types
import Duckling.Resolve
import Duckling.Testing.Types

-- | Hungarian numeral corpus: 'testContext' with its @lang@ field set to
-- 'HU', paired with every example from 'allExamples'.
corpus :: Corpus
corpus = (huContext, allExamples)
  where
    huContext = testContext {lang = HU}

-- | Every Hungarian numeral example, grouped by the value each input is
-- expected to resolve to.
--
-- Groups are listed in ascending order of value, and each value appears
-- exactly once (the list previously carried a second, redundant
-- @NumeralValue 20@ group for "húsz").
allExamples :: [Example]
allExamples = concat
  [ examples (NumeralValue 0)
      [ "0"
      , "nulla"
      , "zéró"
      ]
  , examples (NumeralValue 1)
      [ "1"
      , "egy"
      ]
  , examples (NumeralValue 2)
      [ "kettő"
      ]
  , examples (NumeralValue 3)
      [ "három"
      ]
  , examples (NumeralValue 4)
      [ "négy"
      ]
  , examples (NumeralValue 5)
      [ "öt"
      ]
  , examples (NumeralValue 6)
      [ "hat"
      ]
  , examples (NumeralValue 7)
      [ "hét"
      ]
  , examples (NumeralValue 8)
      [ "nyolc"
      ]
  , examples (NumeralValue 9)
      [ "kilenc"
      ]
  , examples (NumeralValue 10)
      [ "tíz"
      ]
  , examples (NumeralValue 11)
      [ "tizenegy"
      ]
  , examples (NumeralValue 15)
      [ "tizenöt"
      ]
  , examples (NumeralValue 17)
      [ "tizenhét"
      ]
  , examples (NumeralValue 20)
      [ "20"
      , "húsz"
      ]
  , examples (NumeralValue 22)
      [ "huszonkettő"
      ]
  , examples (NumeralValue 24)
      [ "24"
      , "huszonnégy"
      ]
  , examples (NumeralValue 26)
      [ "huszonhat"
      ]
  , examples (NumeralValue 28)
      [ "huszonnyolc"
      ]
  , examples (NumeralValue 34)
      [ "harmincnégy"
      ]
  , examples (NumeralValue 50)
      [ "ötven"
      ]
  ]
169 changes: 169 additions & 0 deletions Duckling/Numeral/HU/Rules.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.


{-# LANGUAGE GADTs #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoRebindableSyntax #-}

module Duckling.Numeral.HU.Rules
( rules ) where

import Data.HashMap.Strict (HashMap)
import Data.Maybe
import Data.String
import Data.Text (Text)
import Prelude
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text

import Duckling.Dimensions.Types
import Duckling.Numeral.Helpers
import Duckling.Numeral.Types (NumeralData (..))
import Duckling.Regex.Types
import Duckling.Types
import qualified Duckling.Numeral.Types as TNumeral

-- | Integers written with digits: a run of 1 to 18 decimal digits is parsed
-- with 'parseInt' and emitted through 'integer'.
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
  { name = "integer (numeric)"
  , pattern = [regex "(\\d{1,18})"]
  , prod = produce
  }
  where
    -- The signature is required: matching on 'Token' needs a known result
    -- type.
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (digits:_)):_) =
      parseInt digits >>= integer . toInteger
    produce _ = Nothing

-- | Hungarian number words for 0 through 10, keyed by their lowercase
-- spelling. Zero has two spellings ("nulla" and "z\x00E9r\x00F3").
ruleNumeralMap :: HashMap Text Integer
ruleNumeralMap = HashMap.fromList $
  ( "nulla", 0 ) : zip
    -- The second spelling of zero, then the words for 1..10 in order.
    [ "z\x00E9r\x00F3"
    , "egy"
    , "kett\x0151"
    , "h\x00E1rom"
    , "n\x00E9gy"
    , "\x00F6t"
    , "hat"
    , "h\x00E9t"
    , "nyolc"
    , "kilenc"
    , "t\x00EDz"
    ]
    [0 ..]

-- | Number words for 0 through 10. The matched word is lowercased and
-- resolved through 'ruleNumeralMap'; a word missing from the map yields no
-- token.
ruleNumeral :: Rule
ruleNumeral = Rule
  { name = "number (0..10)"
  , pattern =
      [ regex "(nulla|z\x00E9r\x00F3|egy|kett\x0151|h\x00E1rom|n\x00E9gy|\x00F6t|hat|h\x00E9t|nyolc|kilenc|t\x00EDz)"
      ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)):_) =
      integer =<< HashMap.lookup (Text.toLower word) ruleNumeralMap
    produce _ = Nothing

-- | Hungarian number words for 11 through 19, keyed by their lowercase
-- spelling.
elevenToNineteenMap :: HashMap Text Integer
elevenToNineteenMap = HashMap.fromList $ zip
  -- Words in ascending order, paired with 11, 12, ... 19.
  [ "tizenegy"
  , "tizenkett\x0151"
  , "tizenh\x00E1rom"
  , "tizenn\x00E9gy"
  , "tizen\x00F6t"
  , "tizenhat"
  , "tizenh\x00E9t"
  , "tizennyolc"
  , "tizenkilenc"
  ]
  [11 ..]

-- | Number words for 11 through 19. The matched word is lowercased and
-- resolved through 'elevenToNineteenMap'.
ruleElevenToNineteen :: Rule
ruleElevenToNineteen = Rule
  { name = "number (11..19)"
  , pattern =
      [ regex "(tizenegy|tizenkett\x0151|tizenh\x00E1rom|tizenn\x00E9gy|tizen\x00F6t|tizenhat|tizenh\x00E9t|tizennyolc|tizenkilenc)"
      ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)):_) =
      integer =<< HashMap.lookup (Text.toLower word) elevenToNineteenMap
    produce _ = Nothing

-- | Hungarian number words for 21 through 29, keyed by their lowercase
-- spelling.
twentyoneToTwentynineMap :: HashMap Text Integer
twentyoneToTwentynineMap = HashMap.fromList $ zip
  -- Words in ascending order, paired with 21, 22, ... 29.
  [ "huszonegy"
  , "huszonkett\x0151"
  , "huszonh\x00E1rom"
  , "huszonn\x00E9gy"
  , "huszon\x00F6t"
  , "huszonhat"
  , "huszonh\x00E9t"
  , "huszonnyolc"
  , "huszonkilenc"
  ]
  [21 ..]

-- | Number words for 21 through 29. The matched word is lowercased and
-- resolved through 'twentyoneToTwentynineMap'.
ruleTwentyoneToTwentynine :: Rule
ruleTwentyoneToTwentynine = Rule
  { name = "number (21..29)"
  , pattern =
      [ regex "(huszonegy|huszonkett\x0151|huszonh\x00E1rom|huszonn\x00E9gy|huszon\x00F6t|huszonhat|huszonh\x00E9t|huszonnyolc|huszonkilenc)"
      ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)):_) =
      integer =<< HashMap.lookup (Text.toLower word) twentyoneToTwentynineMap
    produce _ = Nothing

-- | Hungarian words for the round tens 20, 30, ... 90, keyed by their
-- lowercase spelling. Despite the name, the values are multiples of ten,
-- not of twelve.
dozensMap :: HashMap Text Integer
dozensMap = HashMap.fromList $ zip
  -- Words in ascending order, paired with 20, 30, ... 90.
  [ "h\x00FAsz"
  , "harminc"
  , "negyven"
  , "\x00F6tven"
  , "hatvan"
  , "hetven"
  , "nyolcvan"
  , "kilencven"
  ]
  [20, 30 ..]

-- | Round tens from 20 to 90. The matched word is lowercased and resolved
-- through 'dozensMap'.
ruleTens :: Rule
ruleTens = Rule
  { name = "integer (20,30..90)"
  , pattern =
      [ regex "(h\x00FAsz|harminc|negyven|\x00f6tven|hatvan|hetven|nyolcvan|kilencven)"
      ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (word:_)):_) =
      integer =<< HashMap.lookup (Text.toLower word) dozensMap
    produce _ = Nothing

-- | Compound numbers made of a tens word (30..90) immediately followed by a
-- units word (1..9), e.g. "harmincn\x00E9gy". The two captured groups are
-- resolved through 'dozensMap' and 'ruleNumeralMap' and then added; if
-- either lookup fails, no token is produced.
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
  { name = "integer ([3-9][1-9])"
  , pattern =
      [ regex "(harminc|negyven|\x00F6tven|hatvan|hetven|nyolcvan|kilencven)(egy|kett\x0151|h\x00E1rom|n\x00E9gy|\x00F6t|hat|h\x00E9t|nyolc|kilenc)"
      ]
  , prod = produce
  }
  where
    produce :: [Token] -> Maybe Token
    produce (Token RegexMatch (GroupMatch (tensWord:unitWord:_)):_) =
      ((+) <$> valueOf dozensMap tensWord <*> valueOf ruleNumeralMap unitWord)
        >>= integer
    produce _ = Nothing

    -- Look a captured word up in a table after lowercasing it.
    valueOf :: HashMap Text Integer -> Text -> Maybe Integer
    valueOf table word = HashMap.lookup (Text.toLower word) table

-- | All Hungarian numeral rules exported by this module: digit strings,
-- the word tables for 0..10, 11..19 and 21..29, the round tens, and the
-- tens+units compounds.
rules :: [Rule]
rules =
[ ruleIntegerNumeric
, ruleNumeral
, ruleElevenToNineteen
, ruleTwentyoneToTwentynine
, ruleTens
, ruleCompositeTens
]
2 changes: 2 additions & 0 deletions Duckling/Ranking/Classifiers.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import qualified Duckling.Ranking.Classifiers.FR as FRClassifiers
import qualified Duckling.Ranking.Classifiers.GA as GAClassifiers
import qualified Duckling.Ranking.Classifiers.HE as HEClassifiers
import qualified Duckling.Ranking.Classifiers.HR as HRClassifiers
import qualified Duckling.Ranking.Classifiers.HU as HUClassifiers
import qualified Duckling.Ranking.Classifiers.ID as IDClassifiers
import qualified Duckling.Ranking.Classifiers.IT as ITClassifiers
import qualified Duckling.Ranking.Classifiers.JA as JAClassifiers
Expand Down Expand Up @@ -54,6 +55,7 @@ classifiers FR = FRClassifiers.classifiers
classifiers GA = GAClassifiers.classifiers
classifiers HE = HEClassifiers.classifiers
classifiers HR = HRClassifiers.classifiers
classifiers HU = HUClassifiers.classifiers
classifiers ID = IDClassifiers.classifiers
classifiers IT = ITClassifiers.classifiers
classifiers JA = JAClassifiers.classifiers
Expand Down
22 changes: 22 additions & 0 deletions Duckling/Ranking/Classifiers/HU.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Copyright (c) 2016-present, Facebook, Inc.
-- All rights reserved.
--
-- This source code is licensed under the BSD-style license found in the
-- LICENSE file in the root directory of this source tree. An additional grant
-- of patent rights can be found in the PATENTS file in the same directory.

-----------------------------------------------------------------
-- Auto-generated by regenClassifiers
--
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-- @generated
-----------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
module Duckling.Ranking.Classifiers.HU (classifiers) where
import Prelude
import Duckling.Ranking.Types
import qualified Data.HashMap.Strict as HashMap
import Data.String

-- | Ranking classifiers for Hungarian (HU). The table is empty here; per
-- the file header, this module is produced by @regenClassifiers@.
classifiers :: Classifiers
classifiers = HashMap.fromList []
2 changes: 2 additions & 0 deletions Duckling/Rules.hs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import qualified Duckling.Rules.FR as FRRules
import qualified Duckling.Rules.GA as GARules
import qualified Duckling.Rules.HE as HERules
import qualified Duckling.Rules.HR as HRRules
import qualified Duckling.Rules.HU as HURules
import qualified Duckling.Rules.ID as IDRules
import qualified Duckling.Rules.IT as ITRules
import qualified Duckling.Rules.JA as JARules
Expand Down Expand Up @@ -81,6 +82,7 @@ langRules FR = FRRules.rules
langRules GA = GARules.rules
langRules HE = HERules.rules
langRules HR = HRRules.rules
langRules HU = HURules.rules
langRules ID = IDRules.rules
langRules IT = ITRules.rules
langRules JA = JARules.rules
Expand Down
Loading

0 comments on commit 24d3f19

Please sign in to comment.