Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Branch: master
Fetching contributors…

Cannot retrieve contributors at this time

154 lines (115 sloc) 6.518 kB
{-
Copyright © 2012 Daniel Tahara, Kartik Venkatraman
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
-}
module Recognition where
import Deduction
import Data.Maybe
import Data.Char
--------------------------------------------------------------------------
import DTKV_Grammar
data Tree a b = Leaf a | Node b [Tree a b] deriving (Show, Eq)
--------------------------------------------------------------------------
type RecogItem = (Category, Int, Int, Tree Word Category)
getBounds :: RecogItem -> (Int, Int)
getBounds (_,min,max,_) = (min, max)
getLowerBound = fst . getBounds
getUpperBound = snd . getBounds
getCategory :: RecogItem -> Category
getCategory (c,_,_,_) = c
getParseTree :: RecogItem -> Tree Word Category
getParseTree (_,_,_,t) = t
preprocess :: Grammar -> [String] -> [RecogItem]
preprocess g strs = concatMap (getTermCategory g) (zip [0..] strs)
getTermCategory :: Grammar -> (Int, Word) -> [RecogItem]
getTermCategory (grs,c) (index, w1)
| all isDigit w1 = [("INTEGER", index, index + 1, Node "INTEGER" [Leaf w1])]
-- | head w1 == '-' && all isDigit $ tail w1
-- = [("INTEGER", index, index + 1)]
-- negative numbers?
| otherwise =
if cats == [] then [("STRING", index, index + 1, Node "STRING" [Leaf w1])] else cats
where cats = [(c, index, index + 1, Node c [Leaf w1]) |
TerminatingRule c w2 <- grs, w1 == w2]
-- if all isDigit string, then "Integer"
-- if string == "True" or "False" then Bool
binaryRules :: Grammar -> [RecogItem -> RecogItem -> [RecogItem]]
binaryRules (grs, c) =
[convertToBinRule c1 c2 c3 | NonTerminatingBinaryRule c1 c2 c3 <- grs]
convertToBinRule :: Category -> Category -> Category
-> (RecogItem -> RecogItem -> [RecogItem])
convertToBinRule c1 c2 c3 =
\r1 r2 -> if getCategory r1 == c2 && getCategory r2 == c3 && boundsMatch r1 r2
then [(c1, fst (getBounds r1), snd (getBounds r2), Node c1 [getParseTree r1, getParseTree r2])] else []
convertToBinaryGrammarRule :: RecogItem -> RecogItem -> RecogItem -> GrammarRule
convertToBinaryGrammarRule r1 r2 r3 = NonTerminatingBinaryRule (getCategory r1) (getCategory r2) (getCategory r3)
unaryRules :: Grammar -> [RecogItem -> [RecogItem]]
unaryRules (grs, c) =
[convertToUnaryRule c1 c2 | NonTerminatingUnaryRule c1 c2 <- grs]
convertToUnaryRule :: Category -> Category -> (RecogItem -> [RecogItem])
convertToUnaryRule c1 c2 =
\r1 -> if getCategory r1 == c2
then [(c1, fst $ getBounds r1, snd $ getBounds r1, Node c1 [getParseTree r1])] else []
convertToUnaryGrammarRule :: RecogItem -> RecogItem -> GrammarRule
convertToUnaryGrammarRule r1 r2 = NonTerminatingUnaryRule (getCategory r1) (getCategory r2)
boundsMatch :: RecogItem -> RecogItem -> Bool
boundsMatch (_, min1, max1, _) (_, min2, max2, _) = max1 == min2
getAllItems :: Grammar -> [String] -> [RecogItem]
getAllItems g strs = deduce (unaryRules g) (binaryRules g) (preprocess g strs)
validParses :: Grammar -> String -> [RecogItem]
validParses g@(grs, cat) s = [ri | ri@(c, lb, ub, w) <- (getAllItems g items), (lb, ub) == (0, length items) && c == cat]
-- can't check s == w??? why not? -> because multiple spaces can be issue?
where items = words s
recognize :: Grammar -> String -> Bool
recognize g@(grs, cat) s = not . null $ validParses g s
---------------------------------------------------------------------
-- TopDownParse Tree
-- Idea: Take all items, and then recreate:
{-
getParseTree g ri ris = head $ getParseTrees g ri ris
getParseTrees :: Grammar -> RecogItem -> [RecogItem] -> [Tree Word Category]
getParseTrees g@(grs, re) ri@(cat, lb, ub, t) ris
| isTerminatingCategory g cat = [Node cat [Leaf c1]]
| ub == i = map (Node cat) $ [[x] | x <- singles]-- unary rule
| otherwise = map (Node cat) $ concatMap crossProduct pairs
--[[m,n] | m <- getParseTrees g x ris, n <- getParseTrees g y ris | [x,y] <- pairs]
where singles = concat [getParseTrees g r ris | r@(c, l, u, d) <- ris, r /= ri,
(lb, ub) == (l,u), NonTerminatingUnaryRule cat c `elem` grs]
pairs = [(getParseTrees g r1 ris, getParseTrees g r2 ris) | r1@(c1,l1,u1,d1) <- ris, r2@(c2,l2,u2,d2) <- ris,
(l1,u1) == (lb,i), (l2,u2) == (i,ub), NonTerminatingBinaryRule cat c1 c2 `elem` grs] -- binary rules (lb, i), (i, ub)
-- crossProduct :: ([a],[a]) -> [[a,a]]
crossProduct (x,y) = [[m,n] | m <- x, n <- y]
eltToList x = [[m] | m <- x]
-}
lowerBoundsMatch :: RecogItem -> RecogItem -> Bool
lowerBoundsMatch r1 r2 = getLowerBound r1 == getLowerBound r2
upperBoundsMatch :: RecogItem -> RecogItem -> Bool
upperBoundsMatch r1 r2 = getUpperBound r1 == getUpperBound r2
getWord_Terminating :: GrammarRule -> Word
getWord_Terminating (TerminatingRule c w) = w
getYield_NonTerm :: GrammarRule -> [Category]
getYield_NonTerm (NonTerminatingUnaryRule c c1) = [c1]
getYield_NonTerm (NonTerminatingBinaryRule c c1 c2) = [c1, c2]
isTerminatingCategory :: Grammar -> Category -> Bool
isTerminatingCategory g cat = not . null $ getRulesOfCat_Terminating g cat
-- should only ever have one element
getRulesOfCat_Terminating :: Grammar -> Category -> [GrammarRule]
getRulesOfCat_Terminating g cat = [r | r@(TerminatingRule c w) <- fst g, c == cat]
{-
-- if it's a terminating category, just find it with right bounds and done
-- otherwise , for all pairs that form a rule, bounds match and add up, Node cat [getParseTree 1, getParseTree2 ]
-}
Jump to Line
Something went wrong with that request. Please try again.