In [1]:
:opt no-lint

# 파서의 타입
```haskell
type Parser = String -> Tree              -- 문자열을 처리해 Tree로 변환
type Parser = String -> (Tree, String)    -- 처리하고 남은 뒷부분 문자열 고려
type Parser = String -> [(Tree, String)]  -- 실패할 경우 길이 0인 빈 리스트
type Parser a = String -> [(a, String)]   -- 특정 Tree 대신 타입 a로 일반화
type Parser a = [Char] -> [(a, [Char])]   -- 윗줄과 같음  String = [Char]
type Parser tok a = [tok] -> [(a, [tok])] -- Char 대신 토큰 타입 tok로 일반화
```

In [2]:
-- A parser over tokens of type 'tok' producing values of type 'a'.
-- It maps an input token list to a list of (result, remaining tokens)
-- pairs; an empty result list represents failure.
type Parser tok a = [tok] -> [(a, [tok])]

In [3]:
-- | Parser that always succeeds with the given value and consumes none
-- of the input: the token list is handed back untouched.
return :: a -> Parser tok a
return v ts = [(v, ts)]

-- | Parser that fails on every input, i.e. always yields the empty
-- result list.
failure :: Parser tok a
failure = const []

In [4]:
(return 1) "abc"
failure    "abc"

[(1,"abc")]

[]

In [5]:
-- | Consume exactly one token and return it.
-- Fails (empty result list) when the input is empty.
item :: Parser tok tok
item input = case input of
  []         -> []            -- nothing to consume: fail
  (t : rest) -> [(t, rest)]   -- take the first token, leave the rest

In [6]:
-- | Sequencing: run the first parser, then for each of its results feed
-- the value to 'pf' and run the resulting parser on the remaining
-- input. All combinations of results are collected.
(>>=) :: Parser tok a -> (a -> Parser tok b) -> Parser tok b
(p1 >>= pf) input =
  [ (result, rest)
  | (firstValue, afterFirst) <- p1 input
  , (result, rest)           <- pf firstValue afterFirst
  ]

-- | Choice: try the first parser; only if it yields no results at all
-- is the second parser run on the same input. When the first parser
-- succeeds, its results are returned and the second is ignored.
(<|>) :: Parser tok a -> Parser tok a -> Parser tok a
(p1 <|> p2) input
  | null firstResults = p2 input
  | otherwise         = firstResults
  where
    firstResults = p1 input

In [7]:
( item >>= \_  ->  item                            ) "abcd"
( item >>= \c1 ->  item >>= \c2 ->  return [c1,c2] ) "abcd" 

[('b',"cd")]

[("ab","cd")]

In [8]:
( item >>= \c -> (if c=='h' then item else failure) ) "hello"
( item >>= \c -> (if c=='h' then item else failure) ) "world"

[('e',"llo")]

[]

In [9]:
( item >>=                                \c1 ->
  (if c1=='h' then item else failure) >>= \c2 ->  return [c1,c2] ) "hello"
( item >>=                                \c1 ->
  (if c1=='h' then item else failure) >>= \c2 ->  return [c1,c2] ) "world"

[("he","llo")]

[]

In [10]:
-- | Read one token and succeed with it only when it satisfies the given
-- predicate; otherwise fail.
sat :: (tok -> Bool) -> Parser tok tok
sat test = item >>= accept
  where
    accept t
      | test t    = return t   -- predicate holds: keep the token
      | otherwise = failure    -- predicate rejects the token

# 문자열을 처리하는 파서

In [11]:
import Data.Char ( isDigit, isLower, isUpper,
                   isAlpha, isAlphaNum, isSpace )
-- Single-character parsers, each built from 'sat' and the corresponding
-- "Data.Char" predicate. Each consumes one character when the predicate
-- accepts it and fails otherwise.
digit, lower, upper, letter, alphanum, space :: Parser Char Char
digit    = sat isDigit
lower    = sat isLower
upper    = sat isUpper
letter   = sat isAlpha
alphanum = sat isAlphaNum
space    = sat isSpace

In [12]:
:type isDigit
:type digit

In [13]:
digit "123"
digit "a23"

[('1',"23")]

[]

In [14]:
-- | @char c@ succeeds only when the first input character equals @c@,
-- returning that character.
char :: Char -> Parser Char Char
char c = sat (\t -> t == c)

-- | @string s@ succeeds only when the input starts with @s@, returning
-- @s@ itself. The empty string always matches without consuming input.
string :: String -> Parser Char String
string s = case s of
  []       -> return []
  (c : cs) ->
    char c >>= \_ ->        -- match the first character
      string cs >>= \_ ->   -- then the remainder, recursively
        return s

In [15]:
:type char
:type string

In [16]:
char 'h' "hello"
char 'h' "world"

[('h',"ello")]

[]

In [17]:
string ("abc") "abcdef"
string ("abc") "ab1234"

[("abc","def")]

[]

In [18]:
-- | Apply a parser zero or more times, collecting the results in order.
-- Falls back to an empty list (consuming nothing) when the parser does
-- not succeed even once.
-- NOTE: if @p@ can succeed without consuming input, the mutual recursion
-- with 'many1' never reaches a shorter input and does not terminate.
many :: Parser tok a -> Parser tok [a]
many p = oneOrMore <|> none
  where
    oneOrMore = many1 p
    none      = return []

-- | Apply a parser one or more times, collecting the results in order.
-- Fails when the parser does not succeed at least once.
many1 :: Parser tok a -> Parser tok [a]
many1 p =
  p >>= \first ->
    many p >>= \rest ->
      return (first : rest)

In [19]:
(many  digit) "123def"
(many1 digit) "123def"

[("123","def")]

[("123","def")]

In [20]:
(many  digit) "abcdef"
(many1 digit) "abcdef"

[("","abcdef")]

[]

In [21]:
-- | Parse a natural number: one or more characters accepted by 'digit',
-- converted to an 'Int'. 'read' only ever sees a non-empty run of
-- digit characters here because of 'many1'.
nat :: Parser Char Int
nat = many1 digit >>= (return . read)

-- | Parse an identifier: a lower-case letter followed by zero or more
-- alphanumeric characters.
--
-- The tail uses 'many' rather than 'many1' so that single-letter
-- identifiers such as @x@ are accepted; with 'many1' at least two
-- characters were required and @ident "x"@ failed.
ident :: Parser Char String
ident = lower         >>= \c  ->
        many alphanum >>= \cs ->
        return (c:cs)

-- | 'spaces' skips zero or more whitespace characters and always
-- succeeds; 'spaces1' requires at least one whitespace character.
-- Both discard what they consumed and return unit.
spaces, spaces1 :: Parser Char ()
spaces  = many  space >>= const (return ())
spaces1 = many1 space >>= const (return ())

In [22]:
(many1 digit) "123def"
nat           "123def"

[("123","def")]

[(123,"def")]

In [23]:
ident "abc123defghi"
ident "abc123d  ghi"
ident "123abcd  ghi"

[("abc123defghi","")]

[("abc123d","  ghi")]

[]

In [24]:
spaces "  abc"
spaces "abc"

[((),"abc")]

[((),"abc")]

In [25]:
spaces1 "  abc"
spaces1 "abc"

[((),"abc")]

[]

# 토큰화

In [26]:
-- Tokens produced by the tokenizer.
data Tok = KW String -- keyword
         | ID String -- variable name
         | INT Int   -- integer
         | LP        -- (
         | RP        -- )
         | LAM       -- \
         | DOT       -- .
         | ADD       -- +
         deriving Show

-- | Parse an identifier-shaped word and classify it: the reserved words
-- @if@, @then@ and @else@ become 'KW' tokens, anything else an 'ID'.
word :: Parser Char Tok
word = ident >>= \s -> return (classify s)
  where
    classify s
      | s `elem` ["if","then","else"] = KW s
      | otherwise                     = ID s

-- | Parse a natural number with 'nat' and wrap it as an 'INT' token.
natural :: Parser Char Tok
natural = nat >>= (return . INT)

In [27]:
:type word
:type natural

In [28]:
"if" `elem` ["if","then","else"]
"hi" `elem` ["if","then","else"]

True

False

In [29]:
word "if then else  "
word "ifthen  else  "

[(KW "if"," then else  ")]

[(ID "ifthen","  else  ")]

In [30]:
(many word) "if then else  "
(many word) "ifthen  else  "

[([KW "if"]," then else  ")]

[([ID "ifthen"],"  else  ")]

In [31]:
-- | Run the given parser, then skip any whitespace that follows it
-- (via 'spaces'), returning the parser's own result.
tok :: Parser Char a -> Parser Char a
tok p = p >>= \v ->
        spaces >>= const (return v)

In [32]:
(many (tok word)) "if then else  "
(many (tok word)) "ifthen  else  "

[([KW "if",KW "then",KW "else"],"")]

[([ID "ifthen",KW "else"],"")]

In [33]:
(many (tok word <|> tok natural)) "if b1 then 123 else x3  "
(many (tok word <|> tok natural)) "if b1 then 123 else 3 + (\\y. y) "

[([KW "if",ID "b1",KW "then",INT 123,KW "else",ID "x3"],"")]

[([KW "if",ID "b1",KW "then",INT 123,KW "else",INT 3],"+ (\\y. y) ")]

# 문법분석

In [34]:
-- facParser :: Parser Tok Expr
-- facParser = undefined