# 2. Imperative Programming Languages

우선 2.5까지 나오는 내용 중에서 빼고 살펴보는데, 지난번에 `CMa01.ipynb`에 작성했던 컴파일러 코드에서 문제점을 수정해 보자.

---
컴파일 타겟이 되는 VM의 단순화된 버전을 하스켈로 구현

In [2]:
-- {-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}

-- Instruction set of the simplified CMa virtual machine.
-- The type parameter `pa` is the representation of jump targets: labeled
-- (assembly-style) code uses `Instr Label`, executable code uses `Instr PA`.
-- The derived Functor instance maps over the jump targets only.
-- Constructors that are commented out are left out of this simplified version.
data Instr pa
    = HALT | NEG | ADD | SUB | MUL | DIV
    | AND | OR | EQU | NEQ | GR | GEQ | LE | LEQ
    | POP | DUP
    | LOADc Int | LOAD -- | LOADr | LOADrc
    | STORE -- | STOREr
    | JUMP pa | JUMPz pa | JUMPi pa
    -- | CALL | RETURN | ENTER | ALLOC | SLIDE | MARK
    -- | NEW
    deriving (Eq, Ord, Show, Functor)

-- Machine state: the code that is still to be executed, paired with the stack.
type CMa = (Code, Stack)

-- The stack is a list whose head is the top of the stack.
type Stack = [Value]
type Value = Int

-- stack address as reverse index of stack
-- (address 0 is the last element of the list, i.e. the bottom of the stack)
type SA = Int

type Code = [Instr PA]

-- program address representation
-- A program address is simply the code that execution continues with after
-- the jump, so code containing backward jumps is a cyclic structure.
-- NOTE(review): the derived Eq/Ord/Show presumably do not terminate on such
-- cyclic code; printing goes through the overlapping Show instance for Code.
newtype PA = PA Code  deriving (Eq,Ord,Show)

In [3]:
import Data.List

-- Placeholder that is shown as an ellipsis; it stands in for program
-- addresses when code is printed.
data DotDotDot = DotDotDot

instance Show DotDotDot where
    show = const "..."

-- Printing code must not follow jump targets (they may point back into the
-- same code, which would print forever), so every program address is
-- replaced by an ellipsis before the instruction is shown.
instance {-# OVERLAPS #-} Show Code where
    show is = "[" ++ intercalate "," (map render is) ++ "]"
        where
        render = show . fmap (const DotDotDot)

-- Shows a machine state as a two-line record: the stack first, then the
-- remaining code (printed with elided jump targets so that it terminates).
instance {-# OVERLAPS #-} Show CMa where
    show (is,vs) = concat ["{ stack = ", show vs, "\n , code = ", show is, " }"]

In [4]:
-- load and store operation for Stack
-- Read the value stored at stack address `addr`.
-- Addresses count from the bottom of the stack, so the list is reversed
-- before indexing.
load :: SA -> Stack -> Value
load addr stack = bottomUp !! addr
    where
    bottomUp = reverse stack

-- Overwrite the cell at stack address `i` with `x` and return the new stack.
-- Addresses count from the bottom of the stack (address 0 is the last list
-- element), so the cell sits at list index `length vs - 1 - i`.
-- An address outside the stack is reported explicitly: without the check a
-- too-large address would make `splitAt` receive a negative count and the
-- write would silently hit the top of the stack instead of failing.
store :: SA -> Value -> Stack -> Stack
store i x vs
    | i < 0 || i >= n = error $ "store: stack address out of range: " ++ show i
    | otherwise       = above ++ x : below
    where
    n = length vs
    -- `above` are the cells nearer the top; the old value is dropped
    (above, _ : below) = splitAt (n - 1 - i) vs

In [5]:
import Data.Bits

-- Execute exactly one instruction and return the next machine state.
-- A stopped machine is represented by empty code (see HALT). Any state that
-- no rule applies to (missing operands, unsupported instruction, empty code)
-- aborts with a "VM is stuck" error that shows the state.
step :: CMa -> CMa
step vm = case vm of
    (HALT : _, vs)              -> ([], vs)
    (NEG : is, v : vs)          -> (is, negate v : vs)
    -- all two-operand instructions: the top of the stack is the RIGHT operand.
    -- The stack is only inspected once the instruction is known to be binary.
    (op : is, stack)
        | Just f <- binary op
        , v2 : v1 : vs <- stack -> (is, f v1 v2 : vs)
    (POP : is, _ : vs)          -> (is, vs)
    (DUP : is, v : vs)          -> (is, v : v : vs)
    (LOADc v : is, vs)          -> (is, v : vs)
    -- the address on top is replaced by the value stored at that address
    (LOAD : is, a : vs)         -> (is, load a vs : vs)
    -- pops the address, writes the value below it, and keeps that value on top
    (STORE : is, a : n : vs)    -> (is, n : store a n vs)
    (JUMP (PA c) : _, vs)       -> (c, vs)
    -- conditional jump consumes the tested value; it jumps only on 0
    (JUMPz (PA c) : _, 0 : vs)  -> (c, vs)
    (JUMPz _ : is, _ : vs)      -> (is, vs)
    _                           -> error $ "VM is stuck: "++show vm
    where
    -- semantics of the binary instructions; Nothing for everything else
    binary :: Instr pa -> Maybe (Value -> Value -> Value)
    binary instr = case instr of
        ADD -> Just (+)
        SUB -> Just (-)
        MUL -> Just (*)
        DIV -> Just div
        AND -> Just (.&.)
        OR  -> Just (.|.)
        EQU -> Just (cmp (==))
        NEQ -> Just (cmp (/=))
        GR  -> Just (cmp (>))
        GEQ -> Just (cmp (>=))
        LE  -> Just (cmp (<))
        LEQ -> Just (cmp (<=))
        _   -> Nothing
    -- comparisons push 1 for true and 0 for false
    cmp :: (Value -> Value -> Bool) -> Value -> Value -> Value
    cmp rel a b = b2i (rel a b)

-- Interpret a machine integer as a truth value.
-- 0 is False and every other value is True (the C convention), which makes
-- the function total; 1 maps to True as before.
i2b :: (Eq a, Num a) => a -> Bool
i2b 0 = False
i2b _ = True

-- Encode a truth value as a number: True becomes 1, False becomes 0.
b2i :: Num a => Bool -> a
b2i flag
    | flag      = 1
    | otherwise = 0

-- Full execution trace starting from the given state: every intermediate
-- state in order. The trace ends with the first state whose code is empty.
exec :: CMa -> [CMa]
exec vm@(is, _)
    | null is   = [vm]
    | otherwise = vm : exec (step vm)

-- Run the machine to completion and return only the final state
-- (the last entry of the execution trace).
run :: CMa -> CMa
run vm = last (exec vm)

In [6]:
-- Assembly-style code: every instruction carries a label, and jump targets
-- are written as label names instead of program addresses.
type LabeledCode = [LabeledInstr]
-- `lb :. i` attaches the label `lb` to the instruction `i`.
data LabeledInstr = Label :. Instr Label  deriving Show
type Label = String

-- Countdown example in labeled form: push 3, then keep subtracting 1 until
-- the value on top of the stack is 0. Unlabeled instructions use "".
lbis1 :: LabeledCode
lbis1 =
    [ ""     :. LOADc 3        -- initial counter
    , "loop" :. LOADc 1
    , ""     :. SUB            -- counter - 1
    , ""     :. DUP            -- JUMPz consumes one copy of the counter
    , ""     :. JUMPz "end"    -- leave the loop once the counter is 0
    , ""     :. JUMP "loop"
    , "end"  :. HALT
    ]

In [7]:
import Data.Maybe

-- Turn labeled code into executable code by replacing every label used as a
-- jump target with the code that starts at the labeled instruction.
-- The result refers to itself (`is'` is used inside its own definition), so
-- backward jumps become genuine cycles; this relies on laziness.
-- Errors: a jump to the empty label or to a label that no instruction
-- carries is reported with an explicit message when that jump target is
-- first inspected. If a label occurs several times, the first one wins.
assemble :: LabeledCode -> Code
assemble lbis = is'
    where
        is' = map (fmap lb2a) is
        (lbs,is) = unzip [(lb,i) | lb :. i <- lbis]
        lb2a "" = error "empty string label"
        lb2a lb = case elemIndex lb lbs of
            -- the suffix of the assembled code beginning at the label
            Just n  -> PA (drop n is')
            Nothing -> error $ "undefined label: " ++ show lb

-- Position of the first occurrence of `x` in the list.
-- Partial: fails (via fromJust) when `x` does not occur.
elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' x = fromJust . elemIndex x

In [8]:
-- The countdown program written directly as executable code.
-- `loop` refers to itself through its JUMP target, so it is a cyclic list;
-- `end` is the code that follows the loop.
is1 :: Code
is1 = LOADc 3 : loop

loop, end :: Code
loop = LOADc 1 : SUB : DUP : JUMPz (PA end) : JUMP (PA loop) : end
end  = [HALT]

In [9]:
assemble lbis1
is1

[LOADc 3,LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT]

[LOADc 3,LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT]

In [10]:
mapM_ print . exec $ (is1,[])

{ stack = []
 , code = [LOADc 3,LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [3]
 , code = [LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,3]
 , code = [SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [2]
 , code = [DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [2,2]
 , code = [JUMPz ...,JUMP ...,HALT] }
{ stack = [2]
 , code = [JUMP ...,HALT] }
{ stack = [2]
 , code = [LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,2]
 , code = [SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1]
 , code = [DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,1]
 , code = [JUMPz ...,JUMP ...,HALT] }
{ stack = [1]
 , code = [JUMP ...,HALT] }
{ stack = [1]
 , code = [LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,1]
 , code = [SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [0]
 , code = [DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [0,0]
 , code = [JUMPz ...,JUMP ...,HALT] }
{ stack = [0]
 , code = [HALT] }
{ stack = [0]
 , code = [] }

In [11]:
mapM_ print . exec $ (assemble lbis1,[])

{ stack = []
 , code = [LOADc 3,LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [3]
 , code = [LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,3]
 , code = [SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [2]
 , code = [DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [2,2]
 , code = [JUMPz ...,JUMP ...,HALT] }
{ stack = [2]
 , code = [JUMP ...,HALT] }
{ stack = [2]
 , code = [LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,2]
 , code = [SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1]
 , code = [DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,1]
 , code = [JUMPz ...,JUMP ...,HALT] }
{ stack = [1]
 , code = [JUMP ...,HALT] }
{ stack = [1]
 , code = [LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,1]
 , code = [SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [0]
 , code = [DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [0,0]
 , code = [JUMPz ...,JUMP ...,HALT] }
{ stack = [0]
 , code = [HALT] }
{ stack = [0]
 , code = [] }

<br>

이제 책 Fig.2.8 (p.13) 에 나온 C언어 코드를 CMa 명령 코드로 컴파일하는 함수들을 직접 구현해 보자.
**식**(expression)을 컴파일하는 `codeR` 및 `codeL`과
**문**(statement)을 컴파일하는 `code`를 하스켈로 작성해 보자.

In [12]:
-- Expressions of the source language; the comment on each constructor shows
-- the corresponding C syntax.
data Expr
    = Lit Int        -- n   (integer literal)
    | Var String     -- x
    | Neg Expr       -- -e
    | Add Expr Expr  -- e1 + e2
    | Sub Expr Expr  -- e1 - e2
    | Mul Expr Expr  -- e1 * e2
    | Div Expr Expr  -- e1 / e2
    | And Expr Expr  -- e1 && e2
    | Or  Expr Expr  -- e1 || e2
    | Equ Expr Expr  -- e1 == e2
    | Neq Expr Expr  -- e1 != e2
    | Gr  Expr Expr  -- e1 >  e2
    | Geq Expr Expr  -- e1 >= e2
    | Le  Expr Expr  -- e1 <  e2
    | Leq Expr Expr  -- e1 <= e2
    | Assign Expr Expr  -- eL <- eR    (assignment expression; written eL = eR in actual C syntax)
    deriving (Eq,Ord,Show)

-- Statements of the source language; the comment on each constructor shows
-- the corresponding C syntax.
data Stmt
    = EStmt Expr                -- e;  (expression as statement)
    | Block [Stmt]              -- { s1; ...; sn; }
    | If Expr Stmt (Maybe Stmt) -- if (e) s   or   if (e) s1 else s0
    | While Expr Stmt           -- while (e) s
    | For (Expr,Expr,Expr) Stmt -- for (e1;e2;e3) s
    deriving (Eq,Ord,Show)

In [15]:
import Data.Map (Map, (!), (!?))
import qualified Data.Map as Map

-- Address environment: maps each variable name to its stack address.
type AEnv = Map String SA

-- Compile an expression for its value (R-value).
-- The result is a code transformer: given the code that comes next (the
-- continuation), it returns that code prefixed with the instructions that
-- leave the value of the expression on top of the stack.
codeR :: Expr -> AEnv -> (Code -> Code)
codeR expr ρ = case expr of
    Lit q        -> (LOADc q :)
    -- a variable's value: push its address, then replace it by the content
    Var _        -> codeL expr ρ . (LOAD :)
    Neg e        -> codeR e ρ . (NEG :)
    Add e1 e2    -> binary ADD e1 e2
    Sub e1 e2    -> binary SUB e1 e2
    Mul e1 e2    -> binary MUL e1 e2
    Div e1 e2    -> binary DIV e1 e2
    And e1 e2    -> binary AND e1 e2
    Or  e1 e2    -> binary OR  e1 e2
    Equ e1 e2    -> binary EQU e1 e2
    Neq e1 e2    -> binary NEQ e1 e2
    Gr  e1 e2    -> binary GR  e1 e2
    Geq e1 e2    -> binary GEQ e1 e2
    Le  e1 e2    -> binary LE  e1 e2
    Leq e1 e2    -> binary LEQ e1 e2
    -- value first, then the target address, as STORE expects
    Assign eL eR -> codeR eR ρ . codeL eL ρ . (STORE :)
    _            -> error $ "R-value not defined: "++show expr
    where
    -- left operand, right operand, then the operator instruction
    binary op e1 e2 = codeR e1 ρ . codeR e2 ρ . (op :)

-- Compile an expression for its address (L-value).
-- Only variables have an address: the generated code pushes the stack
-- address that the environment assigns to the variable.
-- Errors: a variable missing from the environment is reported by name (the
-- error surfaces when the instruction operand is first inspected); any
-- other expression has no L-value.
codeL :: Expr -> AEnv -> (Code -> Code)
codeL (Var x) ρ = (LOADc addr :)
    where
    addr = Map.findWithDefault (error $ "unbound variable: " ++ x) x ρ
codeL e       _   = error $ "L-value not defined: "++show e

-- Compile a statement.
-- The result is a code transformer: it receives the continuation `k` (the
-- code that runs after the statement) and returns the statement's code
-- followed by `k`. Having `k` at hand is what lets conditionals and loops
-- jump to code that is only known to the caller.
code :: Stmt -> AEnv -> (Code -> Code)
-- an expression statement discards the value of the expression
code (EStmt e) ρ = codeR e ρ . (POP :)
code (Block ss) ρ = foldr (.) id [code s ρ | s <- ss]
-- if (e) s: skip the body, i.e. jump straight to k, when e is 0
code (If e s Nothing) ρ = \k ->
    codeR e ρ . (JUMPz (PA k) :) . code s ρ $ k
-- if (e) s1 else s0: the else-branch code is built once and used both as
-- the JUMPz target and as the code placed after the then-branch
code (If e s1 (Just s0)) ρ = \k ->
    let elseCode = code s0 ρ k
    in  codeR e ρ . (JUMPz (PA elseCode) :)
          . code s1 ρ . (JUMP (PA k) :)
          $ elseCode
-- while (e) s: the jump at the end of the body points back to the loop's
-- own code, so `loop` is defined in terms of itself (one shared cyclic
-- structure instead of rebuilding the loop code for every back jump)
code (While e s) ρ = \k ->
    let loop = codeR e ρ
             . (JUMPz (PA k) :)
             . code s ρ
             . (JUMP (PA loop) :)
             $ k
    in  loop
-- for (e1;e2;e3) s  is compiled as  e1; while (e2) { s; e3; }
code (For (e1,e2,e3) s) ρ = code (Block ss) ρ
    where ss = [ EStmt e1
               , While e2 $ Block [s, EStmt e3]
               ]

지금은 변수 메모리 공간은 미리 할당되어 있다고 가정한다.
즉, 적절한 *주소환경*(address environment)과 그에 맞는 크기의 stack으로 시작한다고 가정한다는 말이다.

예컨대, 아래 코드를 컴파일한다면
$\rho = \{x\mapsto 0,\, i\mapsto 1\}$라는 주소환경으로
$x$와 $i$에 값을 저장할 주소를 미리 정해 놓고 초기 스택도 그에
맞춰 미리 크기를 잡아 놓고 시작하기로 하자. 

```c
int x = 1000;
int i = 1;

x <- x + i;
i <- i + 1;
```

주소환경과 초기 스택을 적절하게 구성해 놓은 상태로 시작한다면 위 코드는 사실상 아래와 같은 코드를 컴파일하는 것과 같다.

```c
x <- 1000;
i <- 1;

x <- x + i;
i <- i + 1;
```

In [13]:
-- Straight-line example: four assignment statements in sequence.
stmt3 = Block $ map EStmt
    [ Assign (Var "x") (Lit 1000)                -- x <- 1000;
    , Assign (Var "i") (Lit 1)                   -- i <- 1;
    , Assign (Var "x") (Add (Var "x") (Var "i")) -- x <- x + i;
    , Assign (Var "i") (Add (Var "i") (Lit 1))   -- i <- i + 1;
    ]

In [14]:
-- stmt3 compiled with x at stack address 0 and i at stack address 1.
-- The result still expects the continuation code as its argument.
is3 = code stmt3 (Map.fromList [("x",0),("i",1)])

In [15]:
is3 []
is3 [HALT]
is3 [DUP,POP,HALT]

[LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP]

[LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP,HALT]

[LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP,DUP,POP,HALT]

In [16]:
mapM_ print $ exec (is3 [],[0,0])

{ stack = [0,0]
 , code = [LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP] }
{ stack = [1000,0,0]
 , code = [LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP] }
{ stack = [0,1000,0,0]
 , code = [STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP] }
{ stack = [1000,0,1000]
 , code = [POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP] }
{ stack = [0,1000]
 , code = [LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP] }
{ stack = [1,0,1000]
 , code = [LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP] }
{ stack = [1,1,0,1000]
 , code = [STO

In [17]:
run (is3 [],[1,1000])

{ stack = [2,1001]
 , code = [] }

<br>

이번엔 이 프로그램을 컴파일해 보자.

```c
int x = 1000;
int i = 1;
while (i < 5) {
    x <- x + i;
    i <- i + 1;
}
```

마찬가지로 $x$와 $i$에 대한 적절한 주소환경 $\{x\mapsto 0,\,i\mapsto 1\}$과 초기 스택으로 시작한다고 가정한다면 아래 코드를 컴파일하면 되는 것이다.
```c
x <- 1000;
i <- 1;
while (i < 5) {
    x <- x + i;
    i <- i + 1;
}
```

In [18]:
-- Initialisation part of the loop example:  x <- 1000;  i <- 1;
stmt41 = Block [set "x" 1000, set "i" 1]
    where
    -- v <- n;
    set v n = EStmt $ Assign (Var v) (Lit n)

-- Loop part of the example:  while (i < 5) { x <- x + i; i <- i + 1; }
stmt42 = Block [While cond body]
    where
    cond = Le (Var "i") (Lit 5)                              -- i < 5
    body = Block
        [ EStmt $ Assign (Var "x") (Add (Var "x") (Var "i")) -- x <- x + i;
        , EStmt $ Assign (Var "i") (Add (Var "i") (Lit 1))   -- i <- i + 1;
        ]

-- Code placed after the loop:  x <- x + 100;  i <- i + 100;
stmt43 = Block [bump "x", bump "i"]
    where
    -- v <- v + 100;
    bump v = EStmt $ Assign (Var v) (Add (Var v) (Lit 100))

In [19]:
-- Address environment shared by the three fragments: x at address 0, i at address 1.
rho4 = Map.fromList [("x",0),("i",1)]
-- Each fragment compiled separately; the results are code transformers
-- (Code -> Code), so they can be chained with (.) before supplying the final code.
is41 = code stmt41 rho4
is42 = code stmt42 rho4
is43 = code stmt43 rho4

In [20]:
is41 . is42 $ []
is41 . is42 . is43 $ []

[LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 1,LOAD,LOADc 5,LE,JUMPz ...,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP,JUMP ...]

[LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 1,LOAD,LOADc 5,LE,JUMPz ...,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP,JUMP ...,LOADc 0,LOAD,LOADc 100,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 100,ADD,LOADc 1,STORE,POP]

In [21]:
run (is41 . is41 $ [], [0,0])

{ stack = [1,1000]
 , code = [] }

In [22]:
run (is41 . is42 $ [], [0,0])

{ stack = [5,1010]
 , code = [] }

In [23]:
run (is41 . is42 . is43 $ [], [0,0])

{ stack = [105,1110]
 , code = [] }

In [24]:
run (is41 . is43 . is43 $ [], [0,0]) -- stmt43을 두번 실행했으므로 100을 두번씩 더해 200씩 증가

{ stack = [201,1200]
 , code = [] }

<br>

정리하자면, 컴파일 함수 `codeR`, `codeL`, `code`가 *식*(`Expr`) 또는 *문*(`Stmt`)과 *주소환경*(`AEnv`)을 받아 고정된 코드(`Code`)를 결과로 계산하는 대신,
뒤이어 오는 **나머지 할 일** 코드를 인자로 받아 전체 코드를 계산해내는 코드 변환 함수(`Code -> Code`)를 결과로 계산하도록 수정하였다.
이렇게 함으로써 조건문이나 반복문에서 그 다음 뒤이어 아직 정해지지 않은 코드 위치로 이동하는 코드를 작성하기에 용이해진다.

이렇게 **나머지 할 일**이라는 개념을 전문용어로는 continuation이라고 한다. 순차적으로 진행되지 않는 계산을 표현하기 위한 개념으로 다양한 곳에 활용된다.