# 2. Imperative Programming Languages

이번엔 전역변수 메모리 할당 및 초기화를 추가해보자. 아직 배열은 문법만 정의하고 다음 기회에 ...

---
컴파일 타겟이 되는 VM의 단순화된 버전을 하스켈로 구현

In [28]:
-- {-# LANGUAGE DeriveFoldable #-}
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE NoMonomorphismRestriction #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}

-- | Instruction set of the simplified VM, parameterised over the type @pa@
-- that represents jump targets (program addresses).
-- The derived 'Functor' instance maps over those jump targets.
-- The constructors that are commented out are not part of this version.
-- NOTE(review): `step` has no equation for JUMPi, so executing it ends in the
-- "VM is stuck" error.
data Instr pa
    = HALT | NEG | ADD | SUB | MUL | DIV
    | AND | OR | EQU | NEQ | GR | GEQ | LE | LEQ
    | POP | DUP
    | LOADc Int | LOAD -- | LOADr | LOADrc
    | STORE -- | STOREr
    | JUMP pa | JUMPz pa | JUMPi pa
    -- | CALL | RETURN | ENTER | ALLOC | SLIDE | MARK
    -- | NEW
    deriving (Eq, Ord, Show, Functor)

-- | Machine state: the code that remains to be executed, paired with the stack.
type CMa = (Code, Stack)

-- | The stack as a list of values; `step` pushes and pops at the head of the list.
type Stack = [Value]
-- | Values held on the stack are plain machine integers.
type Value = Int

-- stack address as reverse index of stack
-- (address 0 is the last element of the list, see `load`)
type SA = Int

-- | A program is a list of instructions whose jump targets are 'PA's.
type Code = [Instr PA]

-- program address representation:
-- an address is represented by the code that execution continues with
newtype PA = PA Code  deriving (Eq,Ord,Show)

In [29]:
import Data.List

-- | Placeholder value that is always rendered as an ellipsis.
data DotDotDot = DotDotDot

instance Show DotDotDot where
    show = const "..."

-- Jump targets embed code themselves, so printing them could go on forever;
-- every target is therefore rendered as "...".
instance {-# OVERLAPS #-} Show Code where
    show is = "[" ++ intercalate "," (map render is) ++ "]"
      where
        render = show . fmap elide
        elide (PA _) = DotDotDot

-- Render a machine state as its stack followed by its (elided) code,
-- so that printing always terminates.
instance {-# OVERLAPS #-} Show CMa where
    show (is,vs) = concat
        [ "{ stack = ", show vs
        , "\n , code = ", show is
        , " }"
        ]

In [30]:
-- | Read the value at stack address @i@. Addresses count from the bottom of
-- the stack, i.e. address 0 is the last element of the list.
load :: SA -> Stack -> Value
load i vs = bottomUp !! i
    where
    bottomUp = reverse vs

-- | Overwrite the cell at stack address @i@ with @x@ and return the new stack.
-- Addresses count from the bottom of the stack (address 0 is the last list
-- element), mirroring 'load'.
--
-- Calls 'error' when @i@ is not a valid address of @vs@. Previously an address
-- beyond the top of a non-empty stack made 'splitAt' receive a negative count,
-- which silently overwrote the top cell instead of failing.
store :: SA -> Value -> Stack -> Stack
store i x vs
    | i < 0 || i >= depth = error $ "store: stack address out of range: " ++ show i
    | otherwise           = above ++ x : below
    where
    depth = length vs
    -- list index of the addressed cell, counted from the top of the stack
    (above, rest) = splitAt (depth - 1 - i) vs
    -- the guard above ensures `rest` starts with the cell being replaced
    below = drop 1 rest

In [31]:
import Data.Bits

-- | Execute one instruction: consume the instruction at the head of the code
-- and update the stack. Jumps replace the remaining code by the jump target.
-- Calls 'error' ("VM is stuck") when no rule applies to the current state.
step :: CMa -> CMa
step (HALT : _, stack) = ([], stack)
step (NEG : rest, v : stack) = (rest, negate v : stack)
-- all binary operators pop the right operand first, then the left one
step (instr : rest, v2 : v1 : stack)
    | Just op <- binOp instr = (rest, v1 `op` v2 : stack)
step (POP : rest, _ : stack) = (rest, stack)
step (DUP : rest, v : stack) = (rest, v : v : stack)
step (LOADc v : rest, stack) = (rest, v : stack)
step (LOAD : rest, a : stack) = (rest, load a stack : stack)
step (STORE : rest, a : n : stack) = (rest, n : store a n stack)
step (JUMP (PA target) : _, stack) = (target, stack)
step (JUMPz (PA target) : _, 0 : stack) = (target, stack)
step (JUMPz _ : rest, _ : stack) = (rest, stack)
step vm = error $ "VM is stuck: "++show vm

-- | The stack operation performed by a binary instruction, or 'Nothing' when
-- the instruction is not a binary operator.
binOp :: Instr pa -> Maybe (Value -> Value -> Value)
binOp instr = case instr of
    ADD -> Just (+)
    SUB -> Just (-)
    MUL -> Just (*)
    DIV -> Just div
    AND -> Just (.&.)
    OR  -> Just (.|.)
    EQU -> Just (truth (==))
    NEQ -> Just (truth (/=))
    GR  -> Just (truth (>))
    GEQ -> Just (truth (>=))
    LE  -> Just (truth (<))
    LEQ -> Just (truth (<=))
    _   -> Nothing
  where
    -- comparisons push 1 for true and 0 for false
    truth rel x y = b2i (rel x y)

-- | Interpret a machine value as a boolean: 0 is false, any other value is
-- true. (The earlier definition was undefined for values other than 0 and 1.)
i2b :: (Eq a, Num a) => a -> Bool
i2b 0 = False
i2b _ = True

-- | Encode a boolean as a machine value: 'False' is 0 and 'True' is 1.
b2i :: Num a => Bool -> a
b2i False = 0
b2i True  = 1

-- | Trace of an execution: every machine state from the given one up to and
-- including the first state whose code is empty.
exec :: CMa -> [CMa]
exec vm = running ++ take 1 halted
    where
    -- states that still have code to run, followed by the rest of the trace
    (running, halted) = span (not . null . fst) (iterate step vm)

-- | Run the machine until no code is left and return that final state.
run :: CMa -> CMa
run = until (null . fst) step

In [32]:
-- | Code in which jump targets are written as symbolic labels.
type LabeledCode = [LabeledInstr]
-- | An instruction paired with its own label (left of ':.'); its jump targets
-- are labels as well. `lbis1` uses the empty string for unlabeled instructions.
data LabeledInstr = Label :. Instr Label  deriving Show
type Label = String

-- | Sample labeled program: pushes 3, then repeatedly subtracts 1 and jumps
-- back to "loop" until the value is 0, at which point it jumps to "end".
lbis1 :: LabeledCode
lbis1 =
    [ ""     :. LOADc 3
    , "loop" :. LOADc 1
    , ""     :. SUB
    , ""     :. DUP
    , ""     :. JUMPz "end"
    , ""     :. JUMP "loop"
    , "end"  :. HALT
    ]

In [33]:
import Data.Maybe

-- | Resolve symbolic labels: every jump target label is replaced by the code
-- that starts at the first instruction carrying that label.
--
-- The result is defined in terms of itself (@is'@), which works because jump
-- targets are only evaluated when they are actually followed.
-- Using the empty label or an undefined label as a jump target raises an
-- 'error' (naming the label) once that target is evaluated.
assemble :: LabeledCode -> Code
assemble lbis = is'
    where
        is' = map (fmap lb2a) is
        (lbs,is) = unzip [(lb,i) | lb :. i <- lbis]
        lb2a "" = error "empty string label"
        lb2a lb = PA $ case elemIndex lb lbs of
            Just n  -> drop n is'   -- code from the labeled instruction onwards
            Nothing -> error $ "undefined label: " ++ show lb

-- | Index of the first occurrence of @x@ in the list.
-- Partial: fails (via 'fromJust') when @x@ does not occur.
elemIndex' :: Eq a => a -> [a] -> Int
elemIndex' x = fromJust . elemIndex x

In [34]:
mapM_ print . exec $ (assemble lbis1,[])

{ stack = []
 , code = [LOADc 3,LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [3]
 , code = [LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,3]
 , code = [SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [2]
 , code = [DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [2,2]
 , code = [JUMPz ...,JUMP ...,HALT] }
{ stack = [2]
 , code = [JUMP ...,HALT] }
{ stack = [2]
 , code = [LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,2]
 , code = [SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1]
 , code = [DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,1]
 , code = [JUMPz ...,JUMP ...,HALT] }
{ stack = [1]
 , code = [JUMP ...,HALT] }
{ stack = [1]
 , code = [LOADc 1,SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [1,1]
 , code = [SUB,DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [0]
 , code = [DUP,JUMPz ...,JUMP ...,HALT] }
{ stack = [0,0]
 , code = [JUMPz ...,JUMP ...,HALT] }
{ stack = [0]
 , code = [HALT] }
{ stack = [0]
 , code = [] }

<br>

이제 책 Fig.2.8 (p.13) 에 나온 C언어 코드를 CMa 명령 코드로 컴파일하는 함수들을 직접 구현해 보자.
**식**(expression)을 컴파일하는 `codeR` 및 `codeL`과
**문**(statement)을 컴파일하는 `code`를 하스켈로 작성해 보자.

In [38]:
-- | Abstract syntax of expressions; each constructor is annotated with the
-- concrete syntax it stands for.
data Expr
    = Lit Int        -- n   (integer literal)
    | Var String     -- x
    | Arr Expr Expr  -- e1[e2]
    | Neg Expr       -- -e
    | Add Expr Expr  -- e1 + e2
    | Sub Expr Expr  -- e1 - e2
    | Mul Expr Expr  -- e1 * e2
    | Div Expr Expr  -- e1 / e2
    | And Expr Expr  -- e1 && e2
    | Or  Expr Expr  -- e1 || e2
    | Equ Expr Expr  -- e1 == e2
    | Neq Expr Expr  -- e1 != e2
    | Gr  Expr Expr  -- e1 >  e2
    | Geq Expr Expr  -- e1 >= e2
    | Le  Expr Expr  -- e1 <  e2
    | Leq Expr Expr  -- e1 <= e2
    | Assign Expr Expr  -- eL <- eR    (assignment expression; in actual C syntax this is eL = eR)
    deriving (Eq,Ord,Show)

-- | Abstract syntax of statements.
data Stmt
    = DStmt Decl                -- int x = e;  declaration statement (always includes an initializer)
    | EStmt Expr                -- e;  (expression as statement)
    | Block [Stmt]              -- { s1; ...; sn; }
    | If Expr Stmt (Maybe Stmt) -- if (e) s  or  if (e) s1 else s0
    | While Expr Stmt           -- while (e) s
    | For (Expr,Expr,Expr) Stmt -- for (e1;e2;e3) s
    deriving (Eq,Ord,Show)

-- | Variable declarations: a type, the declared name, and the initializer(s).
data Decl
    = VDec Ty String Expr   -- int x = e;
    | ADec Ty String [Expr] -- int a[n] = {v1,v2,...,vn};
    deriving (Eq,Ord,Show)

-- | Types of declared variables.
data Ty
    = Tint        -- int
    | Tarr Ty Int -- int[n]   for simplicity only consider 1-dim int array
    deriving (Eq,Ord,Show)

In [40]:
import Data.Map (Map, (!), (!?))
import qualified Data.Map as Map

type AEnv = Map String SA -- address environment: variable name -> stack address
type TEnv = Map String Ty -- type environment: variable name -> declared type

-- | Compile an expression for its value (R-value). The result is a code
-- transformer: it prepends the instructions for the expression to the code
-- that follows. Executing them leaves the value on top of the stack.
codeR :: Expr -> AEnv -> (Code -> Code)
codeR expr ρ = case expr of
    Lit q        -> (LOADc q :)
    Var _        -> deref
    Arr _ _      -> deref
    Neg e        -> codeR e ρ . (NEG :)
    Add e1 e2    -> binary ADD e1 e2
    Sub e1 e2    -> binary SUB e1 e2
    Mul e1 e2    -> binary MUL e1 e2
    Div e1 e2    -> binary DIV e1 e2
    And e1 e2    -> binary AND e1 e2
    Or  e1 e2    -> binary OR  e1 e2
    Equ e1 e2    -> binary EQU e1 e2
    Neq e1 e2    -> binary NEQ e1 e2
    Gr  e1 e2    -> binary GR  e1 e2
    Geq e1 e2    -> binary GEQ e1 e2
    Le  e1 e2    -> binary LE  e1 e2
    Leq e1 e2    -> binary LEQ e1 e2
    -- value first, then the target address, as STORE expects
    Assign eL eR -> codeR eR ρ . codeL eL ρ . (STORE :)
    _            -> error $ "R-value not defined: "++show expr
  where
    -- the value of a variable or array element: compute its address, then load
    deref = codeL expr ρ . (LOAD :)
    -- left operand, right operand, then the operator instruction
    binary op e1 e2 = codeR e1 ρ . codeR e2 ρ . (op :)

-- | Compile an expression for its address (L-value). Only variables and array
-- elements have an address; any other expression raises an 'error'.
codeL :: Expr -> AEnv -> (Code -> Code)
codeL expr ρ = case expr of
    -- a variable's address comes straight from the address environment
    Var x     -> (LOADc (ρ ! x) :)
    -- e1[e2]: address of e1 plus the value of e2
    Arr e1 e2 -> codeL e1 ρ . codeR e2 ρ . (ADD :)
    _         -> error $ "L-value not defined: "++show expr

-- | Compile a statement. The result is a code transformer that receives the
-- code following the statement (the continuation @k@) and prepends the
-- statement's instructions to it; jump targets are built from @k@ directly.
code :: Stmt -> AEnv -> (Code -> Code)
-- int x = e;  is compiled like the assignment statement  x <- e;
code (DStmt (VDec Tint x e)) ρ = code (EStmt (Assign (Var x) e)) ρ
-- int a[n] = {e0,e1,...};  is compiled like  { a[0] <- e0; a[1] <- e1; ... }
code (DStmt (ADec (Tarr Tint n) a es)) ρ = code (Block inits) ρ
    where
    inits = [ EStmt (Assign (Arr (Var a) (Lit i)) e)
            | (i, e) <- zip [0 .. n-1] (take n es) ]
code s@(DStmt _) _ = error $ "not supported: "++show s
-- an expression statement discards the value of the expression
code (EStmt e) ρ = codeR e ρ . (POP :)
code (Block ss) ρ = foldr (\s rest -> code s ρ . rest) id ss
-- if (e) s : skip s (jump to k) when e evaluates to 0
code (If e s Nothing) ρ =
    \k -> codeR e ρ (JUMPz (PA k) : code s ρ k)
-- if (e) s1 else s0 : jump to the else-branch when e evaluates to 0;
-- the then-branch ends by jumping over the else-branch
code (If e s1 (Just s0)) ρ =
    \k -> let elseCode = code s0 ρ k
              thenCode = code s1 ρ (JUMP (PA k) : elseCode)
          in  codeR e ρ (JUMPz (PA elseCode) : thenCode)
-- while (e) s : leave the loop (jump to k) when e evaluates to 0;
-- the body ends by jumping back to the start of the loop
code (While e s) ρ = loop
    where
    loop k = codeR e ρ (JUMPz (PA k) : code s ρ (JUMP (PA (loop k)) : k))
-- for (e1;e2;e3) s  is compiled like  { e1; while (e2) { s; e3; } }
code (For (e1,e2,e3) s) ρ = code (Block [initStmt, loopStmt]) ρ
    where
    initStmt = EStmt e1
    loopStmt = While e2 (Block [s, EStmt e3])

프로그램을 컴파일하기 전에 자동으로 주소환경 $\rho$를 계산할 수 있다면 지난번 `CMA02.ipynb`에서처럼 수동으로 주소환경을 구성하지 않아도 된다.
지금은 일단 전역변수만 있다고 가정하므로 비교적 간단히 주소환경을 계산해낼 수 있다.

```c
int x = 1000;
int i = 1;
x <- x + i;
int y = x + 100;
i <- i + 1;
```

이전에 아무 변수도 선언되지 않았다면 나타나는 순서대로 스택 주소 0부터 $\{x\mapsto 0, i\mapsto 1, y\mapsto 2\}$ 이런 식으로 변수 선언만 훑어내려가면서 주소환경을 만들면 된다.

만일 기존에 변수가 예를 들어 두개 선언되어 있고 그에 대한 주소환경이 $\{w\mapsto 0, z\mapsto 1\}$로 구성되어 있는 상황이었다면,
그 이후 스택 주소 2부터 $\{w\mapsto 0, z\mapsto 1, x\mapsto 2, i\mapsto 3, y\mapsto 4\}$ 이런 식으로 확장된 주소환경을 만들면 된다.

이런 방식으로 주소환경을 자동으로 계산하기 위해, 컴파일할 문장(`Stmt`)과 기존의 주소환경(`AEnv`)을 넘겨받아 확장된 주소환경(`AEnv`)을 계산하는 함수 `extend`를 정의하자.
이 `extend` 함수의 핵심 동작은 선언문(declaration statement)인 경우이므로 이 경우의 처리를 돕는 함수인 `extendD`도 함께 정의한다.

In [41]:
-- | Walk over a statement and extend the address and type environments with
-- every variable declared in it, visiting nested statements in order.
extend :: Stmt -> (AEnv,TEnv) -> (AEnv,TEnv)
extend stmt env = case stmt of
    DStmt d           -> extendD d env
    EStmt _           -> env
    Block ss          -> foldl (\acc s -> extend s acc) env ss
    If _ s Nothing    -> extend s env
    If _ s1 (Just s0) -> (extend s0 . extend s1) env
    While _ s         -> extend s env
    For _ s           -> extend s env

-- | Extend the environments with the variable introduced by a declaration.
-- The variable is placed at the next free stack address and its declared type
-- is recorded. Initializer expressions do not influence the environments.
-- Declarations other than @int x@ and @int a[n]@ raise an 'error'.
extendD :: Decl -> (AEnv,TEnv) -> (AEnv,TEnv)
extendD (VDec Tint x _) (ρ,σ) =
    ( Map.insert x (nextFreeSA ρ σ) ρ, Map.insert x Tint σ )
extendD (ADec ty@(Tarr Tint _) a _) (ρ,σ) =
    ( Map.insert a (nextFreeSA ρ σ) ρ, Map.insert a ty σ )
extendD d _ = error $ "not supported: "++show d

-- | First stack address above every variable allocated so far: the largest
-- @address + size@ over all known variables, or 0 when there are none.
-- A variable without a recorded type is assumed to occupy a single cell.
nextFreeSA :: AEnv -> TEnv -> SA
nextFreeSA ρ σ = maximum (0 : [ addr + size x | (x, addr) <- Map.toList ρ ])
    where
    size x = maybe 1 tySize (Map.lookup x σ)

-- | Number of stack cells a value of the given type occupies.
tySize :: Ty -> Int
tySize Tint       = 1
tySize (Tarr t n) = n * tySize t

: 

In [15]:
-- Sample program: three global int declarations interleaved with assignments.
stmt3 = Block 
    [ DStmt $ VDec Tint "x" (Lit 1000)                    -- int x = 1000;
    , DStmt $ VDec Tint "i" (Lit 1)                       -- int i = 1;
    , EStmt $ Assign (Var "x") (Add (Var "x") (Var "i"))  -- x <- x + i;
    , DStmt $ VDec Tint "y" (Add (Var "x") (Lit 100))     -- int y = x + 100;
    , EStmt $ Assign (Var "i") (Add (Var "i") (Lit 1))    -- i <- i + 1;
    ]

In [17]:
-- `extend` takes and returns an (AEnv,TEnv) pair; only the address
-- environment is displayed here.
fst $ extend stmt3 (Map.empty, Map.empty)
fst $ extend stmt3 ( Map.fromList [("w",0),("z",1)]
                   , Map.fromList [("w",Tint),("z",Tint)] )

fromList [("i",1),("x",0),("y",2)]

fromList [("i",3),("w",0),("x",2),("y",4),("z",1)]

In [18]:
-- `extend` works on an (AEnv,TEnv) pair; `code` only needs the address environment.
ρ3 = fst (extend stmt3 (Map.empty, Map.empty))
is3 = code stmt3 ρ3

In [20]:
:type is3

In [19]:
ρ3

fromList [("i",1),("x",0),("y",2)]

In [165]:
is3 []
is3 [HALT]
is3 [DUP,POP,HALT]

[LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 0,LOAD,LOADc 100,ADD,LOADc 2,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP]

[LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 0,LOAD,LOADc 100,ADD,LOADc 2,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP,HALT]

[LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 0,LOAD,LOADc 100,ADD,LOADc 2,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP,DUP,POP,HALT]

In [21]:
run (is3 [], [0,0,0])

{ stack = [1101,2,1001]
 , code = [] }

In [167]:
Map.size ρ3
replicate (Map.size ρ3) 0

3

[0,0,0]

In [22]:
run (is3 [], replicate (Map.size ρ3) 0)

{ stack = [1101,2,1001]
 , code = [] }

<br>

이번엔 이 프로그램을 컴파일해 보자.

```c
int x = 1000;
int i = 1;
while (i <= 10) {
    x <- x + i;
    i <- i + 1;
}
```

In [23]:
-- Sample program: two global int declarations followed by a while loop that
-- updates both variables.
stmt4 = Block 
    [ DStmt $ VDec Tint "x" (Lit 1000)  -- int x = 1000;
    , DStmt $ VDec Tint "i" (Lit 1)     -- int i = 1;
    , While (Leq (Var "i") (Lit 10)) $ Block                   -- while (i <= 10) {
        [ EStmt $ Assign (Var "x") (Add (Var "x") (Var "i")) --    x <- x + i;
        , EStmt $ Assign (Var "i") (Add (Var "i") (Lit 1))   --    i <- i + 1;
        ]                                                    -- }
    ]

In [24]:
-- `extend` works on an (AEnv,TEnv) pair; `code` only needs the address environment.
ρ4 = fst (extend stmt4 (Map.empty, Map.empty))
is4 = code stmt4 ρ4

In [25]:
ρ4
is4 []

fromList [("i",1),("x",0)]

[LOADc 1000,LOADc 0,STORE,POP,LOADc 1,LOADc 1,STORE,POP,LOADc 1,LOAD,LOADc 10,LEQ,JUMPz ...,LOADc 0,LOAD,LOADc 1,LOAD,ADD,LOADc 0,STORE,POP,LOADc 1,LOAD,LOADc 1,ADD,LOADc 1,STORE,POP,JUMP ...]

In [26]:
run (is4 [], replicate (Map.size ρ4) 0)

{ stack = [11,1055]
 , code = [] }

<br>

컴파일하기 전에 프로그램을 한번 훑으며 변수 선언문에 나타나는 변수들에 대한 적절한 주소환경을 자동으로 계산할 수 있다.

지금은 모든 변수가 기본타입인 정수형이다.
만일 1차원 정수 배열도 선언할 수 있게 하려면 어떻게 주소환경을 계산해야 할까?

힌트: `extendD`에서 변수 선언(`VDec ...`)에 `nextSA`를 계산할 때 어떤 점을 추가로 고려해야 할지 생각해 보라.