Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Labels in disassembly.

  • Loading branch information...
commit f9889f109a062a22febea6f70f1c3f40e3a6107d 1 parent e44a625
@amtal authored
Showing with 95 additions and 9 deletions.
  1. +91 −5 DCPU16/Disassembler.hs
  2. +4 −4 README.md
View
96 DCPU16/Disassembler.hs
@@ -5,22 +5,45 @@
-- It won't be useful for command line tools, but make more sense for
-- everything else.
module DCPU16.Disassembler
- ( disassemble
+ (
+ -- * High level disassembly
+ disassemble
+ -- * Component functions
+ , readInstructions
+ , annotate
) where
-import DCPU16.Instructions
+import Data.Word (Word16)
+import Data.List (sort)
import Data.ByteString (ByteString)
-import qualified Data.Vector as V
+import qualified Data.ByteString.Char8 as B
import Data.Vector (Vector)
+import qualified Data.Vector as V
+import Text.Printf
import Data.Serialize
+import DCPU16.Instructions
+import DCPU16.Instructions.Size
-- | Disassembler.
--
+-- Annotates result with labels for obvious jumps/calls, but keeps operand
+-- addresses as literals.
+disassemble :: ByteString -> Vector Instruction
+disassemble = annotate . readInstructions
+
+
+
+-- | Naive disassembly.
+--
-- Disassembly can fail at the very end of a buffer, if a partial multi-word
-- instruction is encountered. I'll eventually treat those as data words
-- instead, but for now it crashes... And is a point of weakness to keep in
-- mind for interpreters. What will the official one do?
-disassemble :: ByteString -> Vector Instruction
-disassemble s = case runGet (f []) s of
+--
+-- It can also produce gibberish if data words are inserted that look like
+-- multi-word instructions. This can be hard to catch, and will throw naive
+-- disassemblers.
+readInstructions :: ByteString -> Vector Instruction
+readInstructions s = case runGet (f []) s of
Right is -> V.fromList . reverse $ is
Left s -> error $ "Disassembly should never, ever, ever fail, but: "++s
where
@@ -31,3 +54,66 @@ disassemble s = case runGet (f []) s of
0 -> return
_ -> f
next (i:acc)
+
+
+-- | Add easily visible labels to instructions.
+--
+-- Stuff like JSR foo / SET PC, bar / SUB PC, 5
+--
+-- Labels are not inserted as operands, only defined. The names have the
+-- address in them, to match up with operands.
+annotate :: Vector Instruction -> Vector Instruction
+annotate is = V.fromList . insertLabels ls . V.toList $ is where
+ ls :: [(Word16,LabelType)]
+ ls = extractLabels is
+
+
+data LabelType = Call | DirectJump | RelativeJump deriving (Eq,Show,Read,Ord)
+
+-- | Get destinations of all JSR 'calls'.
+--
+-- Assumes there are no labels, only literals. Any label operands will be
+-- skipped.
+extractLabels :: Vector Instruction -> [(Word16,LabelType)]
+extractLabels = snd . foldl f (0,[]) . V.toList where
+ -- TODO: factor out size-fold pattern? how to handle instruction size
+ -- failure?
+ f (addr,acc) i = (addr',acc') where
+ addr' = addr + size i
+ acc' = accLabels acc addr' i
+ -- labels extracted:
+ accLabels acc addr (NonBasic JSR l) = (lit l, Call):acc
+ accLabels acc addr (Basic SET PC l) | isLit l =
+ (lit l, DirectJump):acc
+ accLabels acc addr (Basic ADD PC l) | isLit l =
+ (lit l+addr+1, RelativeJump):acc
+ accLabels acc addr (Basic SUB PC l) | isLit l =
+ (lit l+addr+1, RelativeJump):acc
+ accLabels acc _ _ = acc
+ -- utility
+ isLit (DirectLiteral (Const _)) = True
+ isLit (ShortLiteral (Const _)) = True
+ isLit _ = False
+ lit (DirectLiteral (Const w)) = w
+ lit (ShortLiteral (Const w)) = w
+
+-- | Insert labels.
+--
+-- Assumes instructions start at 0x0000, and label addresses are only within
+-- the range of instructions.
+insertLabels :: [(Word16,LabelType)] -> [Instruction] -> [Instruction]
+insertLabels ls is = reverse $ f (sort ls) is 0 [] where
+ f labels [] _ acc = acc
+ f [] is _ acc = reverse is ++ acc
+ f (l@(laddr,_):ls) (i:is) addr acc = f labels is addr' acc' where
+ addr' = addr+size i
+ (acc',labels) = if laddr>=addr && laddr<addr'
+ then (i:Label (showLabel l):acc, ls)
+ else (i:acc, l:ls)
+ showLabel :: (Word16,LabelType) -> ByteString
+ showLabel (w,lt) = B.pack $ printf fmt (toInteger w) where
+ fmt = case lt of
+ Call -> "func.%04x"
+ DirectJump -> "jump.%04x"
+ RelativeJump -> "rel.%04x"
+
View
8 README.md
@@ -46,7 +46,7 @@ augur@niflheim:~/code/0x10c/soyuz$ ./soyuz asm/notchspec.asm -a -h --no-optimize
0020: 7dc1 0021
```
-The disassembler mode is straightforward. I'll add function/jump labeling soon, but heuristics to avoid mixed in dat instructions and other pitfalls will have to wait.
+The disassembler mode is straightforward. No heuristics to avoid mixed in dat instructions and other pitfalls - but it does add obvious labels.
```
augur@niflheim:~/code/0x10c/soyuz$ ./soyuz asm/notchspec.asm -a -o notch.bin
@@ -58,16 +58,16 @@ augur@niflheim:~/code/0x10c/soyuz$ ./soyuz notch.bin -d
set pc, 0x17
set i, 0xa
set a, 0x2000
- set [i], [0x2000+a]
+:jump.000c set [i], [0x2000+a]
sub i, 1
ifn i, 0
set pc, 0xc
set x, 4
jsr 0x15
set pc, 0x17
- shl x, 4
+:func.0015 shl x, 4
set pc, pop
- set pc, 0x17
+:jump.0017 set pc, 0x17
```
Finally, pretty-print mode consistently re-formats the input.
Please sign in to comment.
Something went wrong with that request. Please try again.