Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
187 lines (154 sloc) 3.25 KB
bits 64
section .text
global _start
;default rel
%include ""
; registers that we care about:
; rax - TOS
; rsi - pointer to second item on stack
; rsp - top of return stack
; linux x64 syscall abi
; rax: syscall#
; rdi rsi rdx r10 r8 r9: arg1-6
; rcx r11: clobbered
; dup ( x -- x x )
; Spill the cached top-of-stack (rax) into the in-memory data stack so rax can
; be overwritten with a new value. rsi moves down by one 8-byte cell.
; lea (rather than sub) leaves the flags untouched.
%macro dup 0
        lea     rsi, [rsi - 8]
        mov     [rsi], rax
%endmacro

; drop ( x -- )
; Discard the top-of-stack: reload rax from the cell rsi points at and move
; rsi up by one cell. lodsq does both in one instruction, but only steps
; upward while the direction flag is clear (the startup code executes cld).
%macro drop 0
        lodsq
%endmacro
;-----------------------------------------------------------------------
; syscall3 ( arg3 arg2 arg1 syscall# -- kernelret )
; Issue a three-argument Linux x86-64 system call.
; In:    rax       = syscall number (TOS)
;        [rsi]     = arg1, [rsi + 8] = arg2, [rsi + 16] = arg3
; Out:   rax       = kernel return value (new TOS)
;        rsi       = data stack pointer with the three argument cells popped
; Clobb: rcx, rdx, rdi, r11, mm0
;-----------------------------------------------------------------------
syscall3:
        mov     rdi, [rsi]              ; arg1
        mov     rcx, [rsi + 8]          ; arg2, parked: rsi is still the stack pointer
        mov     rdx, [rsi + 16]         ; arg3
        lea     rsi, [rsi + 24]         ; pop the three argument cells
        movq    mm0, rsi                ; stash the data stack pointer; the ABI needs rsi for arg2
        mov     rsi, rcx                ; arg2 into its ABI register
        syscall                         ; result in rax; rcx and r11 are clobbered
        movq    rsi, mm0                ; restore the data stack pointer
        ret
;-----------------------------------------------------------------------
; key ( -- c )
; : key 1 tib 0 sys_read syscall3 drop tib c@ ;
; Read one byte from standard input into tib and push it, zero-extended.
; The kernel return value is discarded, so on EOF or error the byte pushed
; is whatever tib[0] already held.
; Clobb: rcx, rdx, rdi, r11, mm0 (via syscall3)
;-----------------------------------------------------------------------
key:
        dup                             ; save the caller's TOS
        mov     eax, 1                  ; count
        dup
        lea     rax, [rel tib]          ; buf
        dup
        xor     eax, eax                ; STDIN_FILENO
        dup
        mov     eax, sys_read
        call    syscall3
        drop                            ; kernel ret
        dup                             ; make room for the result
        movzx   eax, byte [rel tib]     ; c@: zero-extend so no stale upper bits remain
        ret
;-----------------------------------------------------------------------
; emit ( c -- )
; : emit tob c! 1 tob 2 sys_write syscall3 drop ;
; Write the low byte of TOS to file descriptor 2 (stderr, chosen because it
; is unbuffered). Falls through into emit_n with a count of 1.
;
; emit_n ( count -- )
; Write the first `count` bytes of tob to file descriptor 2.
; The kernel return value is ignored.
; Clobb: rcx, rdx, rdi, r11, mm0 (via syscall3)
;-----------------------------------------------------------------------
emit:
        mov     [rel tob], al
        mov     eax, 1                  ; count
emit_n:
        dup
        lea     rax, [rel tob]          ; buf
        dup
        mov     eax, 2                  ; stderr fd (no buffering)
        dup
        mov     eax, sys_write
        call    syscall3
        drop                            ; ignore the kernelret
        ret
; TODO write this routine in LLVM IR and see what the LLVM codegen can come
; up with
;-----------------------------------------------------------------------
; hexdot ( x -- )     "h."
; Print TOS as 16 lowercase hex digits, most significant digit first.
; Most of the work is unpacking each nybble into its own byte; the digits
; are then looked up 16 at a time with pshufb (requires SSSE3).
; Requires tob and hexdigits to be 16-byte aligned (movdqa).
; Tail-calls emit_n, which performs the write and returns to the caller.
; Clobb: rcx, xmm0, xmm1, plus everything emit_n clobbers
;-----------------------------------------------------------------------
hexdot:
        bswap   rax                     ; want to print out MSB first
        mov     rcx, 0xF0F0F0F0F0F0F0F0
        and     rcx, rax                ; rcx = high nybbles, still in the high half of each byte
        xor     rax, rcx                ; rax = low nybbles of old rax
        shr     rcx, 4                  ; rcx = high nybbles of old rax, moved to the low half
        movq    xmm0, rax
        movq    xmm1, rcx
        punpcklbw xmm1, xmm0            ; interleave: hi0, lo0, hi1, lo1, ... (16 nybble values)
        movdqa  xmm0, [rel hexdigits]
        pshufb  xmm0, xmm1              ; xmm0[i] = hexdigits[nybble[i]]
        movdqa  [rel tob], xmm0
        mov     eax, 16                 ; count
        jmp     emit_n                  ; tail call
; different ways of doing the same thing
; mov ecx, 0xF0F0F0F0
; shrd rcx, rcx, 32 ; fill up the rest
; mov rcx, 0xF0F0F0F0F0F0F0F0
; pcmpeqw mm0, mm0 ; set mm0 to all 0xFFF...
; psllw mm0, 8
; movq rax, mm0
; mov eax, 0xF0F0F0F0
; movd mm0, eax
; punpcklbw mm0, mm0
; mov al, 0xF0
; movd mm0, eax
; pxor mm1, mm1
; pshufb mm0, mm1
; vector idioms
; set a vector register to all 0's
; pxor xmm0, xmm0
; set a vector register to all 1's
; pcmpeq[bwd] xmm0, xmm0
; extend low {byte,word,dword,qword}-wise pattern to high
; punpckl{bw,wd,dq,qdq} xmm0, xmm0
; repeatedly applying pshufb to a register, using the register itself as the
; shuffle control, amounts to permuting it based on its own contents. This
; could be interesting if there are multiple permutation cycles (of different
; periods) inside the vector register
; in general, pshufb can compose permutations
; to broadcast the lowest byte of xmm0, do
; punpcklbw xmm0, xmm0
; punpcklbw xmm0, xmm0 ; punpcklwd would work too, but would probably be slower
; punpcklbw xmm0, xmm0
; punpcklbw xmm0, xmm0
; or if a scratch register is available:
; pxor xmm1, xmm1
; pshufb xmm0, xmm1
; to propagate the lowest word, do
; punpcklwd xmm0, xmm0
; punpcklwd xmm0, xmm0
; punpcklwd xmm0, xmm0
;-----------------------------------------------------------------------
; _start: process entry point (never returns).
; Sets up the data stack, prints a test pattern in hex, then five times:
; reads a key, prints its value in hex and echoes it. Exits with the low
; 32 bits of TOS as the status.
; r13 = loop counter; none of the routines called in the loop write it.
; NOTE(review): init is not defined in the visible part of this file;
; presumably it comes from elsewhere -- confirm.
;-----------------------------------------------------------------------
_start:
        cld                             ; data stack grows down; lodsq must step upward
        mov     rsi, rsp
        sub     rsi, 0x1000             ; put data stack one page below return stack
        call    init
        mov     rax, 0xdeadbeefcafebabe
        call    hexdot
        mov     r13, 5
.loop:
        call    key                     ; ( -- c )
        dup                             ; ( c -- c c ) one copy each for hexdot and emit
        call    hexdot                  ; ( c c -- c )
        call    emit                    ; ( c -- )
        sub     r13, 1                  ; sets ZF itself, no separate test needed
        jnz     .loop
        mov     edi, eax                ; exit status = low 32 bits of TOS
        mov     eax, sys_exit
        syscall
section .data
; text input buffer and text output buffer
; tob is written with movdqa, so it must sit on a 16-byte boundary
        align   16
tib:    times 128 db 0
        align   16
tob:    times 128 db 0
; nybble value -> ASCII hex digit lookup table; loaded with movdqa, so it
; must be 16-byte aligned as well
        align   16
hexdigits:
        db      '0123456789abcdef'
; note to self: mmap takes 6 args