mirrored from https://gitlab.haskell.org/ghc/ghc.git
-
Notifications
You must be signed in to change notification settings - Fork 704
/
CodeGen.hs
3002 lines (2594 loc) · 114 KB
/
CodeGen.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{-# LANGUAGE CPP, GADTs, NondecreasingIndentation #-}
-----------------------------------------------------------------------------
--
-- Generating machine code (instruction selection)
--
-- (c) The University of Glasgow 1996-2004
--
-----------------------------------------------------------------------------
-- This is a big module, but, if you pay attention to
-- (a) the sectioning, and (b) the type signatures, the
-- structure should not be too overwhelming.
module X86.CodeGen (
cmmTopCodeGen,
generateJumpTableForInstr,
InstrBlock
)
where
#include "HsVersions.h"
#include "nativeGen/NCG.h"
#include "../includes/MachDeps.h"
-- NCG stuff:
import X86.Instr
import X86.Cond
import X86.Regs
import X86.RegInfo
import CodeGen.Platform
import CPrim
import Debug ( DebugBlock(..) )
import Instruction
import PIC
import NCGMonad
import Size
import Reg
import Platform
-- Our intermediate code:
import BasicTypes
import BlockId
import Module ( primPackageKey )
import PprCmm ()
import CmmUtils
import Cmm
import Hoopl
import CLabel
import CoreSyn ( Tickish(..) )
import SrcLoc ( srcSpanFile, srcSpanStartLine, srcSpanStartCol )
-- The rest:
import ForeignCall ( CCallConv(..) )
import OrdList
import Outputable
import Unique
import FastString
import FastBool ( isFastTrue )
import DynFlags
import Util
import Control.Monad
import Data.Bits
import Data.Int
import Data.Maybe
import Data.Word
is32BitPlatform :: NatM Bool
is32BitPlatform = do
dflags <- getDynFlags
return $ target32Bit (targetPlatform dflags)
sse2Enabled :: NatM Bool
sse2Enabled = do
dflags <- getDynFlags
return (isSse2Enabled dflags)
sse4_2Enabled :: NatM Bool
sse4_2Enabled = do
dflags <- getDynFlags
return (isSse4_2Enabled dflags)
if_sse2 :: NatM a -> NatM a -> NatM a
if_sse2 sse2 x87 = do
b <- sse2Enabled
if b then sse2 else x87
cmmTopCodeGen
:: RawCmmDecl
-> NatM [NatCmmDecl (Alignment, CmmStatics) Instr]
cmmTopCodeGen (CmmProc info lab live graph) = do
let blocks = toBlockListEntryFirst graph
(nat_blocks,statics) <- mapAndUnzipM basicBlockCodeGen blocks
picBaseMb <- getPicBaseMaybeNat
dflags <- getDynFlags
let proc = CmmProc info lab live (ListGraph $ concat nat_blocks)
tops = proc : concat statics
os = platformOS $ targetPlatform dflags
case picBaseMb of
Just picBase -> initializePicBase_x86 ArchX86 os picBase tops
Nothing -> return tops
cmmTopCodeGen (CmmData sec dat) = do
return [CmmData sec (1, dat)] -- no translation, we just use CmmStatic
basicBlockCodeGen
:: CmmBlock
-> NatM ( [NatBasicBlock Instr]
, [NatCmmDecl (Alignment, CmmStatics) Instr])
basicBlockCodeGen block = do
let (_, nodes, tail) = blockSplit block
id = entryLabel block
stmts = blockToList nodes
-- Generate location directive
dbg <- getDebugBlock (entryLabel block)
loc_instrs <- case dblSourceTick =<< dbg of
Just (SourceNote span name)
-> do fileId <- getFileId (srcSpanFile span)
let line = srcSpanStartLine span; col = srcSpanStartCol span
return $ unitOL $ LOCATION fileId line col name
_ -> return nilOL
mid_instrs <- stmtsToInstrs stmts
tail_instrs <- stmtToInstrs tail
let instrs = loc_instrs `appOL` mid_instrs `appOL` tail_instrs
-- code generation may introduce new basic block boundaries, which
-- are indicated by the NEWBLOCK instruction. We must split up the
-- instruction stream into basic blocks again. Also, we extract
-- LDATAs here too.
let
(top,other_blocks,statics) = foldrOL mkBlocks ([],[],[]) instrs
mkBlocks (NEWBLOCK id) (instrs,blocks,statics)
= ([], BasicBlock id instrs : blocks, statics)
mkBlocks (LDATA sec dat) (instrs,blocks,statics)
= (instrs, blocks, CmmData sec dat:statics)
mkBlocks instr (instrs,blocks,statics)
= (instr:instrs, blocks, statics)
return (BasicBlock id top : other_blocks, statics)
stmtsToInstrs :: [CmmNode e x] -> NatM InstrBlock
stmtsToInstrs stmts
= do instrss <- mapM stmtToInstrs stmts
return (concatOL instrss)
stmtToInstrs :: CmmNode e x -> NatM InstrBlock
stmtToInstrs stmt = do
dflags <- getDynFlags
is32Bit <- is32BitPlatform
case stmt of
CmmComment s -> return (unitOL (COMMENT s))
CmmTick {} -> return nilOL
CmmUnwind {} -> return nilOL
CmmAssign reg src
| isFloatType ty -> assignReg_FltCode size reg src
| is32Bit && isWord64 ty -> assignReg_I64Code reg src
| otherwise -> assignReg_IntCode size reg src
where ty = cmmRegType dflags reg
size = cmmTypeSize ty
CmmStore addr src
| isFloatType ty -> assignMem_FltCode size addr src
| is32Bit && isWord64 ty -> assignMem_I64Code addr src
| otherwise -> assignMem_IntCode size addr src
where ty = cmmExprType dflags src
size = cmmTypeSize ty
CmmUnsafeForeignCall target result_regs args
-> genCCall dflags is32Bit target result_regs args
CmmBranch id -> genBranch id
CmmCondBranch arg true false -> do b1 <- genCondJump true arg
b2 <- genBranch false
return (b1 `appOL` b2)
CmmSwitch arg ids -> do dflags <- getDynFlags
genSwitch dflags arg ids
CmmCall { cml_target = arg
, cml_args_regs = gregs } -> do
dflags <- getDynFlags
genJump arg (jumpRegs dflags gregs)
_ ->
panic "stmtToInstrs: statement should have been cps'd away"
jumpRegs :: DynFlags -> [GlobalReg] -> [Reg]
jumpRegs dflags gregs = [ RegReal r | Just r <- map (globalRegMaybe platform) gregs ]
where platform = targetPlatform dflags
--------------------------------------------------------------------------------
-- | 'InstrBlock's are the insn sequences generated by the insn selectors.
-- They are really trees of insns to facilitate fast appending, where a
-- left-to-right traversal yields the insns in the correct order.
--
type InstrBlock
= OrdList Instr
-- | Condition codes passed up the tree.
--
data CondCode
= CondCode Bool Cond InstrBlock
-- | a.k.a "Register64"
-- Reg is the lower 32-bit temporary which contains the result.
-- Use getHiVRegFromLo to find the other VRegUnique.
--
-- Rules of this simplified insn selection game are therefore that
-- the returned Reg may be modified
--
data ChildCode64
= ChildCode64
InstrBlock
Reg
-- | Register's passed up the tree. If the stix code forces the register
-- to live in a pre-decided machine register, it comes out as @Fixed@;
-- otherwise, it comes out as @Any@, and the parent can decide which
-- register to put it in.
--
data Register
= Fixed Size Reg InstrBlock
| Any Size (Reg -> InstrBlock)
swizzleRegisterRep :: Register -> Size -> Register
swizzleRegisterRep (Fixed _ reg code) size = Fixed size reg code
swizzleRegisterRep (Any _ codefn) size = Any size codefn
-- | Grab the Reg for a CmmReg
getRegisterReg :: Platform -> Bool -> CmmReg -> Reg
getRegisterReg _ use_sse2 (CmmLocal (LocalReg u pk))
= let sz = cmmTypeSize pk in
if isFloatSize sz && not use_sse2
then RegVirtual (mkVirtualReg u FF80)
else RegVirtual (mkVirtualReg u sz)
getRegisterReg platform _ (CmmGlobal mid)
= case globalRegMaybe platform mid of
Just reg -> RegReal $ reg
Nothing -> pprPanic "getRegisterReg-memory" (ppr $ CmmGlobal mid)
-- By this stage, the only MagicIds remaining should be the
-- ones which map to a real machine register on this
-- platform. Hence ...
-- | Memory addressing modes passed up the tree.
data Amode
= Amode AddrMode InstrBlock
{-
Now, given a tree (the argument to an CmmLoad) that references memory,
produce a suitable addressing mode.
A Rule of the Game (tm) for Amodes: use of the addr bit must
immediately follow use of the code part, since the code part puts
values in registers which the addr then refers to. So you can't put
anything in between, lest it overwrite some of those registers. If
you need to do some other computation between the code part and use of
the addr bit, first store the effective address from the amode in a
temporary, then do the other computation, and then use the temporary:
code
LEA amode, tmp
... other computation ...
... (tmp) ...
-}
-- | Check whether an integer will fit in 32 bits.
-- A CmmInt is intended to be truncated to the appropriate
-- number of bits, so here we truncate it to Int64. This is
-- important because e.g. -1 as a CmmInt might be either
-- -1 or 18446744073709551615.
--
is32BitInteger :: Integer -> Bool
is32BitInteger i = i64 <= 0x7fffffff && i64 >= -0x80000000
where i64 = fromIntegral i :: Int64
-- | Convert a BlockId to some CmmStatic data
jumpTableEntry :: DynFlags -> Maybe BlockId -> CmmStatic
jumpTableEntry dflags Nothing = CmmStaticLit (CmmInt 0 (wordWidth dflags))
jumpTableEntry _ (Just blockid) = CmmStaticLit (CmmLabel blockLabel)
where blockLabel = mkAsmTempLabel (getUnique blockid)
-- -----------------------------------------------------------------------------
-- General things for putting together code sequences
-- Expand CmmRegOff. ToDo: should we do it this way around, or convert
-- CmmExprs into CmmRegOff?
mangleIndexTree :: DynFlags -> CmmReg -> Int -> CmmExpr
mangleIndexTree dflags reg off
= CmmMachOp (MO_Add width) [CmmReg reg, CmmLit (CmmInt (fromIntegral off) width)]
where width = typeWidth (cmmRegType dflags reg)
-- | The dual to getAnyReg: compute an expression into a register, but
-- we don't mind which one it is.
getSomeReg :: CmmExpr -> NatM (Reg, InstrBlock)
getSomeReg expr = do
r <- getRegister expr
case r of
Any rep code -> do
tmp <- getNewRegNat rep
return (tmp, code tmp)
Fixed _ reg code ->
return (reg, code)
assignMem_I64Code :: CmmExpr -> CmmExpr -> NatM InstrBlock
assignMem_I64Code addrTree valueTree = do
Amode addr addr_code <- getAmode addrTree
ChildCode64 vcode rlo <- iselExpr64 valueTree
let
rhi = getHiVRegFromLo rlo
-- Little-endian store
mov_lo = MOV II32 (OpReg rlo) (OpAddr addr)
mov_hi = MOV II32 (OpReg rhi) (OpAddr (fromJust (addrOffset addr 4)))
return (vcode `appOL` addr_code `snocOL` mov_lo `snocOL` mov_hi)
assignReg_I64Code :: CmmReg -> CmmExpr -> NatM InstrBlock
assignReg_I64Code (CmmLocal (LocalReg u_dst _)) valueTree = do
ChildCode64 vcode r_src_lo <- iselExpr64 valueTree
let
r_dst_lo = RegVirtual $ mkVirtualReg u_dst II32
r_dst_hi = getHiVRegFromLo r_dst_lo
r_src_hi = getHiVRegFromLo r_src_lo
mov_lo = MOV II32 (OpReg r_src_lo) (OpReg r_dst_lo)
mov_hi = MOV II32 (OpReg r_src_hi) (OpReg r_dst_hi)
return (
vcode `snocOL` mov_lo `snocOL` mov_hi
)
assignReg_I64Code _ _
= panic "assignReg_I64Code(i386): invalid lvalue"
iselExpr64 :: CmmExpr -> NatM ChildCode64
iselExpr64 (CmmLit (CmmInt i _)) = do
(rlo,rhi) <- getNewRegPairNat II32
let
r = fromIntegral (fromIntegral i :: Word32)
q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
code = toOL [
MOV II32 (OpImm (ImmInteger r)) (OpReg rlo),
MOV II32 (OpImm (ImmInteger q)) (OpReg rhi)
]
return (ChildCode64 code rlo)
iselExpr64 (CmmLoad addrTree ty) | isWord64 ty = do
Amode addr addr_code <- getAmode addrTree
(rlo,rhi) <- getNewRegPairNat II32
let
mov_lo = MOV II32 (OpAddr addr) (OpReg rlo)
mov_hi = MOV II32 (OpAddr (fromJust (addrOffset addr 4))) (OpReg rhi)
return (
ChildCode64 (addr_code `snocOL` mov_lo `snocOL` mov_hi)
rlo
)
iselExpr64 (CmmReg (CmmLocal (LocalReg vu ty))) | isWord64 ty
= return (ChildCode64 nilOL (RegVirtual $ mkVirtualReg vu II32))
-- we handle addition, but rather badly
iselExpr64 (CmmMachOp (MO_Add _) [e1, CmmLit (CmmInt i _)]) = do
ChildCode64 code1 r1lo <- iselExpr64 e1
(rlo,rhi) <- getNewRegPairNat II32
let
r = fromIntegral (fromIntegral i :: Word32)
q = fromIntegral (fromIntegral (i `shiftR` 32) :: Word32)
r1hi = getHiVRegFromLo r1lo
code = code1 `appOL`
toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
ADD II32 (OpImm (ImmInteger r)) (OpReg rlo),
MOV II32 (OpReg r1hi) (OpReg rhi),
ADC II32 (OpImm (ImmInteger q)) (OpReg rhi) ]
return (ChildCode64 code rlo)
iselExpr64 (CmmMachOp (MO_Add _) [e1,e2]) = do
ChildCode64 code1 r1lo <- iselExpr64 e1
ChildCode64 code2 r2lo <- iselExpr64 e2
(rlo,rhi) <- getNewRegPairNat II32
let
r1hi = getHiVRegFromLo r1lo
r2hi = getHiVRegFromLo r2lo
code = code1 `appOL`
code2 `appOL`
toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
ADD II32 (OpReg r2lo) (OpReg rlo),
MOV II32 (OpReg r1hi) (OpReg rhi),
ADC II32 (OpReg r2hi) (OpReg rhi) ]
return (ChildCode64 code rlo)
iselExpr64 (CmmMachOp (MO_Sub _) [e1,e2]) = do
ChildCode64 code1 r1lo <- iselExpr64 e1
ChildCode64 code2 r2lo <- iselExpr64 e2
(rlo,rhi) <- getNewRegPairNat II32
let
r1hi = getHiVRegFromLo r1lo
r2hi = getHiVRegFromLo r2lo
code = code1 `appOL`
code2 `appOL`
toOL [ MOV II32 (OpReg r1lo) (OpReg rlo),
SUB II32 (OpReg r2lo) (OpReg rlo),
MOV II32 (OpReg r1hi) (OpReg rhi),
SBB II32 (OpReg r2hi) (OpReg rhi) ]
return (ChildCode64 code rlo)
iselExpr64 (CmmMachOp (MO_UU_Conv _ W64) [expr]) = do
fn <- getAnyReg expr
r_dst_lo <- getNewRegNat II32
let r_dst_hi = getHiVRegFromLo r_dst_lo
code = fn r_dst_lo
return (
ChildCode64 (code `snocOL`
MOV II32 (OpImm (ImmInt 0)) (OpReg r_dst_hi))
r_dst_lo
)
iselExpr64 expr
= pprPanic "iselExpr64(i386)" (ppr expr)
--------------------------------------------------------------------------------
getRegister :: CmmExpr -> NatM Register
getRegister e = do dflags <- getDynFlags
is32Bit <- is32BitPlatform
getRegister' dflags is32Bit e
getRegister' :: DynFlags -> Bool -> CmmExpr -> NatM Register
getRegister' dflags is32Bit (CmmReg reg)
= case reg of
CmmGlobal PicBaseReg
| is32Bit ->
-- on x86_64, we have %rip for PicBaseReg, but it's not
-- a full-featured register, it can only be used for
-- rip-relative addressing.
do reg' <- getPicBaseNat (archWordSize is32Bit)
return (Fixed (archWordSize is32Bit) reg' nilOL)
_ ->
do use_sse2 <- sse2Enabled
let
sz = cmmTypeSize (cmmRegType dflags reg)
size | not use_sse2 && isFloatSize sz = FF80
| otherwise = sz
--
let platform = targetPlatform dflags
return (Fixed size (getRegisterReg platform use_sse2 reg) nilOL)
getRegister' dflags is32Bit (CmmRegOff r n)
= getRegister' dflags is32Bit $ mangleIndexTree dflags r n
-- for 32-bit architectuers, support some 64 -> 32 bit conversions:
-- TO_W_(x), TO_W_(x >> 32)
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32)
[CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
| is32Bit = do
ChildCode64 code rlo <- iselExpr64 x
return $ Fixed II32 (getHiVRegFromLo rlo) code
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32)
[CmmMachOp (MO_U_Shr W64) [x,CmmLit (CmmInt 32 _)]])
| is32Bit = do
ChildCode64 code rlo <- iselExpr64 x
return $ Fixed II32 (getHiVRegFromLo rlo) code
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W64 W32) [x])
| is32Bit = do
ChildCode64 code rlo <- iselExpr64 x
return $ Fixed II32 rlo code
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W64 W32) [x])
| is32Bit = do
ChildCode64 code rlo <- iselExpr64 x
return $ Fixed II32 rlo code
getRegister' _ _ (CmmLit lit@(CmmFloat f w)) =
if_sse2 float_const_sse2 float_const_x87
where
float_const_sse2
| f == 0.0 = do
let
size = floatSize w
code dst = unitOL (XOR size (OpReg dst) (OpReg dst))
-- I don't know why there are xorpd, xorps, and pxor instructions.
-- They all appear to do the same thing --SDM
return (Any size code)
| otherwise = do
Amode addr code <- memConstant (widthInBytes w) lit
loadFloatAmode True w addr code
float_const_x87 = case w of
W64
| f == 0.0 ->
let code dst = unitOL (GLDZ dst)
in return (Any FF80 code)
| f == 1.0 ->
let code dst = unitOL (GLD1 dst)
in return (Any FF80 code)
_otherwise -> do
Amode addr code <- memConstant (widthInBytes w) lit
loadFloatAmode False w addr code
-- catch simple cases of zero- or sign-extended load
getRegister' _ _ (CmmMachOp (MO_UU_Conv W8 W32) [CmmLoad addr _]) = do
code <- intLoadCode (MOVZxL II8) addr
return (Any II32 code)
getRegister' _ _ (CmmMachOp (MO_SS_Conv W8 W32) [CmmLoad addr _]) = do
code <- intLoadCode (MOVSxL II8) addr
return (Any II32 code)
getRegister' _ _ (CmmMachOp (MO_UU_Conv W16 W32) [CmmLoad addr _]) = do
code <- intLoadCode (MOVZxL II16) addr
return (Any II32 code)
getRegister' _ _ (CmmMachOp (MO_SS_Conv W16 W32) [CmmLoad addr _]) = do
code <- intLoadCode (MOVSxL II16) addr
return (Any II32 code)
-- catch simple cases of zero- or sign-extended load
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W8 W64) [CmmLoad addr _])
| not is32Bit = do
code <- intLoadCode (MOVZxL II8) addr
return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W8 W64) [CmmLoad addr _])
| not is32Bit = do
code <- intLoadCode (MOVSxL II8) addr
return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W16 W64) [CmmLoad addr _])
| not is32Bit = do
code <- intLoadCode (MOVZxL II16) addr
return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W16 W64) [CmmLoad addr _])
| not is32Bit = do
code <- intLoadCode (MOVSxL II16) addr
return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_UU_Conv W32 W64) [CmmLoad addr _])
| not is32Bit = do
code <- intLoadCode (MOV II32) addr -- 32-bit loads zero-extend
return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_SS_Conv W32 W64) [CmmLoad addr _])
| not is32Bit = do
code <- intLoadCode (MOVSxL II32) addr
return (Any II64 code)
getRegister' _ is32Bit (CmmMachOp (MO_Add W64) [CmmReg (CmmGlobal PicBaseReg),
CmmLit displacement])
| not is32Bit = do
return $ Any II64 (\dst -> unitOL $
LEA II64 (OpAddr (ripRel (litToImm displacement))) (OpReg dst))
getRegister' dflags is32Bit (CmmMachOp mop [x]) = do -- unary MachOps
sse2 <- sse2Enabled
case mop of
MO_F_Neg w
| sse2 -> sse2NegCode w x
| otherwise -> trivialUFCode FF80 (GNEG FF80) x
MO_S_Neg w -> triv_ucode NEGI (intSize w)
MO_Not w -> triv_ucode NOT (intSize w)
-- Nop conversions
MO_UU_Conv W32 W8 -> toI8Reg W32 x
MO_SS_Conv W32 W8 -> toI8Reg W32 x
MO_UU_Conv W16 W8 -> toI8Reg W16 x
MO_SS_Conv W16 W8 -> toI8Reg W16 x
MO_UU_Conv W32 W16 -> toI16Reg W32 x
MO_SS_Conv W32 W16 -> toI16Reg W32 x
MO_UU_Conv W64 W32 | not is32Bit -> conversionNop II64 x
MO_SS_Conv W64 W32 | not is32Bit -> conversionNop II64 x
MO_UU_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
MO_SS_Conv W64 W16 | not is32Bit -> toI16Reg W64 x
MO_UU_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
MO_SS_Conv W64 W8 | not is32Bit -> toI8Reg W64 x
MO_UU_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
MO_SS_Conv rep1 rep2 | rep1 == rep2 -> conversionNop (intSize rep1) x
-- widenings
MO_UU_Conv W8 W32 -> integerExtend W8 W32 MOVZxL x
MO_UU_Conv W16 W32 -> integerExtend W16 W32 MOVZxL x
MO_UU_Conv W8 W16 -> integerExtend W8 W16 MOVZxL x
MO_SS_Conv W8 W32 -> integerExtend W8 W32 MOVSxL x
MO_SS_Conv W16 W32 -> integerExtend W16 W32 MOVSxL x
MO_SS_Conv W8 W16 -> integerExtend W8 W16 MOVSxL x
MO_UU_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVZxL x
MO_UU_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVZxL x
MO_UU_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVZxL x
MO_SS_Conv W8 W64 | not is32Bit -> integerExtend W8 W64 MOVSxL x
MO_SS_Conv W16 W64 | not is32Bit -> integerExtend W16 W64 MOVSxL x
MO_SS_Conv W32 W64 | not is32Bit -> integerExtend W32 W64 MOVSxL x
-- for 32-to-64 bit zero extension, amd64 uses an ordinary movl.
-- However, we don't want the register allocator to throw it
-- away as an unnecessary reg-to-reg move, so we keep it in
-- the form of a movzl and print it as a movl later.
MO_FF_Conv W32 W64
| sse2 -> coerceFP2FP W64 x
| otherwise -> conversionNop FF80 x
MO_FF_Conv W64 W32 -> coerceFP2FP W32 x
MO_FS_Conv from to -> coerceFP2Int from to x
MO_SF_Conv from to -> coerceInt2FP from to x
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
MO_V_Add {} -> needLlvm
MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
MO_VU_Quot {} -> needLlvm
MO_VU_Rem {} -> needLlvm
MO_VF_Insert {} -> needLlvm
MO_VF_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
MO_VF_Quot {} -> needLlvm
MO_VF_Neg {} -> needLlvm
_other -> pprPanic "getRegister" (pprMachOp mop)
where
triv_ucode :: (Size -> Operand -> Instr) -> Size -> NatM Register
triv_ucode instr size = trivialUCode size (instr size) x
-- signed or unsigned extension.
integerExtend :: Width -> Width
-> (Size -> Operand -> Operand -> Instr)
-> CmmExpr -> NatM Register
integerExtend from to instr expr = do
(reg,e_code) <- if from == W8 then getByteReg expr
else getSomeReg expr
let
code dst =
e_code `snocOL`
instr (intSize from) (OpReg reg) (OpReg dst)
return (Any (intSize to) code)
toI8Reg :: Width -> CmmExpr -> NatM Register
toI8Reg new_rep expr
= do codefn <- getAnyReg expr
return (Any (intSize new_rep) codefn)
-- HACK: use getAnyReg to get a byte-addressable register.
-- If the source was a Fixed register, this will add the
-- mov instruction to put it into the desired destination.
-- We're assuming that the destination won't be a fixed
-- non-byte-addressable register; it won't be, because all
-- fixed registers are word-sized.
toI16Reg = toI8Reg -- for now
conversionNop :: Size -> CmmExpr -> NatM Register
conversionNop new_size expr
= do e_code <- getRegister' dflags is32Bit expr
return (swizzleRegisterRep e_code new_size)
getRegister' _ is32Bit (CmmMachOp mop [x, y]) = do -- dyadic MachOps
sse2 <- sse2Enabled
case mop of
MO_F_Eq _ -> condFltReg is32Bit EQQ x y
MO_F_Ne _ -> condFltReg is32Bit NE x y
MO_F_Gt _ -> condFltReg is32Bit GTT x y
MO_F_Ge _ -> condFltReg is32Bit GE x y
MO_F_Lt _ -> condFltReg is32Bit LTT x y
MO_F_Le _ -> condFltReg is32Bit LE x y
MO_Eq _ -> condIntReg EQQ x y
MO_Ne _ -> condIntReg NE x y
MO_S_Gt _ -> condIntReg GTT x y
MO_S_Ge _ -> condIntReg GE x y
MO_S_Lt _ -> condIntReg LTT x y
MO_S_Le _ -> condIntReg LE x y
MO_U_Gt _ -> condIntReg GU x y
MO_U_Ge _ -> condIntReg GEU x y
MO_U_Lt _ -> condIntReg LU x y
MO_U_Le _ -> condIntReg LEU x y
MO_F_Add w | sse2 -> trivialFCode_sse2 w ADD x y
| otherwise -> trivialFCode_x87 GADD x y
MO_F_Sub w | sse2 -> trivialFCode_sse2 w SUB x y
| otherwise -> trivialFCode_x87 GSUB x y
MO_F_Quot w | sse2 -> trivialFCode_sse2 w FDIV x y
| otherwise -> trivialFCode_x87 GDIV x y
MO_F_Mul w | sse2 -> trivialFCode_sse2 w MUL x y
| otherwise -> trivialFCode_x87 GMUL x y
MO_Add rep -> add_code rep x y
MO_Sub rep -> sub_code rep x y
MO_S_Quot rep -> div_code rep True True x y
MO_S_Rem rep -> div_code rep True False x y
MO_U_Quot rep -> div_code rep False True x y
MO_U_Rem rep -> div_code rep False False x y
MO_S_MulMayOflo rep -> imulMayOflo rep x y
MO_Mul rep -> triv_op rep IMUL
MO_And rep -> triv_op rep AND
MO_Or rep -> triv_op rep OR
MO_Xor rep -> triv_op rep XOR
{- Shift ops on x86s have constraints on their source, it
either has to be Imm, CL or 1
=> trivialCode is not restrictive enough (sigh.)
-}
MO_Shl rep -> shift_code rep SHL x y {-False-}
MO_U_Shr rep -> shift_code rep SHR x y {-False-}
MO_S_Shr rep -> shift_code rep SAR x y {-False-}
MO_V_Insert {} -> needLlvm
MO_V_Extract {} -> needLlvm
MO_V_Add {} -> needLlvm
MO_V_Sub {} -> needLlvm
MO_V_Mul {} -> needLlvm
MO_VS_Quot {} -> needLlvm
MO_VS_Rem {} -> needLlvm
MO_VS_Neg {} -> needLlvm
MO_VF_Insert {} -> needLlvm
MO_VF_Extract {} -> needLlvm
MO_VF_Add {} -> needLlvm
MO_VF_Sub {} -> needLlvm
MO_VF_Mul {} -> needLlvm
MO_VF_Quot {} -> needLlvm
MO_VF_Neg {} -> needLlvm
_other -> pprPanic "getRegister(x86) - binary CmmMachOp (1)" (pprMachOp mop)
where
--------------------
triv_op width instr = trivialCode width op (Just op) x y
where op = instr (intSize width)
imulMayOflo :: Width -> CmmExpr -> CmmExpr -> NatM Register
imulMayOflo rep a b = do
(a_reg, a_code) <- getNonClobberedReg a
b_code <- getAnyReg b
let
shift_amt = case rep of
W32 -> 31
W64 -> 63
_ -> panic "shift_amt"
size = intSize rep
code = a_code `appOL` b_code eax `appOL`
toOL [
IMUL2 size (OpReg a_reg), -- result in %edx:%eax
SAR size (OpImm (ImmInt shift_amt)) (OpReg eax),
-- sign extend lower part
SUB size (OpReg edx) (OpReg eax)
-- compare against upper
-- eax==0 if high part == sign extended low part
]
return (Fixed size eax code)
--------------------
shift_code :: Width
-> (Size -> Operand -> Operand -> Instr)
-> CmmExpr
-> CmmExpr
-> NatM Register
{- Case1: shift length as immediate -}
shift_code width instr x (CmmLit lit) = do
x_code <- getAnyReg x
let
size = intSize width
code dst
= x_code dst `snocOL`
instr size (OpImm (litToImm lit)) (OpReg dst)
return (Any size code)
{- Case2: shift length is complex (non-immediate)
* y must go in %ecx.
* we cannot do y first *and* put its result in %ecx, because
%ecx might be clobbered by x.
* if we do y second, then x cannot be
in a clobbered reg. Also, we cannot clobber x's reg
with the instruction itself.
* so we can either:
- do y first, put its result in a fresh tmp, then copy it to %ecx later
- do y second and put its result into %ecx. x gets placed in a fresh
tmp. This is likely to be better, because the reg alloc can
eliminate this reg->reg move here (it won't eliminate the other one,
because the move is into the fixed %ecx).
-}
shift_code width instr x y{-amount-} = do
x_code <- getAnyReg x
let size = intSize width
tmp <- getNewRegNat size
y_code <- getAnyReg y
let
code = x_code tmp `appOL`
y_code ecx `snocOL`
instr size (OpReg ecx) (OpReg tmp)
return (Fixed size tmp code)
--------------------
add_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
add_code rep x (CmmLit (CmmInt y _))
| is32BitInteger y = add_int rep x y
add_code rep x y = trivialCode rep (ADD size) (Just (ADD size)) x y
where size = intSize rep
-- TODO: There are other interesting patterns we want to replace
-- with a LEA, e.g. `(x + offset) + (y << shift)`.
--------------------
sub_code :: Width -> CmmExpr -> CmmExpr -> NatM Register
sub_code rep x (CmmLit (CmmInt y _))
| is32BitInteger (-y) = add_int rep x (-y)
sub_code rep x y = trivialCode rep (SUB (intSize rep)) Nothing x y
-- our three-operand add instruction:
add_int width x y = do
(x_reg, x_code) <- getSomeReg x
let
size = intSize width
imm = ImmInt (fromInteger y)
code dst
= x_code `snocOL`
LEA size
(OpAddr (AddrBaseIndex (EABaseReg x_reg) EAIndexNone imm))
(OpReg dst)
--
return (Any size code)
----------------------
div_code width signed quotient x y = do
(y_op, y_code) <- getRegOrMem y -- cannot be clobbered
x_code <- getAnyReg x
let
size = intSize width
widen | signed = CLTD size
| otherwise = XOR size (OpReg edx) (OpReg edx)
instr | signed = IDIV
| otherwise = DIV
code = y_code `appOL`
x_code eax `appOL`
toOL [widen, instr size y_op]
result | quotient = eax
| otherwise = edx
return (Fixed size result code)
getRegister' _ _ (CmmLoad mem pk)
| isFloatType pk
= do
Amode addr mem_code <- getAmode mem
use_sse2 <- sse2Enabled
loadFloatAmode use_sse2 (typeWidth pk) addr mem_code
getRegister' _ is32Bit (CmmLoad mem pk)
| is32Bit && not (isWord64 pk)
= do
code <- intLoadCode instr mem
return (Any size code)
where
width = typeWidth pk
size = intSize width
instr = case width of
W8 -> MOVZxL II8
_other -> MOV size
-- We always zero-extend 8-bit loads, if we
-- can't think of anything better. This is because
-- we can't guarantee access to an 8-bit variant of every register
-- (esi and edi don't have 8-bit variants), so to make things
-- simpler we do our 8-bit arithmetic with full 32-bit registers.
-- Simpler memory load code on x86_64
getRegister' _ is32Bit (CmmLoad mem pk)
| not is32Bit
= do
code <- intLoadCode (MOV size) mem
return (Any size code)
where size = intSize $ typeWidth pk
getRegister' _ is32Bit (CmmLit (CmmInt 0 width))
= let
size = intSize width
-- x86_64: 32-bit xor is one byte shorter, and zero-extends to 64 bits
size1 = if is32Bit then size
else case size of
II64 -> II32
_ -> size
code dst
= unitOL (XOR size1 (OpReg dst) (OpReg dst))
in
return (Any size code)
-- optimisation for loading small literals on x86_64: take advantage
-- of the automatic zero-extension from 32 to 64 bits, because the 32-bit
-- instruction forms are shorter.
getRegister' dflags is32Bit (CmmLit lit)
| not is32Bit, isWord64 (cmmLitType dflags lit), not (isBigLit lit)
= let
imm = litToImm lit
code dst = unitOL (MOV II32 (OpImm imm) (OpReg dst))
in
return (Any II64 code)
where
isBigLit (CmmInt i _) = i < 0 || i > 0xffffffff
isBigLit _ = False
-- note1: not the same as (not.is32BitLit), because that checks for
-- signed literals that fit in 32 bits, but we want unsigned
-- literals here.
-- note2: all labels are small, because we're assuming the
-- small memory model (see gcc docs, -mcmodel=small).
getRegister' dflags _ (CmmLit lit)
= do let size = cmmTypeSize (cmmLitType dflags lit)
imm = litToImm lit
code dst = unitOL (MOV size (OpImm imm) (OpReg dst))
return (Any size code)
getRegister' _ _ other
| isVecExpr other = needLlvm
| otherwise = pprPanic "getRegister(x86)" (ppr other)
intLoadCode :: (Operand -> Operand -> Instr) -> CmmExpr
-> NatM (Reg -> InstrBlock)
intLoadCode instr mem = do
Amode src mem_code <- getAmode mem
return (\dst -> mem_code `snocOL` instr (OpAddr src) (OpReg dst))
-- Compute an expression into *any* register, adding the appropriate
-- move instruction if necessary.
getAnyReg :: CmmExpr -> NatM (Reg -> InstrBlock)
getAnyReg expr = do
r <- getRegister expr
anyReg r
anyReg :: Register -> NatM (Reg -> InstrBlock)
anyReg (Any _ code) = return code
anyReg (Fixed rep reg fcode) = return (\dst -> fcode `snocOL` reg2reg rep reg dst)
-- A bit like getSomeReg, but we want a reg that can be byte-addressed.
-- Fixed registers might not be byte-addressable, so we make sure we've
-- got a temporary, inserting an extra reg copy if necessary.
getByteReg :: CmmExpr -> NatM (Reg, InstrBlock)
getByteReg expr = do
is32Bit <- is32BitPlatform
if is32Bit
then do r <- getRegister expr
case r of
Any rep code -> do
tmp <- getNewRegNat rep
return (tmp, code tmp)
Fixed rep reg code
| isVirtualReg reg -> return (reg,code)
| otherwise -> do
tmp <- getNewRegNat rep
return (tmp, code `snocOL` reg2reg rep reg tmp)
-- ToDo: could optimise slightly by checking for
-- byte-addressable real registers, but that will
-- happen very rarely if at all.
else getSomeReg expr -- all regs are byte-addressable on x86_64
-- Another variant: this time we want the result in a register that cannot
-- be modified by code to evaluate an arbitrary expression.
getNonClobberedReg :: CmmExpr -> NatM (Reg, InstrBlock)
getNonClobberedReg expr = do