mirrored from https://gitlab.haskell.org/ghc/ghc.git
/
Linker.c
1984 lines (1723 loc) · 60.9 KB
/
Linker.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* -----------------------------------------------------------------------------
*
* (c) The GHC Team, 2000-2012
*
* RTS Object Linker
*
* ---------------------------------------------------------------------------*/
#if 0
#include "rts/PosixSource.h"
#endif
#include "Rts.h"
#include "HsFFI.h"
#include "sm/Storage.h"
#include "Stats.h"
#include "Hash.h"
#include "LinkerInternals.h"
#include "RtsUtils.h"
#include "Trace.h"
#include "StgPrimFloat.h" // for __int_encodeFloat etc.
#include "Proftimer.h"
#include "GetEnv.h"
#include "StablePtr.h"
#include "RtsSymbols.h"
#include "RtsSymbolInfo.h"
#include "Profiling.h"
#include "ForeignExports.h"
#include "sm/OSMem.h"
#include "linker/M32Alloc.h"
#include "linker/CacheFlush.h"
#include "linker/SymbolExtras.h"
#include "linker/MMap.h"
#include "PathUtils.h"
#include "CheckUnload.h" // createOCSectionIndices
#include "ReportMemoryMap.h"
#if !defined(mingw32_HOST_OS) && defined(HAVE_SIGNAL_H)
#include "posix/Signals.h"
#endif
// get protos for is*()
#include <ctype.h>
#if defined(HAVE_SYS_TYPES_H)
#include <sys/types.h>
#endif
#include <fcntl.h>
#include <unistd.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <fs_rts.h>
#if defined(HAVE_SYS_STAT_H)
#include <sys/stat.h>
#endif
#if defined(HAVE_DLFCN_H)
#include <dlfcn.h>
#endif
#if defined(OBJFORMAT_ELF)
# include "linker/Elf.h"
# include <regex.h> // regex is already used by dlopen() so this is OK
// to use here without requiring an additional lib
#elif defined(OBJFORMAT_PEi386)
# include "linker/PEi386.h"
# include <windows.h>
#elif defined(OBJFORMAT_MACHO)
# include "linker/MachO.h"
# include <regex.h>
# include <mach/machine.h>
# include <mach-o/fat.h>
#endif
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
# include "linker/LoadNativeObjPosix.h"
#endif
#if defined(dragonfly_HOST_OS)
#include <sys/tls.h>
#endif
#define UNUSED(x) (void)(x)
/*
* Note [iconv and FreeBSD]
* ~~~~~~~~~~~~~~~~~~~~~~~~
*
* On FreeBSD libc.so provides an implementation of the iconv_* family of
* functions. However, due to their implementation, these symbols cannot be
* resolved via dlsym(); rather, they can only be resolved using the
* explicitly-versioned dlvsym().
*
* This is problematic for the RTS linker since we may be asked to load
* an object that depends upon iconv. To handle this we include a set of
* fallback cases for these functions, allowing us to resolve them to the
* symbols provided by the libc against which the RTS is linked.
*
* See #20354.
*/
#if defined(freebsd_HOST_OS)
extern void iconvctl();
extern void iconv_open_into();
extern void iconv_open();
extern void iconv_close();
extern void iconv_canonicalize();
extern void iconv();
#endif
/*
Note [runtime-linker-support]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
When adding support for a new platform to the runtime linker please
update `$TOP/configure.ac` under heading `Does target have runtime
linker support?`.
*/
/* `symhash` is a Hash table mapping symbol names to RtsSymbolInfo.
This hashtable will contain information on all symbols
that we know of, however the .o they are in may not be loaded.
Until the ObjectCode the symbol belongs to is actually
loaded this symbol may be replaced. So do not rely on
addresses of unloaded symbols.
Note [runtime-linker-phases]
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Broadly the behavior of the runtime linker can be
split into the following five phases:
- Indexing (e.g. ocVerifyImage and ocGetNames)
- Initialization (e.g. ocResolve)
- Resolve
- RunInit (e.g. ocRunInit)
- Lookup (e.g. lookupSymbol/lookupSymbolInNativeObj)
This is to enable lazy loading of symbols. Eager loading is problematic
as it means that all symbols must be available, even those which we will
never use. This is especially painful on Windows, where the number of
libraries required to link things like QT or WxWidgets grows to be quite high.
We proceed through these stages as follows,
* During Indexing we verify and open the ObjectCode and
perform a quick scan/indexing of the ObjectCode. All the work
required to actually load the ObjectCode is done.
All symbols from the ObjectCode are also inserted into
`symhash`, where possible duplicates are handled via the semantics
described in `ghciInsertSymbolTable`.
This phase will produce ObjectCode with status `OBJECT_LOADED` or `OBJECT_NEEDED`
depending on whether they are an archive member or not.
* During initialization we load ObjectCode, perform relocations, execute
static constructors etc. This phase may trigger other ObjectCodes to
be loaded because of the calls to lookupSymbol.
This phase will produce ObjectCode with status `OBJECT_NEEDED` if the
previous status was `OBJECT_LOADED`.
* During resolve we attempt to resolve all the symbols needed for the
initial link. This essentially means, that for any ObjectCode given
directly to the command-line we perform lookupSymbol on the required
symbols. lookupSymbol may trigger the loading of additional ObjectCode
if required. After resolving an object we mark its text as executable and
not writable.
This phase will produce ObjectCode with status `OBJECT_RESOLVED` if
the previous status was `OBJECT_NEEDED`.
* During RunInit we run the initializers ("constructors") of the objects
that are in `OBJECT_RESOLVED` state and move them to `OBJECT_READY` state.
This must be in a separate phase since we must ensure that all needed
objects have been fully resolved before we can run their initializers.
This is particularly tricky in the presence of cyclic dependencies (see
#21253).
* lookupSymbol is used to lookup any symbols required, both during initial
link and during statement and expression compilations in the REPL.
Declaration of e.g. a foreign import, will eventually call lookupSymbol
which will either fail (symbol unknown) or succeed (and possibly trigger a
load).
This phase may transition an ObjectCode from `OBJECT_LOADED` to `OBJECT_RESOLVED`
When a new scope is introduced (e.g. a new module imported) GHCi does a full re-link
by calling unloadObj and starting over.
When a new declaration or statement is performed ultimately lookupSymbol is called
without doing a re-link.
The goal of these different phases is to allow the linker to be able to perform
"lazy loading" of ObjectCode. The reason for this is that we want to only link
in symbols that are actually required for the link. This reduces:
1) Dependency chains, if A.o required a .o in libB but A.o isn't required to link
then we don't need to load libB. This means the dependency chain for libraries
such as ucrt can be broken down.
2) The number of duplicate symbols, since now only symbols that are
true duplicates will display the error.
*/
/* Global symbol table mapping symbol names to RtsSymbolInfo entries.
   Allocated by initLinker_ and freed by exitLinker. */
StrHashTable *symhash;
#if defined(THREADED_RTS)
/* This protects all the Linker's global state */
Mutex linker_mutex;
#endif
/* Generic wrapper function to try and resolve oc files.
   loadSymbol treats a zero return value as failure. */
static int ocTryLoad( ObjectCode* oc );
/* Run initializers */
static int ocRunInit( ObjectCode* oc );
/* Forward declaration; lookupSymbol reports an error when this returns zero. */
static int runPendingInitializers (void);
/*
 * Drop `key` from `table`, but only when the stored entry belongs to `owner`.
 * Entries owned by someone else (or missing entries) are left untouched.
 * For import symbols the entry's value is released as well as the entry.
 */
static void ghciRemoveSymbolTable(StrHashTable *table, const SymbolName* key,
                                  ObjectCode *owner)
{
    RtsSymbolInfo *entry = lookupStrHashTable(table, key);

    if (entry != NULL && entry->owner == owner) {
        removeStrHashTable(table, key, NULL);
        if (isSymbolImport(owner, key)) {
            stgFree(entry->value);
        }
        stgFree(entry);
    }
}
/*
 * Human-readable name of a symbol type, for diagnostics.
 * The SYM_TYPE_DUP_DISCARD flag is masked off before classification;
 * any other unrecognised value aborts via barf().
 */
static const char *
symbolTypeString (SymType type)
{
    const char *name = NULL;

    switch (type & ~SYM_TYPE_DUP_DISCARD) {
    case SYM_TYPE_CODE:
        name = "code";
        break;
    case SYM_TYPE_DATA:
        name = "data";
        break;
    case SYM_TYPE_INDIRECT_DATA:
        name = "indirect-data";
        break;
    default:
        barf("symbolTypeString: unknown symbol type");
    }
    return name;
}
/* -----------------------------------------------------------------------------
* Insert symbols into hash tables, checking for duplicates.
*
* Returns: 0 on failure, nonzero on success
*/
/*
Note [weak-symbols-support]
~~~~~~~~~~~~~~~~~~~~~~~~~~~
While ghciInsertSymbolTable does implement extensive
logic for weak symbol support, weak symbols are not currently
fully supported by the RTS. This code is mostly here for COMDAT
support which uses the weak symbols support.
Linking weak symbols defined purely in C code with other C code
should also work, probably. Observing weak symbols in Haskell
won't.
Some tests have been written for weak symbols but have been disabled
mostly because it's unsure how the weak symbols support should look.
See #11223
*/
/*
 * Insert `key` into `table`, resolving clashes with an existing entry.
 *
 *   obj_name  name of the object being processed (diagnostics only)
 *   table     symbol table to update
 *   key       symbol name
 *   data      address of the symbol
 *   strength  strength of the new definition
 *   type      type of the new definition
 *   owner     ObjectCode providing the symbol, or NULL for built-ins
 *
 * Returns 1 when the symbol was inserted, replaced, or deliberately ignored;
 * returns 0 only when a genuine duplicate definition is detected (an error
 * message is printed in that case).
 *
 * The order of the checks below is significant: each one relies on the
 * earlier ones having been ruled out.
 */
int ghciInsertSymbolTable(
    pathchar* obj_name,
    StrHashTable *table,
    const SymbolName* key,
    SymbolAddr* data,
    SymStrength strength,
    SymType type,
    ObjectCode *owner)
{
    RtsSymbolInfo *pinfo = lookupStrHashTable(table, key);

    if (!pinfo) {
        /* new entry */
        pinfo = stgMallocBytes(sizeof (*pinfo), "ghciInsertToSymbolTable");
        pinfo->value = data;
        pinfo->owner = owner;
        pinfo->strength = strength;
        pinfo->type = type;
        insertStrHashTable(table, key, pinfo);
        return 1;
    }

    if (pinfo->type ^ type) {
        /* The types differ; the existing entry is kept. If we were asked to
           discard the symbol on duplicates, do so quietly. */
        if (!(type & SYM_TYPE_DUP_DISCARD)) {
            debugBelch("Symbol type mismatch.\n");
            debugBelch("Symbol %s was defined by %" PATH_FMT " to be a %s symbol.\n",
                       key, obj_name, symbolTypeString(type));
            debugBelch("      yet was defined by %" PATH_FMT " to be a %s symbol.\n",
                       pinfo->owner ? pinfo->owner->fileName : WSTR("<builtin>"),
                       symbolTypeString(pinfo->type));
        }
        return 1;
    }

    if (pinfo->strength == STRENGTH_STRONG) {
        /* The existing symbol is strong meaning we must never override it */
        IF_DEBUG(linker, debugBelch("%s is already defined as a strong symbol; ignoring redefinition...\n", key));
        return 1;
    }

    if (strength == STRENGTH_WEAK) {
        if (data && pinfo->strength == STRENGTH_WEAK && !pinfo->value) {
            /* The existing symbol is weak with a zero value; replace it with
               the new symbol. */
            pinfo->value = data;
            pinfo->owner = owner;
        }
        /* Otherwise the new weak symbol is thrown away: either its data is 0
           and we already know of another copy, or the existing symbol already
           has a nonzero value (or is not weak). Keeping the existing one also
           preserves the semantics of linking against the first symbol we
           find. */
        return 1;
    }

    /* From here on the new definition is not weak. */
    if (pinfo->strength == STRENGTH_WEAK) {
        /* override the weak definition with the non-weak one */
        pinfo->value = data;
        pinfo->owner = owner;
        pinfo->strength = strength;
        return 1;
    }

    if (   pinfo->owner
        && pinfo->owner->status != OBJECT_READY
        && pinfo->owner->status != OBJECT_RESOLVED
        && pinfo->owner->status != OBJECT_NEEDED)
    {
        /* If the other symbol hasn't been loaded or will be loaded and we want to
           explicitly load the new one, we can just swap it out and load the one
           that has been requested. If not, just keep the first one encountered.

           Because the `symHash' table consists of symbols we've also not loaded but
           found during the initial scan this is safe to do. If however the existing
           symbol has been loaded then it means we have a duplicate.

           This is essentially emulating the behavior of a linker wherein it will always
           link in object files that are .o file arguments, but only take object files
           from archives as needed. */
        if (owner && (   owner->status == OBJECT_NEEDED
                      || owner->status == OBJECT_RESOLVED
                      || owner->status == OBJECT_READY)) {
            pinfo->value = data;
            pinfo->owner = owner;
            pinfo->strength = strength;
        }
        return 1;
    }

    if (pinfo->owner == owner) {
        /* If it's the same symbol, ignore. This makes ghciInsertSymbolTable idempotent */
        return 1;
    }

    if (owner && owner->status == OBJECT_LOADED) {
        /* If the duplicate symbol is just in state OBJECT_LOADED it means we're in
           discovery of a member. It's not a real duplicate yet. If the Oc becomes
           OBJECT_NEEDED then ocTryLoad will call this function again to trigger
           the duplicate error. */
        return 1;
    }

    debugBelch(
        "GHC runtime linker: fatal error: I found a duplicate definition for symbol\n"
        " %s\n"
        "whilst processing object file\n"
        " %" PATH_FMT "\n"
        "The symbol was previously defined in\n"
        " %" PATH_FMT "\n"
        "This could be caused by:\n"
        " * Loading two different object files which export the same symbol\n"
        " * Specifying the same object file twice on the GHCi command line\n"
        " * An incorrect `package.conf' entry, causing some object to be\n"
        " loaded twice.\n",
        (char*)key,
        obj_name,
        pinfo->owner == NULL ? WSTR("(GHCi built-in symbols)") :
        pinfo->owner->archiveMemberName ? pinfo->owner->archiveMemberName
        : pinfo->owner->fileName
    );
    return 0;
}
/* -----------------------------------------------------------------------------
* Looks up symbols into hash tables.
*
* Returns: 0 on failure and result is not set,
* nonzero on success and result set to nonzero pointer
*/
/*
 * Fetch the RtsSymbolInfo stored for `key`.
 *
 * On a miss, *result is set to NULL and HS_BOOL_FALSE is returned.
 * On a hit, *result points at the entry and HS_BOOL_TRUE is returned; as a
 * side effect a weak entry is promoted to normal strength, since a symbol
 * that has been looked up may no longer be overridden.
 */
HsBool ghciLookupSymbolInfo(StrHashTable *table,
                            const SymbolName* key, RtsSymbolInfo **result)
{
    RtsSymbolInfo *found = lookupStrHashTable(table, key);

    *result = found;
    if (found == NULL) {
        return HS_BOOL_FALSE;
    }

    if (found->strength == STRENGTH_WEAK) {
        IF_DEBUG(linker, debugBelch("lookupSymbolInfo: promoting %s\n", key));
        found->strength = STRENGTH_NORMAL;
    }
    return HS_BOOL_TRUE;
}
/* -----------------------------------------------------------------------------
* initialize the object linker
*/
/* Set to 1 by initLinker_ on its first call; later calls see the flag and
   return early. exitLinker only tears down state when the flag is 1. */
static int linker_init_done = 0 ;
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
/* Handle that internal_dlsym searches first. initLinker_ sets it to
   RTLD_DEFAULT when that macro exists, otherwise to dlopen(NULL, RTLD_LAZY). */
static void *dl_prog_handle;
/* Compiled in initLinker_ and released in exitLinker. Matches text of the
   form "<path>.so<suffix>: <reason>" where <reason> is one of
   "invalid ELF header", "file too short", "invalid file format" or
   "Exec format error". */
regex_t re_invalid;
/* Compiled in initLinker_ and released in exitLinker. Matches
   "GROUP ( <name>" or "INPUT ( <name>".
   NOTE(review): presumably used to follow linker scripts masquerading as
   shared objects -- the users are not visible here, confirm. */
regex_t re_realso;
#endif
/* Initialise the object linker with CAF retention enabled.
   Equivalent to calling initLinker_(1). */
void initLinker (void)
{
// default to retaining CAFs for backwards compatibility. Most
// users will want initLinker_(0): otherwise unloadObj() will not
// be able to unload object files when they contain CAFs.
initLinker_(1);
}
void
initLinker_ (int retain_cafs)
{
#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
int compileResult;
#endif
IF_DEBUG(linker, debugBelch("initLinker: start\n"));
/* Make initLinker idempotent, so we can call it
before every relevant operation; that means we
don't need to initialise the linker separately */
if (linker_init_done == 1) {
IF_DEBUG(linker, debugBelch("initLinker: idempotent return\n"));
return;
} else {
linker_init_done = 1;
}
initUnloadCheck();
#if defined(THREADED_RTS)
initMutex(&linker_mutex);
#endif
symhash = allocStrHashTable();
/* populate the symbol table with stuff from the RTS */
for (const RtsSymbolVal *sym = rtsSyms; sym->lbl != NULL; sym++) {
if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"),
symhash, sym->lbl, sym->addr,
sym->strength, sym->type, NULL)) {
barf("ghciInsertSymbolTable failed");
}
IF_DEBUG(linker, debugBelch("initLinker: inserting rts symbol %s, %p\n", sym->lbl, sym->addr));
}
// Redirect newCAF to newRetainedCAF if retain_cafs is true.
if (! ghciInsertSymbolTable(WSTR("(GHCi built-in symbols)"), symhash,
MAYBE_LEADING_UNDERSCORE_STR("newCAF"),
retain_cafs ? newRetainedCAF : newGCdCAF,
HS_BOOL_FALSE, SYM_TYPE_CODE, NULL)) {
barf("ghciInsertSymbolTable failed");
}
# if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
# if defined(RTLD_DEFAULT)
dl_prog_handle = RTLD_DEFAULT;
# else
dl_prog_handle = dlopen(NULL, RTLD_LAZY);
# endif /* RTLD_DEFAULT */
compileResult = regcomp(&re_invalid,
"(([^ \t()])+\\.so([^ \t:()])*):([ \t])*(invalid ELF header|file too short|invalid file format|Exec format error)",
REG_EXTENDED);
if (compileResult != 0) {
barf("Compiling re_invalid failed");
}
compileResult = regcomp(&re_realso,
"(GROUP|INPUT) *\\( *([^ )]+)",
REG_EXTENDED);
if (compileResult != 0) {
barf("Compiling re_realso failed");
}
# endif
if (RtsFlags.MiscFlags.linkerMemBase != 0) {
// User-override for mmap_32bit_base
mmap_32bit_base = (void*)RtsFlags.MiscFlags.linkerMemBase;
}
#if defined(OBJFORMAT_PEi386)
initLinker_PEi386();
#endif
IF_DEBUG(linker, debugBelch("initLinker: done\n"));
return;
}
/*
 * Tear down the state created by initLinker_.
 *
 * Does nothing when the linker was never initialised (or has already been
 * shut down): in particular the mutex is only destroyed if initLinker_
 * actually initialised it, and the symbol table is only freed once.
 * The init flag is cleared afterwards so that a later initLinker_ call
 * rebuilds the state instead of returning early with a freed table.
 */
void
exitLinker( void ) {
    if (linker_init_done != 1) {
        return;
    }

#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
    regfree(&re_invalid);
    regfree(&re_realso);
#endif

    freeStrHashTable(symhash, free);
    symhash = NULL;  /* do not leave a dangling pointer behind */
    exitUnloadCheck();

#if defined(THREADED_RTS)
    closeMutex(&linker_mutex);
#endif

    linker_init_done = 0;
}
/* -----------------------------------------------------------------------------
* Loading DLL or .so dynamic libraries
* -----------------------------------------------------------------------------
*
* Add a DLL from which symbols may be found. In the ELF case, just
* do RTLD_GLOBAL-style add, so no further messing around needs to
* happen in order that symbols in the loaded .so are findable --
* lookupSymbol() will subsequently see them by dlsym on the program's
* dl-handle. Returns NULL if success, otherwise ptr to an err msg.
*
* In the PEi386 case, open the DLLs and put handles to them in a
* linked list. When looking for a symbol, try all handles in the
* list. This means that we need to load even DLLs that are guaranteed
* to be in the ghc.exe image already, just so we can get a handle
* to give to loadSymbol, so that we can find the symbols. For such
* libraries, the LoadLibrary call should be a no-op except for returning
* the handle.
*
*/
# if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
/*
Note [RTLD_LOCAL]
~~~~~~~~~~~~~~~~~
In GHCi we want to be able to override previous .so's with newly
loaded .so's when we recompile something. This further implies that
when we look up a symbol in internal_dlsym() we have to iterate
through the loaded libraries (in order from most recently loaded to
oldest) looking up the symbol in each one until we find it.
However, this can cause problems for some symbols that are copied
by the linker into the executable image at runtime - see #8935 for a
lengthy discussion. To solve that problem we need to look up
symbols in the main executable *first*, before attempting to look
them up in the loaded .so's. But in order to make that work, we
have to always call dlopen with RTLD_LOCAL, so that the loaded
libraries don't populate the global symbol table.
*/
/*
 * Resolve `symbol` through the dynamic loader.
 *
 * Search order: the program handle, then every DYNAMIC_OBJECT in
 * loaded_objects (in list order), then a small set of platform-specific
 * special cases. Returns NULL when nothing matches.
 * The caller must hold linker_mutex.
 */
static void *
internal_dlsym(const char *symbol) {
    void *addr;

    // dlerror state can be disturbed by concurrent dl* calls
    ASSERT_LOCK_HELD(&linker_mutex);

    // reset dlerror before probing
    dlerror();

    // the program itself is consulted first; see Note [RTLD_LOCAL]
    addr = dlsym(dl_prog_handle, symbol);
    if (dlerror() == NULL) {
        IF_DEBUG(linker, debugBelch("internal_dlsym: found symbol '%s' in program\n", symbol));
        return addr;
    }

    for (ObjectCode *obj = loaded_objects; obj != NULL; obj = obj->next_loaded_object) {
        if (obj->type != DYNAMIC_OBJECT) {
            continue;
        }
        addr = dlsym(obj->dlopen_handle, symbol);
        if (dlerror() == NULL) {
            IF_DEBUG(linker, debugBelch("internal_dlsym: found symbol '%s' in shared object\n", symbol));
            return addr;
        }
    }

    IF_DEBUG(linker, debugBelch("internal_dlsym: looking for symbol '%s' in special cases\n", symbol));

#   define SPECIAL_SYMBOL(sym) \
        do { \
            if (strcmp(symbol, #sym) == 0) return (void*)&sym; \
        } while (0)

#   if defined(HAVE_SYS_STAT_H) && defined(linux_HOST_OS) && defined(__GLIBC__)
    // HACK: GLIBC implements these functions with a great deal of trickery where
    //       they are either inlined at compile time to their corresponding
    //       __xxxx(SYS_VER, ...) function or direct syscalls, or resolved at
    //       link time via libc_nonshared.a.
    //
    //       We borrow the approach that the LLVM JIT uses to resolve these
    //       symbols. See http://llvm.org/PR274 and #7072 for more info.
    SPECIAL_SYMBOL(stat);
    SPECIAL_SYMBOL(fstat);
    SPECIAL_SYMBOL(lstat);
    SPECIAL_SYMBOL(stat64);
    SPECIAL_SYMBOL(fstat64);
    SPECIAL_SYMBOL(lstat64);
    SPECIAL_SYMBOL(atexit);
    SPECIAL_SYMBOL(mknod);
#   endif

    // See Note [iconv and FreeBSD]
#   if defined(freebsd_HOST_OS)
    SPECIAL_SYMBOL(iconvctl);
    SPECIAL_SYMBOL(iconv_open_into);
    SPECIAL_SYMBOL(iconv_open);
    SPECIAL_SYMBOL(iconv_close);
    SPECIAL_SYMBOL(iconv_canonicalize);
    SPECIAL_SYMBOL(iconv);
#   endif

#undef SPECIAL_SYMBOL

    // nothing matched
    return NULL;
}
# endif
/*
 * Look up `symbol_name` in the native shared object identified by `handle`.
 * Takes and releases linker_mutex around the lookup. On Mach-O the name
 * must start with '_' (checked), and that prefix is dropped before the
 * lookup. Aborts via barf() on platforms without an implementation.
 */
void *lookupSymbolInNativeObj(void *handle, const char *symbol_name)
{
    void *addr;

    ACQUIRE_LOCK(&linker_mutex);

#if defined(OBJFORMAT_MACHO)
    // The Mach-O standard says ccall symbols representing a function are prefixed with _
    // https://math-atlas.sourceforge.net/devel/assembly/MachORuntime.pdf
    CHECK(symbol_name[0] == '_');
    symbol_name += 1;
#endif

#if defined(OBJFORMAT_ELF) || defined(OBJFORMAT_MACHO)
    addr = dlsym(handle, symbol_name);
#elif defined(OBJFORMAT_PEi386)
    addr = lookupSymbolInDLL_PEi386(symbol_name, handle, NULL, NULL);
#else
    UNUSED(handle);
    UNUSED(symbol_name);
    barf("lookupSymbolInNativeObj: Unsupported platform");
#endif

    RELEASE_LOCK(&linker_mutex);
    return addr;
}
/*
 * Load the dynamic library `dll_name` via loadNativeObj.
 * Returns NULL on success, otherwise the error message that loadNativeObj
 * stored (asserted to be non-NULL).
 */
const char *addDLL(pathchar* dll_name)
{
    char *errmsg;

    if (!loadNativeObj(dll_name, &errmsg)) {
        ASSERT(errmsg != NULL);
        return errmsg;
    }
    return NULL;
}
/* -----------------------------------------------------------------------------
* Searches the system directories to determine if there is a system DLL that
* satisfies the given name. This prevents GHCi from linking against a static
* library if a DLL is available.
*
* Returns: NULL on failure or no DLL found, else the full path to the DLL
* that can be loaded.
*/
/*
 * Ask the platform for a system DLL matching `dll_name`.
 * Only implemented for PE targets; elsewhere the result is always NULL.
 */
pathchar* findSystemLibrary(pathchar* dll_name)
{
    pathchar *found = NULL;

    IF_DEBUG(linker, debugBelch("\nfindSystemLibrary: dll_name = `%"
                                PATH_FMT "'\n", dll_name));

#if defined(OBJFORMAT_PEi386)
    found = findSystemLibrary_PEi386(dll_name);
#else
    (void)(dll_name); // Function not implemented for other platforms.
#endif
    return found;
}
/* -----------------------------------------------------------------------------
* Emits a warning determining that the system is missing a required security
* update that we need to get access to the proper APIs
*/
/*
 * Print the KB2533623 hint the first time this is called; every later call
 * is a no-op.
 */
void warnMissingKBLibraryPaths( void )
{
    static HsBool already_warned = HS_BOOL_FALSE;

    if (already_warned) {
        return;
    }
    debugBelch("Warning: If linking fails, consider installing KB2533623.\n");
    already_warned = HS_BOOL_TRUE;
}
/* -----------------------------------------------------------------------------
* appends a directory to the process DLL Load path so LoadLibrary can find it
*
* Returns: NULL on failure, or pointer to be passed to removeLibrarySearchPath to
* restore the search path to what it was before this call.
*/
/*
 * Add `dll_path` to the process DLL search path.
 * Only implemented for PE targets; elsewhere the result is always NULL.
 */
HsPtr addLibrarySearchPath(pathchar* dll_path)
{
    HsPtr cookie = NULL;

    IF_DEBUG(linker, debugBelch("\naddLibrarySearchPath: dll_path = `%"
                                PATH_FMT "'\n", dll_path));

#if defined(OBJFORMAT_PEi386)
    cookie = addLibrarySearchPath_PEi386(dll_path);
#else
    (void)(dll_path); // Function not implemented for other platforms.
#endif
    return cookie;
}
/* -----------------------------------------------------------------------------
* removes a directory from the process DLL Load path
*
* Returns: HS_BOOL_TRUE on success, otherwise HS_BOOL_FALSE
*/
/*
 * Undo a previous addLibrarySearchPath, identified by `dll_path_index`.
 * Only implemented for PE targets; elsewhere the result is always
 * HS_BOOL_FALSE.
 */
HsBool removeLibrarySearchPath(HsPtr dll_path_index)
{
    HsBool removed = HS_BOOL_FALSE;

    IF_DEBUG(linker, debugBelch("\nremoveLibrarySearchPath: ptr = `%p'\n",
                                dll_path_index));

#if defined(OBJFORMAT_PEi386)
    removed = removeLibrarySearchPath_PEi386(dll_path_index);
#else
    (void)(dll_path_index); // Function not implemented for other platforms.
#endif
    return removed;
}
/* -----------------------------------------------------------------------------
* insert a code symbol in the hash table
*
* Returns: 0 on failure, nonzero on success
*/
/*
 * Insert a code symbol with no owning ObjectCode into the global symbol
 * table.
 *
 *   obj_name  name reported in diagnostics
 *   key       symbol name
 *   data      address of the symbol
 *
 * Returns 0 on failure, nonzero on success (the result of
 * ghciInsertSymbolTable).
 */
HsInt insertSymbol(pathchar* obj_name, SymbolName* key, SymbolAddr* data)
{
    /* The strength argument is a SymStrength, so pass STRENGTH_NORMAL
       rather than a boolean constant. */
    return ghciInsertSymbolTable(obj_name, symhash, key, data, STRENGTH_NORMAL,
                                 SYM_TYPE_CODE, NULL);
}
/* -----------------------------------------------------------------------------
* Lookup a symbol in the hash table
*
* When 'dependent' is not NULL, adds it as a dependent to the owner of the
* symbol.
*/
#if defined(OBJFORMAT_PEi386)
/* PE (Windows) variant: the whole lookup is delegated to lookupSymbol_PEi386.
   The caller must already hold linker_mutex (asserted below). */
SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type)
{
ASSERT_LOCK_HELD(&linker_mutex);
return lookupSymbol_PEi386(lbl, dependent, type);
}
#else
/* Non-PE variant.
 *
 * Resolve `lbl` to an address. The symbol table `symhash` is consulted
 * first; on a miss the lookup falls back to internal_dlsym (ELF, Mach-O) or
 * fails (wasm32).
 *
 *   lbl        symbol name to resolve
 *   dependent  object containing the reference, or NULL; when non-NULL and
 *              the symbol is found in symhash with an owner, that owner is
 *              recorded in dependent->dependencies
 *   type       optional out-parameter for the symbol type; may be NULL
 *
 * Returns the symbol's address, or NULL when it cannot be resolved (or when
 * on-demand loading of its owner fails inside loadSymbol).
 *
 * The caller must already hold linker_mutex (asserted below).
 *
 * NOTE(review): *type is NOT written on the __dso_handle path, on the
 * __fini_array_start/__fini_array_end paths, or on the wasm32 path. Callers
 * that read *type after those lookups would see whatever they initialised it
 * to -- confirm against callers.
 */
SymbolAddr* lookupDependentSymbol (SymbolName* lbl, ObjectCode *dependent, SymType *type)
{
ASSERT_LOCK_HELD(&linker_mutex);
IF_DEBUG(linker_verbose, debugBelch("lookupSymbol: looking up '%s'\n", lbl));
ASSERT(symhash != NULL);
RtsSymbolInfo *pinfo;
/* See Note [Resolving __dso_handle] */
if (strcmp(lbl, MAYBE_LEADING_UNDERSCORE_STR("__dso_handle")) == 0) {
if (dependent) {
return dependent->image;
} else {
// In the case that we don't know which object the reference lives
// in we return a random symbol from the executable image.
return &lookupDependentSymbol;
}
}
// A reference to __cxa_atexit makes us also look up __cxa_finalize and
// remember it on the referencing object; the lookup of __cxa_atexit itself
// then continues below. See Note [Resolving __dso_handle].
if (strcmp(lbl, MAYBE_LEADING_UNDERSCORE_STR("__cxa_atexit")) == 0 && dependent) {
dependent->cxa_finalize = (cxa_finalize_fn) lookupDependentSymbol(
MAYBE_LEADING_UNDERSCORE_STR("__cxa_finalize"),
dependent,
NULL);
}
// Note: ghciLookupSymbolInfo promotes a weak entry to normal strength as a
// side effect of a successful lookup.
if (!ghciLookupSymbolInfo(symhash, lbl, &pinfo)) {
IF_DEBUG(linker_verbose, debugBelch("lookupSymbol: symbol '%s' not found, trying dlsym\n", lbl));
# if defined(OBJFORMAT_ELF)
SymbolAddr *ret = internal_dlsym(lbl);
if (type) {
// We assume that the symbol is code since this is usually the case
// and dlsym doesn't tell us.
// (This is written even when ret is NULL.)
*type = SYM_TYPE_CODE;
}
// Generally the dynamic linker would define _DYNAMIC, which is
// supposed to point to various bits of dynamic linker state (see
// [1]). However, if dynamic linking isn't supported (e.g. in the case
// of musl) then we can safely declare that it is NULL.
//
// [1] https://wiki.gentoo.org/wiki/Hardened/Introduction_to_Position_Independent_Code
if (ret == NULL && strcmp(lbl, "_DYNAMIC") == 0) {
static void *RTS_DYNAMIC = NULL;
ret = (SymbolAddr *) &RTS_DYNAMIC;
if (type) {
*type = SYM_TYPE_DATA;
}
}
return ret;
# elif defined(OBJFORMAT_MACHO)
/* HACK: On OS X, all symbols are prefixed with an underscore.
However, dlsym wants us to omit the leading underscore from the
symbol name -- the dlsym routine puts it back on before
searching for the symbol. For now, we simply strip it off here
(and ONLY here).
*/
IF_DEBUG(linker, debugBelch("lookupSymbol: looking up %s with dlsym\n",
lbl));
CHECK(lbl[0] == '_');
if (type) {
// We assume that the symbol is code since this is usually the case
// and dlsym doesn't tell us.
*type = SYM_TYPE_CODE;
}
return internal_dlsym(lbl + 1);
# elif defined(OBJFORMAT_WASM32)
// No dlsym fallback here: an unknown symbol is simply unresolved.
return NULL;
# else
# error No OBJFORMAT_* macro set
# endif
} else {
// These two names are only special-cased when they are present in
// symhash; both resolve to the address of the same NULL-valued word.
static void *RTS_NO_FINI = NULL;
if (strcmp(lbl, "__fini_array_end") == 0) { return (SymbolAddr *) &RTS_NO_FINI; }
if (strcmp(lbl, "__fini_array_start") == 0) { return (SymbolAddr *) &RTS_NO_FINI; }
if (type) {
// This is an assumption
*type = pinfo->type;
}
if (dependent) {
// Add dependent as symbol's owner's dependency
ObjectCode *owner = pinfo->owner;
if (owner) {
// TODO: what does it mean for a symbol to not have an owner?
insertHashSet(dependent->dependencies, (W_)owner);
}
}
// May trigger on-demand loading of the owning object.
return loadSymbol(lbl, pinfo);
}
}
#endif /* OBJFORMAT_PEi386 */
/* Note [Resolving __dso_handle]
* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
* This symbol, which is defined by the C++ ABI, would typically be defined by
* the system's dynamic linker to act as a "handle", identifying a particular
* loaded dynamic object to the C++ standard library for the purpose of running
* destructors on unload. Here we behave the same way that the dynamic linker
* would, using some address (here the start address) of the loaded object as
* its handle.
*
* Note that references to __dso_handle may be relocated using
* relocations of bounded displacement and therefore __dso_handle must not be
* too far from the loaded object's code (hence using its start address).
*
* Finally, when we see a reference to __cxa_atexit in an object we take care
* to lookup and record the address of __cxa_finalize (largely to ensure that
* the symbol dependency is recorded) and call it with the appropriate handle
* when the object is unloaded.
*
* See #20493.
* See section 3.3.5 of the Itanium C++ ABI, version 1.83.
*/
/*
* Load and relocate the object code for a symbol as necessary.
* Symbol name only used for diagnostics output.
*/
/*
 * Return the address recorded in `pinfo`, first loading the owning object
 * on demand when it is still in state OBJECT_LOADED (its status is moved to
 * OBJECT_NEEDED and ocTryLoad is run). Returns NULL if that load fails.
 * `lbl` is used for diagnostics; when it is NULL no on-demand load is
 * attempted.
 */
SymbolAddr* loadSymbol(SymbolName *lbl, RtsSymbolInfo *pinfo) {
    IF_DEBUG(linker_verbose,
             debugBelch("lookupSymbol: value of %s is %p, owned by %" PATH_FMT "\n", lbl,
                        pinfo->value,
                        pinfo->owner ? OC_INFORMATIVE_FILENAME(pinfo->owner) : WSTR("No owner, probably built-in.")));

    ObjectCode *owner = pinfo->owner;

    /* Nothing to do unless the symbol was found during indexing but its
       object has not been relocated yet.
       See Note [runtime-linker-phases] */
    if (owner == NULL || lbl == NULL || owner->status != OBJECT_LOADED) {
        return pinfo->value;
    }

    owner->status = OBJECT_NEEDED;
    IF_DEBUG(linker, debugBelch("lookupSymbol: on-demand "
                                "loading symbol '%s'\n", lbl));
    if (!ocTryLoad(owner)) {
        return NULL;
    }
    return pinfo->value;
}
/*
 * Print, for every object in `objects` that has a section table, its name
 * followed by one line per section that has a start or mapped_start address.
 */
void
printLoadedObjects(void) {
    for (ObjectCode *oc = objects; oc != NULL; oc = oc->next) {
        if (oc->sections == NULL) {
            continue;
        }

        printf("%" PATH_FMT "\n", OC_INFORMATIVE_FILENAME(oc));

        for (int i = 0; i < oc->n_sections; i++) {
            if (oc->sections[i].mapped_start == NULL && oc->sections[i].start == NULL) {
                continue;
            }
            printf("\tsec %2d[alloc: %d; kind: %d]: %p - %p; mmaped: %p - %p\n",
                   i, oc->sections[i].alloc, oc->sections[i].kind,
                   oc->sections[i].start,
                   (void*)((uintptr_t)(oc->sections[i].start) + oc->sections[i].size),
                   oc->sections[i].mapped_start,
                   (void*)((uintptr_t)(oc->sections[i].mapped_start) + oc->sections[i].mapped_size));
        }
    }
}
/*
 * Public entry point for symbol lookup: takes linker_mutex, resolves `lbl`
 * without recording any dependency, runs pending initializers, and returns
 * the address (NULL when the symbol could not be resolved). Failures are
 * reported via errorBelch.
 */
SymbolAddr* lookupSymbol( SymbolName* lbl )
{
    SymbolAddr *addr;

    ACQUIRE_LOCK(&linker_mutex);

    // NULL for "don't add dependent". When adding a dependency we call
    // lookupDependentSymbol directly.
    addr = lookupDependentSymbol(lbl, NULL, NULL);
    if (addr == NULL) {
        errorBelch("^^ Could not load '%s', dependency unresolved. "
                   "See top entry above.\n", lbl);
        IF_DEBUG(linker, printLoadedObjects());
        fflush(stderr);
    }

    if (runPendingInitializers() == 0) {
        errorBelch("lookupSymbol: Failed to run initializers.");
    }

    RELEASE_LOCK(&linker_mutex);
    return addr;
}
/* -----------------------------------------------------------------------------
* Debugging aid: look in GHCi's object symbol tables for symbols
* within DELTA bytes of the specified address, and show their names.
*/
#if defined(DEBUG)
void ghci_enquire ( SymbolAddr* addr );

/*
 * Debugging aid: for every named symbol of every object in `objects`, look
 * the name up in symhash and print it when its address lies within DELTA
 * bytes of `addr`.
 * Note that the lookup goes through ghciLookupSymbolInfo, which promotes
 * weak entries to normal strength.
 */
void ghci_enquire(SymbolAddr* addr)
{
    const int DELTA = 64;

    for (ObjectCode *oc = objects; oc; oc = oc->next) {
        for (int i = 0; i < oc->n_symbols; i++) {
            SymbolName *sym = oc->symbols[i].name;
            RtsSymbolInfo *info = NULL;

            if (sym == NULL) {
                continue;
            }

            ghciLookupSymbolInfo(symhash, sym, &info);
            if (info == NULL) {
                // not in the table; nothing to report
                continue;
            }

            if (   info->value
                && (char*)addr-DELTA <= (char*)info->value
                && (char*)info->value <= (char*)addr+DELTA) {
                debugBelch("%p + %3d == `%s'\n", addr,
                           (int)((char*)info->value - (char*)addr), sym);
            }
        }
    }
}
#endif
pathchar*
resolveSymbolAddr (pathchar* buffer, int size,
SymbolAddr* symbol, uintptr_t* top)
{
#if defined(OBJFORMAT_PEi386)
return resolveSymbolAddr_PEi386 (buffer, size, symbol, top);